From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 13 Aug 2009 16:34:53 -0400
Subject: tracing: Ftrace dynamic ftrace_event_call support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dynamic ftrace_event_call support to ftrace. Trace engines can add
new ftrace_event_call to ftrace on the fly. Each operator function of
the call takes an ftrace_event_call data structure as an argument,
because these functions may be shared among several ftrace_event_calls.

Changes from v13:
 - Define remove_subsystem_dir() always (revirt a2ca5e03), because
   trace_remove_event_call() uses it.
 - Modify syscall tracer because of ftrace_event_call change.

[fweisbec@gmail.com: Fixed conflict against latest tracing/core]

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it>
Cc: Roland McGrath <roland@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h | 19 ++++++++++---------
 include/linux/syscalls.h     |  4 ++--
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index ace2da9e0a0d..1ab3089b5c59 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -112,12 +112,12 @@ struct ftrace_event_call {
 	struct dentry		*dir;
 	struct trace_event	*event;
 	int			enabled;
-	int			(*regfunc)(void *);
-	void			(*unregfunc)(void *);
+	int			(*regfunc)(struct ftrace_event_call *);
+	void			(*unregfunc)(struct ftrace_event_call *);
 	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct ftrace_event_call *call,
-					       struct trace_seq *s);
+	int			(*raw_init)(struct ftrace_event_call *);
+	int			(*show_format)(struct ftrace_event_call *,
+					       struct trace_seq *);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
 	int			filter_active;
@@ -147,11 +147,12 @@ enum {
 	FILTER_PTR_STRING,
 };
 
-extern int trace_define_field(struct ftrace_event_call *call,
-			      const char *type, const char *name,
-			      int offset, int size, int is_signed,
-			      int filter_type);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size, int is_signed,
+			      int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f124c8995555..646102eeff92 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(void)				\
+	static int init_enter_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
 		num = syscall_name_to_nr("sys"#sname);			\
@@ -202,7 +202,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(void)				\
+	static int init_exit_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
 		num = syscall_name_to_nr("sys"#sname);			\
-- 
cgit v1.2.3


From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 26 Aug 2009 23:38:30 +0200
Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion

Kprobes can enter into a probing recursion, ie: a kprobe that does an
endless loop because one of its core mechanism function used during
probing is also probed itself.

This patch helps pinpointing the kprobe that raised such recursion
by dumping it and raising a BUG instead of a warning (we also disarm
the kprobe to try avoiding recursion in BUG itself). Having a BUG
instead of a warning stops the stacktrace in the right place and
doesn't pollute the logs with hundreds of traces that eventually end
up in a stack overflow.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
---
 arch/x86/kernel/kprobes.c | 8 ++++++--
 include/linux/kprobes.h   | 2 ++
 kernel/kprobes.c          | 7 +++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 16ae9610f6ff..ecee3d23fef8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -490,9 +490,13 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 			/* A probe has been hit in the codepath leading up
 			 * to, or just after, single-stepping of a probed
 			 * instruction. This entire codepath should strictly
-			 * reside in .kprobes.text section. Raise a warning
-			 * to highlight this peculiar case.
+			 * reside in .kprobes.text section.
+			 * Raise a BUG or we'll continue in an endless
+			 * reentering loop and eventually a stack overflow.
 			 */
+			arch_disarm_kprobe(p);
+			dump_kprobe(p);
+			BUG();
 		}
 	default:
 		/* impossible cases */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index bcd9c07848be..87eb79c9dd60 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 int disable_kprobe(struct kprobe *kp);
 int enable_kprobe(struct kprobe *kp);
 
+void dump_kprobe(struct kprobe *kp);
+
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ef177d653b2c..f72e96c25a38 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1141,6 +1141,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 	arch_remove_kprobe(p);
 }
 
+void __kprobes dump_kprobe(struct kprobe *kp)
+{
+	printk(KERN_WARNING "Dumping kprobe:\n");
+	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+	       kp->symbol_name, kp->addr, kp->offset);
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 					     unsigned long val, void *data)
-- 
cgit v1.2.3


From aeaeae1187d7520f1c5559623f0a149da6a1c96e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 27 Aug 2009 05:09:51 +0200
Subject: tracing: Restore the const qualifier for field names and types
 definition

Restore the const qualifier in field's name and type parameters of
trace_define_field that was lost while solving a conflict.

Fields names and types are defined as builtin constant strings in
static TRACE_EVENTs. But kprobes allocates these dynamically.

That said, we still want to always pass these strings as const char *
in trace_define_fields() to avoid any further accidental writes on
the pointed strings.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h  | 6 +++---
 kernel/trace/trace_events.c   | 4 ++--
 kernel/trace/trace_syscalls.c | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 1ab3089b5c59..73edf5a52e31 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -148,9 +148,9 @@ enum {
 };
 
 extern int trace_define_common_fields(struct ftrace_event_call *call);
-extern int trace_define_field(struct ftrace_event_call *call, char *type,
-			      char *name, int offset, int size, int is_signed,
-			      int filter_type);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+			      const char *name, int offset, int size,
+			      int is_signed, int filter_type);
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8079bb511c43..197cdaa96c43 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size, int is_signed,
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+		       const char *name, int offset, int size, int is_signed,
 		       int filter_type)
 {
 	struct ftrace_event_field *field;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5931933587e9..a928dd004535 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 		return ret;
 
 	for (i = 0; i < meta->nb_args; i++) {
-		ret = trace_define_field(call, (char *)meta->types[i],
-					 (char *)meta->args[i], offset,
+		ret = trace_define_field(call, meta->types[i],
+					 meta->args[i], offset,
 					 sizeof(unsigned long), 0,
 					 FILTER_OTHER);
 		offset += sizeof(unsigned long);
-- 
cgit v1.2.3


From 20f3097bfe5fb5ced0b14f9ea2620c4039bf1dde Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 4 Aug 2009 12:07:08 -0700
Subject: intr-remap: generic support for remapping HPET MSIs

Generic support for remapping HPET MSI's by parsing the HPET timer block
device scope in the ACPI DRHD tables. This is needed for platforms
supporting interrupt-remapping and MSI capable HPET timer block.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Jay Fenlason <fenlason@redhat.com>
LKML-Reference: <20090804190729.477649000@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/intr_remapping.c | 89 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/pci/intr_remapping.h |  7 ++++
 include/linux/dmar.h         | 10 +++++
 include/linux/hpet.h         |  2 +
 4 files changed, 104 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 4f5b8712931f..2cc3f70ad425 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -2,6 +2,7 @@
 #include <linux/dmar.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
+#include <linux/hpet.h>
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <asm/io_apic.h>
@@ -14,7 +15,8 @@
 #include "pci.h"
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
-static int ir_ioapic_num;
+static struct hpet_scope ir_hpet[MAX_HPET_TBS];
+static int ir_ioapic_num, ir_hpet_num;
 int intr_remapping_enabled;
 
 static int disable_intremap;
@@ -351,6 +353,16 @@ int flush_irte(int irq)
 	return rc;
 }
 
+struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
+{
+	int i;
+
+	for (i = 0; i < MAX_HPET_TBS; i++)
+		if (ir_hpet[i].id == hpet_id)
+			return ir_hpet[i].iommu;
+	return NULL;
+}
+
 struct intel_iommu *map_ioapic_to_ir(int apic)
 {
 	int i;
@@ -478,6 +490,36 @@ int set_ioapic_sid(struct irte *irte, int apic)
 	return 0;
 }
 
+int set_hpet_sid(struct irte *irte, u8 id)
+{
+	int i;
+	u16 sid = 0;
+
+	if (!irte)
+		return -1;
+
+	for (i = 0; i < MAX_HPET_TBS; i++) {
+		if (ir_hpet[i].id == id) {
+			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
+			break;
+		}
+	}
+
+	if (sid == 0) {
+		pr_warning("Failed to set source-id of HPET block (%d)\n", id);
+		return -1;
+	}
+
+	/*
+	 * Should really use SQ_ALL_16. Some platforms are broken.
+	 * While we figure out the right quirks for these broken platforms, use
+	 * SQ_13_IGNORE_3 for now.
+	 */
+	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
+
+	return 0;
+}
+
 int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 {
 	struct pci_dev *bridge;
@@ -711,6 +753,34 @@ error:
 	return -1;
 }
 
+static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
+				      struct intel_iommu *iommu)
+{
+	struct acpi_dmar_pci_path *path;
+	u8 bus;
+	int count;
+
+	bus = scope->bus;
+	path = (struct acpi_dmar_pci_path *)(scope + 1);
+	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
+		/ sizeof(struct acpi_dmar_pci_path);
+
+	while (--count > 0) {
+		/*
+		 * Access PCI directly due to the PCI
+		 * subsystem isn't initialized yet.
+		 */
+		bus = read_pci_config_byte(bus, path->dev, path->fn,
+					   PCI_SECONDARY_BUS);
+		path++;
+	}
+	ir_hpet[ir_hpet_num].bus   = bus;
+	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn);
+	ir_hpet[ir_hpet_num].iommu = iommu;
+	ir_hpet[ir_hpet_num].id    = scope->enumeration_id;
+	ir_hpet_num++;
+}
+
 static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
 				      struct intel_iommu *iommu)
 {
@@ -740,8 +810,8 @@ static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
 	ir_ioapic_num++;
 }
 
-static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
-				 struct intel_iommu *iommu)
+static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
+				      struct intel_iommu *iommu)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	struct acpi_dmar_device_scope *scope;
@@ -765,6 +835,17 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
 			       drhd->address);
 
 			ir_parse_one_ioapic_scope(scope, iommu);
+		} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
+			if (ir_hpet_num == MAX_HPET_TBS) {
+				printk(KERN_WARNING "Exceeded Max HPET blocks\n");
+				return -1;
+			}
+
+			printk(KERN_INFO "HPET id %d under DRHD base"
+			       " 0x%Lx\n", scope->enumeration_id,
+			       drhd->address);
+
+			ir_parse_one_hpet_scope(scope, iommu);
 		}
 		start += scope->length;
 	}
@@ -785,7 +866,7 @@ int __init parse_ioapics_under_ir(void)
 		struct intel_iommu *iommu = drhd->iommu;
 
 		if (ecap_ir_support(iommu->ecap)) {
-			if (ir_parse_ioapic_scope(drhd->hdr, iommu))
+			if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
 				return -1;
 
 			ir_supported = 1;
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
index 63a263c18415..5662fecfee60 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/pci/intr_remapping.h
@@ -7,4 +7,11 @@ struct ioapic_scope {
 	unsigned int devfn;	/* PCI devfn number */
 };
 
+struct hpet_scope {
+	struct intel_iommu *iommu;
+	u8 id;
+	unsigned int bus;
+	unsigned int devfn;
+};
+
 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 4a2b162c256a..69a6fbac0921 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -126,7 +126,9 @@ extern int free_irte(int irq);
 extern int irq_remapped(int irq);
 extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
+extern struct intel_iommu *map_hpet_to_ir(u8 id);
 extern int set_ioapic_sid(struct irte *irte, int apic);
+extern int set_hpet_sid(struct irte *irte, u8 id);
 extern int set_msi_sid(struct irte *irte, struct pci_dev *dev);
 #else
 static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
@@ -158,10 +160,18 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 {
 	return NULL;
 }
+static inline struct intel_iommu *map_hpet_to_ir(unsigned int hpet_id)
+{
+	return NULL;
+}
 static inline int set_ioapic_sid(struct irte *irte, int apic)
 {
 	return 0;
 }
+static inline int set_hpet_sid(struct irte *irte, u8 id)
+{
+	return -1;
+}
 static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 {
 	return 0;
diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index 79f63a27bcef..219ca4f6bea6 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -126,4 +126,6 @@ struct hpet_info {
 #define	HPET_DPI	_IO('h', 0x05)	/* disable periodic */
 #define	HPET_IRQFREQ	_IOW('h', 0x6, unsigned long)	/* IRQFREQ usec */
 
+#define MAX_HPET_TBS	8		/* maximum hpet timer blocks */
+
 #endif				/* !__HPET__ */
-- 
cgit v1.2.3


From c95b4502ad7fe8f3b9954aec794b00ac0046ab3a Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Thu, 27 Aug 2009 17:04:42 -0700
Subject: ntp: Provide compability defines (You say MOD_NANO, I say ADJ_NANO)

MOD_NANO, ADJ_NANO, MOD_NANO, ADJ_NANO!
Lets call the whole thing off!
But oh! If we call the whole thing off,
Then we must part.
And oh! If we ever part,
Then that might break my heart^H^H^H^Hclock!
So, if you like MOD_NANO and I like ADJ_NANO,
I'll include MOD_NANO and give up ADJ_NANO (not really!).
For we know we need each other,
So we better call the calling off off.
Let's call the whole thing off!

The tumultuous NTP and Linux relationship has hit another snag: Ends
up NTPd still uses the "xntp 3.4 compatability names" and when the
STA_NANO value was added (along with ADJ_NANO), NTPd expected MOD_NANO
to be added and has apparently hit some build errors.

Report to ntp hackers:
https://lists.ntp.org/pipermail/hackers/2009-August/004455.html

Related Bugs:
https://support.ntp.org/bugs/show_bug.cgi?id=1219
https://bugzilla.redhat.com/show_bug.cgi?id=505566

So in an effort to make peace, here's a patch to help get things
building again. I also have updated the comment to make sure folks
don't think the MOD_* values are just legacy constants.

Of course, NTPd really uses the glibc-headers, so those will need to
be similarly updated before things are working again (the RH bug above
should probably cover that).

Thanks to Michael Tatarinov and Hal Murray for finding and reporting
the issue!

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: hmurray@megapathdsl.net
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Tatarinov <kukabu@gmail.com>
LKML-Reference: <1251417882.7905.42.camel@localhost.localdomain>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index e6967d10d9e5..782ccd45c0d9 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -115,13 +115,16 @@ struct timex {
 #define ADJ_OFFSET_SS_READ	0xa001	/* read-only adjtime */
 #endif
 
-/* xntp 3.4 compatibility names */
+/* NTP userland likes the MOD_ prefix better */
 #define MOD_OFFSET	ADJ_OFFSET
 #define MOD_FREQUENCY	ADJ_FREQUENCY
 #define MOD_MAXERROR	ADJ_MAXERROR
 #define MOD_ESTERROR	ADJ_ESTERROR
 #define MOD_STATUS	ADJ_STATUS
 #define MOD_TIMECONST	ADJ_TIMECONST
+#define MOD_TAI	ADJ_TAI
+#define MOD_MICRO	ADJ_MICRO
+#define MOD_NANO	ADJ_NANO
 
 
 /*
-- 
cgit v1.2.3


From 8ebc423238341b52912c7295b045a32477b33f09 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 7 Apr 2009 04:19:49 +0200
Subject: reiserfs: kill-the-BKL

This patch is an attempt to remove the Bkl based locking scheme from
reiserfs and is intended.

It is a bit inspired from an old attempt by Peter Zijlstra:

   http://lkml.indiana.edu/hypermail/linux/kernel/0704.2/2174.html

The bkl is heavily used in this filesystem to prevent from
concurrent write accesses on the filesystem.

Reiserfs makes a deep use of the specific properties of the Bkl:

- It can be acqquired recursively by a same task
- It is released on the schedule() calls and reacquired when schedule() returns

The two properties above are a roadmap for the reiserfs write locking so it's
very hard to simply replace it with a common mutex.

- We need a recursive-able locking unless we want to restructure several blocks
  of the code.
- We need to identify the sites where the bkl was implictly relaxed
  (schedule, wait, sync, etc...) so that we can in turn release and
  reacquire our new lock explicitly.
  Such implicit releases of the lock are often required to let other
  resources producer/consumer do their job or we can suffer unexpected
  starvations or deadlocks.

So the new lock that replaces the bkl here is a per superblock mutex with a
specific property: it can be acquired recursively by a same task, like the
bkl.

For such purpose, we integrate a lock owner and a lock depth field on the
superblock information structure.

The first axis on this patch is to turn reiserfs_write_(un)lock() function
into a wrapper to manage this mutex. Also some explicit calls to
lock_kernel() have been converted to reiserfs_write_lock() helpers.

The second axis is to find the important blocking sites (schedule...(),
wait_on_buffer(), sync_dirty_buffer(), etc...) and then apply an explicit
release of the write lock on these locations before blocking. Then we can
safely wait for those who can give us resources or those who need some.
Typically this is a fight between the current writer, the reiserfs workqueue
(aka the async commiter) and the pdflush threads.

The third axis is a consequence of the second. The write lock is usually
on top of a lock dependency chain which can include the journal lock, the
flush lock or the commit lock. So it's dangerous to release and trying to
reacquire the write lock while we still hold other locks.

This is fine with the bkl:

      T1                       T2

lock_kernel()
    mutex_lock(A)
    unlock_kernel()
    // do something
                            lock_kernel()
                                mutex_lock(A) -> already locked by T1
                                schedule() (and then unlock_kernel())
    lock_kernel()
    mutex_unlock(A)
    ....

This is not fine with a mutex:

      T1                       T2

mutex_lock(write)
    mutex_lock(A)
    mutex_unlock(write)
    // do something
                           mutex_lock(write)
                              mutex_lock(A) -> already locked by T1
                              schedule()

    mutex_lock(write) -> already locked by T2
    deadlock

The solution in this patch is to provide a helper which releases the write
lock and sleep a bit if we can't lock a mutex that depend on it. It's another
simulation of the bkl behaviour.

The last axis is to locate the fs callbacks that are called with the bkl held,
according to Documentation/filesystem/Locking.

Those are:

- reiserfs_remount
- reiserfs_fill_super
- reiserfs_put_super

Reiserfs didn't need to explicitly lock because of the context of these callbacks.
But now we must take care of that with the new locking.

After this patch, reiserfs suffers from a slight performance regression (for now).
On UP, a high volume write with dd reports an average of 27 MB/s instead
of 30 MB/s without the patch applied.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Bron Gondwana <brong@fastmail.fm>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
LKML-Reference: <1239070789-13354-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/Makefile           |   2 +-
 fs/reiserfs/bitmap.c           |   2 +
 fs/reiserfs/dir.c              |   8 +++
 fs/reiserfs/fix_node.c         |  10 +++
 fs/reiserfs/inode.c            |  23 +++++--
 fs/reiserfs/ioctl.c            |   6 +-
 fs/reiserfs/journal.c          | 134 ++++++++++++++++++++++++++++++++---------
 fs/reiserfs/lock.c             |  63 +++++++++++++++++++
 fs/reiserfs/resize.c           |   2 +
 fs/reiserfs/stree.c            |   2 +
 fs/reiserfs/super.c            |  37 +++++++++---
 include/linux/reiserfs_fs.h    |  12 ++--
 include/linux/reiserfs_fs_sb.h |   9 +++
 13 files changed, 261 insertions(+), 49 deletions(-)
 create mode 100644 fs/reiserfs/lock.c

(limited to 'include/linux')

diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 7c5ab6330dd6..6a9e30c041dd 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
 reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
 		 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
 		 hashes.o tail_conversion.o journal.o resize.o \
-		 item_ops.o ioctl.o procfs.o xattr.o
+		 item_ops.o ioctl.o procfs.o xattr.o lock.o
 
 ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
 reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e716161ab325..147033461b87 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1256,7 +1256,9 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 	else {
 		if (buffer_locked(bh)) {
 			PROC_INFO_INC(sb, scan_bitmap.wait);
+			reiserfs_write_unlock(sb);
 			__wait_on_buffer(bh);
+			reiserfs_write_lock(sb);
 		}
 		BUG_ON(!buffer_uptodate(bh));
 		BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6d2668fdc384..17f31ad379c8 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
 				// user space buffer is swapped out. At that time
 				// entry can move to somewhere else
 				memcpy(local_buf, d_name, d_reclen);
+
+				/*
+				 * Since filldir might sleep, we can release
+				 * the write lock here for other waiters
+				 */
+				reiserfs_write_unlock(inode->i_sb);
 				if (filldir
 				    (dirent, local_buf, d_reclen, d_off, d_ino,
 				     DT_UNKNOWN) < 0) {
+					reiserfs_write_lock(inode->i_sb);
 					if (local_buf != small_buf) {
 						kfree(local_buf);
 					}
 					goto end;
 				}
+				reiserfs_write_lock(inode->i_sb);
 				if (local_buf != small_buf) {
 					kfree(local_buf);
 				}
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5e5a4e6fbaf8..bf5f2cbdb063 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -1022,7 +1022,11 @@ static int get_far_parent(struct tree_balance *tb,
 	/* Check whether the common parent is locked. */
 
 	if (buffer_locked(*pcom_father)) {
+
+		/* Release the write lock while the buffer is busy */
+		reiserfs_write_unlock(tb->tb_sb);
 		__wait_on_buffer(*pcom_father);
+		reiserfs_write_lock(tb->tb_sb);
 		if (FILESYSTEM_CHANGED_TB(tb)) {
 			brelse(*pcom_father);
 			return REPEAT_SEARCH;
@@ -1927,7 +1931,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
 		return REPEAT_SEARCH;
 
 	if (buffer_locked(bh)) {
+		reiserfs_write_unlock(tb->tb_sb);
 		__wait_on_buffer(bh);
+		reiserfs_write_lock(tb->tb_sb);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}
@@ -2278,7 +2284,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
 				    REPEAT_SEARCH : CARRY_ON;
 			}
 #endif
+			reiserfs_write_unlock(tb->tb_sb);
 			__wait_on_buffer(locked);
+			reiserfs_write_lock(tb->tb_sb);
 			if (FILESYSTEM_CHANGED_TB(tb))
 				return REPEAT_SEARCH;
 		}
@@ -2349,7 +2357,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
 
 	/* if it possible in indirect_to_direct conversion */
 	if (buffer_locked(tbS0)) {
+		reiserfs_write_unlock(tb->tb_sb);
 		__wait_on_buffer(tbS0);
+		reiserfs_write_lock(tb->tb_sb);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a14d6cd9eeda..1893c8198439 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -489,10 +489,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
 	   disappeared */
 	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
 		int err;
-		lock_kernel();
+
+		reiserfs_write_lock(inode->i_sb);
+
 		err = reiserfs_commit_for_inode(inode);
 		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
-		unlock_kernel();
+
+		reiserfs_write_unlock(inode->i_sb);
+
 		if (err < 0)
 			ret = err;
 	}
@@ -616,7 +620,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 	loff_t new_offset =
 	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
 
-	/* bad.... */
 	reiserfs_write_lock(inode->i_sb);
 	version = get_inode_item_key_version(inode);
 
@@ -997,10 +1000,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 			if (retval)
 				goto failure;
 		}
-		/* inserting indirect pointers for a hole can take a
-		 ** long time.  reschedule if needed
+		/*
+		 * inserting indirect pointers for a hole can take a
+		 * long time.  reschedule if needed and also release the write
+		 * lock for others.
 		 */
+		reiserfs_write_unlock(inode->i_sb);
 		cond_resched();
+		reiserfs_write_lock(inode->i_sb);
 
 		retval = search_for_position_by_key(inode->i_sb, &key, &path);
 		if (retval == IO_ERROR) {
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
 	int ret;
 	int old_ref = 0;
 
+	reiserfs_write_unlock(inode->i_sb);
 	reiserfs_wait_on_write_block(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
+
 	fix_tail_page_for_writing(page);
 	if (reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th;
@@ -2758,7 +2768,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 	int update_sd = 0;
 	struct reiserfs_transaction_handle *th = NULL;
 
+	reiserfs_write_unlock(inode->i_sb);
 	reiserfs_wait_on_write_block(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
+
 	if (reiserfs_transaction_running(inode->i_sb)) {
 		th = current->journal_info;
 	}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0ccc3fdda7bf..5e40b0cd4c3d 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -141,9 +141,11 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 	default:
 		return -ENOIOCTLCMD;
 	}
-	lock_kernel();
+
+	reiserfs_write_lock(inode->i_sb);
 	ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
-	unlock_kernel();
+	reiserfs_write_unlock(inode->i_sb);
+
 	return ret;
 }
 #endif
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39c..438c71f0bc91 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
 	clear_buffer_journal_restore_dirty(bh);
 }
 
-/* utility function to force a BUG if it is called without the big
-** kernel lock held.  caller is the string printed just before calling BUG()
-*/
-void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
-{
-#ifdef CONFIG_SMP
-	if (current->lock_depth < 0) {
-		reiserfs_panic(sb, "journal-1", "%s called without kernel "
-			       "lock held", caller);
-	}
-#else
-	;
-#endif
-}
-
 /* return a cnode with same dev, block number and size in table, or null if not found */
 static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 								  super_block
@@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 	journal_hash(table, cn->sb, cn->blocknr) = cn;
 }
 
+/*
+ * Several mutexes depend on the write lock.
+ * However sometimes we want to relax the write lock while we hold
+ * these mutexes, according to the release/reacquire on schedule()
+ * properties of the Bkl that were used.
+ * Reiserfs performances and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquire write_lock
+ * A acquire j_commit_mutex
+ * A release write_lock and wait for something
+ * B acquire write_lock
+ * B can't acquire j_commit_mutex and sleep
+ * A can't acquire write lock anymore
+ * deadlock
+ *
+ * What we do here is avoiding such deadlock by playing the same game
+ * than the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+			       struct super_block *s)
+{
+	while (!mutex_trylock(m)) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+}
+
 /* lock the current transaction */
 static inline void lock_journal(struct super_block *sb)
 {
 	PROC_INFO_INC(sb, journal.lock_journal);
-	mutex_lock(&SB_JOURNAL(sb)->j_mutex);
+
+	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
 }
 
 /* unlock the current transaction */
@@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s,
 		disable_barrier(s);
 		set_buffer_uptodate(bh);
 		set_buffer_dirty(bh);
+		reiserfs_write_unlock(s);
 		sync_dirty_buffer(bh);
+		reiserfs_write_lock(s);
 	}
 }
 
@@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
 {
 	DEFINE_WAIT(wait);
 	struct reiserfs_journal *j = SB_JOURNAL(s);
-	if (atomic_read(&j->j_async_throttle))
+
+	if (atomic_read(&j->j_async_throttle)) {
+		reiserfs_write_unlock(s);
 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
+		reiserfs_write_lock(s);
+	}
+
 	return 0;
 }
 
@@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s,
 	}
 
 	/* make sure nobody is trying to flush this one at the same time */
-	mutex_lock(&jl->j_commit_mutex);
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
 	if (!journal_list_still_alive(s, trans_id)) {
 		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
@@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s,
 
 	if (!list_empty(&jl->j_bh_list)) {
 		int ret;
-		unlock_kernel();
+
+		/*
+		 * We might sleep in numerous places inside
+		 * write_ordered_buffers. Relax the write lock.
+		 */
+		reiserfs_write_unlock(s);
 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
 					    journal, jl, &jl->j_bh_list);
 		if (ret < 0 && retval == 0)
 			retval = ret;
-		lock_kernel();
+		reiserfs_write_lock(s);
 	}
 	BUG_ON(!list_empty(&jl->j_bh_list));
 	/*
@@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s,
 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
 		tbh = journal_find_get_block(s, bn);
+
+		reiserfs_write_unlock(s);
 		wait_on_buffer(tbh);
+		reiserfs_write_lock(s);
 		// since we're using ll_rw_blk above, it might have skipped over
 		// a locked buffer.  Double check here
 		//
-		if (buffer_dirty(tbh))	/* redundant, sync_dirty_buffer() checks */
+		/* redundant, sync_dirty_buffer() checks */
+		if (buffer_dirty(tbh)) {
+			reiserfs_write_unlock(s);
 			sync_dirty_buffer(tbh);
+			reiserfs_write_lock(s);
+		}
 		if (unlikely(!buffer_uptodate(tbh))) {
 #ifdef CONFIG_REISERFS_CHECK
 			reiserfs_warning(s, "journal-601",
@@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s,
 			if (buffer_dirty(jl->j_commit_bh))
 				BUG();
 			mark_buffer_dirty(jl->j_commit_bh) ;
+			reiserfs_write_unlock(s);
 			sync_dirty_buffer(jl->j_commit_bh) ;
+			reiserfs_write_lock(s);
 		}
-	} else
+	} else {
+		reiserfs_write_unlock(s);
 		wait_on_buffer(jl->j_commit_bh);
+		reiserfs_write_lock(s);
+	}
 
 	check_barrier_completion(s, jl->j_commit_bh);
 
@@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb,
 
 	if (trans_id >= journal->j_last_flush_trans_id) {
 		if (buffer_locked((journal->j_header_bh))) {
+			reiserfs_write_unlock(sb);
 			wait_on_buffer((journal->j_header_bh));
+			reiserfs_write_lock(sb);
 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
 #ifdef CONFIG_REISERFS_CHECK
 				reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb,
 				disable_barrier(sb);
 				goto sync;
 			}
+			reiserfs_write_unlock(sb);
 			wait_on_buffer(journal->j_header_bh);
+			reiserfs_write_lock(sb);
 			check_barrier_completion(sb, journal->j_header_bh);
 		} else {
 		      sync:
 			set_buffer_dirty(journal->j_header_bh);
+			reiserfs_write_unlock(sb);
 			sync_dirty_buffer(journal->j_header_bh);
+			reiserfs_write_lock(sb);
 		}
 		if (!buffer_uptodate(journal->j_header_bh)) {
 			reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s,
 
 	/* if flushall == 0, the lock is already held */
 	if (flushall) {
-		mutex_lock(&journal->j_flush_mutex);
+		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
 		BUG();
 	}
@@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s,
 					reiserfs_panic(s, "journal-1011",
 						       "cn->bh is NULL");
 				}
+
+				reiserfs_write_unlock(s);
 				wait_on_buffer(cn->bh);
+				reiserfs_write_lock(s);
+
 				if (!cn->bh) {
 					reiserfs_panic(s, "journal-1012",
 						       "cn->bh is NULL");
@@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
 	reiserfs_mounted_fs_count--;
 	/* wait for all commits to finish */
 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
+
+	/*
+	 * We must release the write lock here because
+	 * the workqueue job (flush_async_commit) needs this lock
+	 */
+	reiserfs_write_unlock(sb);
 	flush_workqueue(commit_wq);
+
 	if (!reiserfs_mounted_fs_count) {
 		destroy_workqueue(commit_wq);
 		commit_wq = NULL;
 	}
+	reiserfs_write_lock(sb);
 
 	free_journal_ram(sb);
 
@@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb,
 	/* read in the log blocks, memcpy to the corresponding real block */
 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
+
+		reiserfs_write_unlock(sb);
 		wait_on_buffer(log_blocks[i]);
+		reiserfs_write_lock(sb);
+
 		if (!buffer_uptodate(log_blocks[i])) {
 			reiserfs_warning(sb, "journal-1212",
 					 "REPLAY FAILURE fsck required! "
@@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s)
 	init_waitqueue_entry(&wait, current);
 	add_wait_queue(&journal->j_join_wait, &wait);
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
+	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
+		reiserfs_write_unlock(s);
 		schedule();
+		reiserfs_write_lock(s);
+	}
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&journal->j_join_wait, &wait);
 }
@@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned long bcount = journal->j_bcount;
 	while (1) {
+		reiserfs_write_unlock(sb);
 		schedule_timeout_uninterruptible(1);
+		reiserfs_write_lock(sb);
 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
 		while ((atomic_read(&journal->j_wcount) > 0 ||
 			atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
 
 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
 		unlock_journal(sb);
+		reiserfs_write_unlock(sb);
 		reiserfs_wait_on_write_block(sb);
+		reiserfs_write_lock(sb);
 		PROC_INFO_INC(sb, journal.journal_relock_writers);
 		goto relock;
 	}
@@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work)
 	struct reiserfs_journal_list *jl;
 	struct list_head *entry;
 
-	lock_kernel();
+	reiserfs_write_lock(sb);
 	if (!list_empty(&journal->j_journal_list)) {
 		/* last entry is the youngest, commit it and you get everything */
 		entry = journal->j_journal_list.prev;
 		jl = JOURNAL_LIST_ENTRY(entry);
 		flush_commit_list(sb, jl, 1);
 	}
-	unlock_kernel();
+	reiserfs_write_unlock(sb);
 }
 
 /*
@@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * the new transaction is fully setup, and we've already flushed the
 	 * ordered bh list
 	 */
-	mutex_lock(&jl->j_commit_mutex);
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
 
 	/* save the transaction id in case we need to commit it later */
 	commit_trans_id = jl->j_trans_id;
@@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * is lost.
 	 */
 	if (!list_empty(&jl->j_tail_bh_list)) {
-		unlock_kernel();
+		reiserfs_write_unlock(sb);
 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
 				      journal, jl, &jl->j_tail_bh_list);
-		lock_kernel();
+		reiserfs_write_lock(sb);
 	}
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
 	mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
new file mode 100644
index 000000000000..cdd8d9ef048e
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,63 @@
+#include <linux/reiserfs_fs.h>
+#include <linux/mutex.h>
+
+/*
+ * The previous reiserfs locking scheme was heavily based on
+ * the tricky properties of the Bkl:
+ *
+ * - it was acquired recursively by a same task
+ * - the performances relied on the release-while-schedule() property
+ *
+ * Now that we replace it by a mutex, we still want to keep the same
+ * recursive property to avoid big changes in the code structure.
+ * We use our own lock_owner here because the owner field on a mutex
+ * is only available in SMP or mutex debugging, also we only need this field
+ * for this mutex, no need for a system wide mutex facility.
+ *
+ * Also this lock is often released before a call that could block because
+ * reiserfs performances were partialy based on the release while schedule()
+ * property of the Bkl.
+ */
+void reiserfs_write_lock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+	}
+
+	/* No need to protect it, only the current task touches it */
+	sb_i->lock_depth++;
+}
+
+void reiserfs_write_unlock(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	/*
+	 * Are we unlocking without even holding the lock?
+	 * Such a situation could even raise a BUG() if we don't
+	 * want the data become corrupted
+	 */
+	WARN_ONCE(sb_i->lock_owner != current,
+		  "Superblock write lock imbalance");
+
+	if (--sb_i->lock_depth == -1) {
+		sb_i->lock_owner = NULL;
+		mutex_unlock(&sb_i->lock);
+	}
+}
+
+/*
+ * Utility function to force a BUG if it is called without the superblock
+ * write lock held.  caller is the string printed just before calling BUG()
+ */
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	if (sb_i->lock_depth < 0)
+		reiserfs_panic(sb, "%s called without kernel lock held %d",
+			       caller);
+}
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 18b315d3d104..b3a94d20f0fc 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
+			reiserfs_write_unlock(s);
 			sync_dirty_buffer(bh);
+			reiserfs_write_lock(s);
 			// update bitmap_info stuff
 			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 			brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d036ee5b1c81..6bd99a99a652 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -629,7 +629,9 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,	/* Key to s
 				search_by_key_reada(sb, reada_bh,
 						    reada_blocks, reada_count);
 			ll_rw_block(READ, 1, &bh);
+			reiserfs_write_unlock(sb);
 			wait_on_buffer(bh);
+			reiserfs_write_lock(sb);
 			if (!buffer_uptodate(bh))
 				goto io_error;
 		} else {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74d6a8a..e1cfb80d0bf3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
 	struct reiserfs_transaction_handle th;
 	th.t_trans_id = 0;
 
-	lock_kernel();
+	reiserfs_write_lock(s);
 
 	if (s->s_dirt)
 		reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
 
 	reiserfs_proc_info_done(s);
 
+	reiserfs_write_unlock(s);
+	mutex_destroy(&REISERFS_SB(s)->lock);
 	kfree(s->s_fs_info);
 	s->s_fs_info = NULL;
-
-	unlock_kernel();
 }
 
 static struct kmem_cache *reiserfs_inode_cachep;
@@ -1168,11 +1168,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	unsigned int qfmt = 0;
 #ifdef CONFIG_QUOTA
 	int i;
+#endif
+
+	reiserfs_write_lock(s);
 
+#ifdef CONFIG_QUOTA
 	memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
 #endif
 
-	lock_kernel();
 	rs = SB_DISK_SUPER_BLOCK(s);
 
 	if (!reiserfs_parse_options
@@ -1295,12 +1298,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 
 out_ok:
 	replace_mount_options(s, new_opts);
-	unlock_kernel();
+	reiserfs_write_unlock(s);
 	return 0;
 
 out_err:
 	kfree(new_opts);
-	unlock_kernel();
+	reiserfs_write_unlock(s);
 	return err;
 }
 
@@ -1404,7 +1407,9 @@ static int read_super_block(struct super_block *s, int offset)
 static int reread_meta_blocks(struct super_block *s)
 {
 	ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
+	reiserfs_write_unlock(s);
 	wait_on_buffer(SB_BUFFER_WITH_SB(s));
+	reiserfs_write_lock(s);
 	if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
 		reiserfs_warning(s, "reiserfs-2504", "error reading the super");
 		return 1;
@@ -1613,7 +1618,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
 	if (!sbi) {
 		errval = -ENOMEM;
-		goto error;
+		goto error_alloc;
 	}
 	s->s_fs_info = sbi;
 	/* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1632,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	/* setup default block allocator options */
 	reiserfs_init_alloc_options(s);
 
+	mutex_init(&REISERFS_SB(s)->lock);
+	REISERFS_SB(s)->lock_depth = -1;
+
+	/*
+	 * This function is called with the bkl, which also was the old
+	 * locking used here.
+	 * do_journal_begin() will soon check if we hold the lock (ie: was the
+	 * bkl). This is likely because do_journal_begin() has several another
+	 * callers because at this time, it doesn't seem to be necessary to
+	 * protect against anything.
+	 * Anyway, let's be conservative and lock for now.
+	 */
+	reiserfs_write_lock(s);
+
 	jdev_name = NULL;
 	if (reiserfs_parse_options
 	    (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1871,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	init_waitqueue_head(&(sbi->s_wait));
 	spin_lock_init(&sbi->bitmap_lock);
 
+	reiserfs_write_unlock(s);
+
 	return (0);
 
 error:
+	reiserfs_write_unlock(s);
+error_alloc:
 	if (jinit_done) {	/* kill the commit thread, free journal ram */
 		journal_release_error(NULL, s);
 	}
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index dd31e7bae35c..e47328f51801 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -52,11 +52,13 @@
 #define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
 #define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
 
-/* Locking primitives */
-/* Right now we are still falling back to (un)lock_kernel, but eventually that
-   would evolve into real per-fs locks */
-#define reiserfs_write_lock( sb ) lock_kernel()
-#define reiserfs_write_unlock( sb ) unlock_kernel()
+/*
+ * Locking primitives. The write lock is a per superblock
+ * special mutex that has properties close to the Big Kernel Lock
+ * which was used in the previous locking scheme.
+ */
+void reiserfs_write_lock(struct super_block *s);
+void reiserfs_write_unlock(struct super_block *s);
 
 struct fid;
 
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index dab68bbed675..045c37213675 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -7,6 +7,8 @@
 #ifdef __KERNEL__
 #include <linux/workqueue.h>
 #include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
 #endif
 
 typedef enum {
@@ -355,6 +357,13 @@ struct reiserfs_sb_info {
 	struct reiserfs_journal *s_journal;	/* pointer to journal information */
 	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */
 
+	/* Serialize writers access, replace the old bkl */
+	struct mutex lock;
+	/* Owner of the lock (can be recursive) */
+	struct task_struct *lock_owner;
+	/* Depth of the lock, start from -1 like the bkl */
+	int lock_depth;
+
 	/* Comment? -Hans */
 	void (*end_io_handler) (struct buffer_head *, int);
 	hashf_t s_hash_function;	/* pointer to function which is used
-- 
cgit v1.2.3


From daf88c898312a22b5385655bc6e0b064eaa2efba Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 14 Apr 2009 05:34:23 +0200
Subject: kill-the-BKL/reiserfs: provide a tool to lock only once the write
 lock

Sometimes we don't want to recursively hold the per superblock write
lock because we want to be sure it is actually released when we come
to sleep.

This patch introduces the necessary tools for that.

reiserfs_write_lock_once() does the same job than reiserfs_write_lock()
except that it won't try to acquire recursively the lock if the current
task already owns it. Also the lock_depth before the call of this function
is returned.

reiserfs_write_unlock_once() unlock only if reiserfs_write_lock_once()
returned a depth equal to -1, ie: only if it actually locked.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@texware.it>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
LKML-Reference: <1239680065-25013-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/lock.c          | 26 ++++++++++++++++++++++++++
 include/linux/reiserfs_fs.h |  2 ++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index cdd8d9ef048e..cb1bba3802dd 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -49,6 +49,32 @@ void reiserfs_write_unlock(struct super_block *s)
 	}
 }
 
+/*
+ * If we already own the lock, just exit and don't increase the depth.
+ * Useful when we don't want to lock more than once.
+ *
+ * We always return the lock_depth we had before calling
+ * this function.
+ */
+int reiserfs_write_lock_once(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	if (sb_i->lock_owner != current) {
+		mutex_lock(&sb_i->lock);
+		sb_i->lock_owner = current;
+		return sb_i->lock_depth++;
+	}
+
+	return sb_i->lock_depth;
+}
+
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
+{
+	if (lock_depth == -1)
+		reiserfs_write_unlock(s);
+}
+
 /*
  * Utility function to force a BUG if it is called without the superblock
  * write lock held.  caller is the string printed just before calling BUG()
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index e47328f51801..4a2df57c8b1d 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -59,6 +59,8 @@
  */
 void reiserfs_write_lock(struct super_block *s);
 void reiserfs_write_unlock(struct super_block *s);
+int reiserfs_write_lock_once(struct super_block *s);
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
 struct fid;
 
-- 
cgit v1.2.3


From f32049dc244f4d394c8faa161b4f13cb8c4f5c8c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 30 Apr 2009 22:05:25 +0200
Subject: kill-the-BKL/reiserfs: release write lock on fs_changed()

fs_changed() is a macro used by reiserfs to check whether its tree has been
rebalanced. It has been designed to check parallel changes on the tree after
calling a sleeping function, which released the Bkl.

fs_changed() also calls cond_resched(), so that if rescheduling is needed,
we are in the best place to do that, since we check if the tree has changed
just after (because of the bkl release on schedule()).

Even if we are not anymore using the Bkl, we still want to release the lock
while we reschedule, so that other waiters for the lock can acquire it safely,
because of the following __fs_changed() check.

[ Impact: release the reiserfs write lock when it is not needed ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/reiserfs_fs.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4a2df57c8b1d..fa5dbf307c40 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1333,7 +1333,13 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
 #define get_generation(s) atomic_read (&fs_generation(s))
 #define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
 #define __fs_changed(gen,s) (gen != get_generation (s))
-#define fs_changed(gen,s) ({cond_resched(); __fs_changed(gen, s);})
+#define fs_changed(gen,s)		\
+({					\
+	reiserfs_write_unlock(s);	\
+	cond_resched();			\
+	reiserfs_write_lock(s);		\
+	__fs_changed(gen, s);		\
+})
 
 /***************************************************************************/
 /*                  FIXATE NODES                                           */
-- 
cgit v1.2.3


From e43d3f21c502dec786f2885a75e25859f18d6ffa Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 7 May 2009 22:51:20 +0200
Subject: kill-the-BKL/reiserfs: add reiserfs_cond_resched()

Usually, when we call cond_resched(), we want the write lock
to be released and then reacquired once we return from scheduling.
Not only does it follow the previous bkl based locking scheme, but
it also let other waiters to get the lock.

But if we aren't going to reschedule(), such as in !TIF_NEED_RESCHED
case, it's useless to release the lock. Worse, if we release and reacquire
the lock whereas it is not needed, we create useless contentions. Also
if someone takes the lock while we are modifying or reading the tree,
there are good chances we'll have to retry our operation, eg if the
block we were seeeking has moved.

So this patch introduces a helper which only unlock the write lock
if we are going to schedule.

[ Impact: prepare to inject less lock contention and less tree operation attempts ]

Reported-by: Andi Kleen <andi@firstfloor.org>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/reiserfs_fs.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index fa5dbf307c40..27f4ecc28180 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -62,6 +62,19 @@ void reiserfs_write_unlock(struct super_block *s);
 int reiserfs_write_lock_once(struct super_block *s);
 void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
+/*
+ * When we schedule, we usually want to also release the write lock,
+ * according to the previous bkl based locking scheme of reiserfs.
+ */
+static inline void reiserfs_cond_resched(struct super_block *s)
+{
+	if (need_resched()) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+}
+
 struct fid;
 
 /* in reading the #defines, it may help to understand that they employ
-- 
cgit v1.2.3


From d663af807d8bb226394cb7e02f4665f6141a8140 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 7 May 2009 23:25:29 +0200
Subject: kill-the-bkl/reiserfs: conditionaly release the write lock on
 fs_changed()

The goal of fs_changed() is to check whether the tree changed during a
schedule(). This is a BKL legacy.

A recent patch added an explicit unconditional release/reacquire of the
write lock around the cond_resched() called inside fs_changed.

But it's wasteful to unconditionally do that, we are creating superfluous
lock contention in !TIF_NEED_RESCHED case.

This patch manage that by calling reiserfs_cond_resched() from fs_changed()
which only releases the lock if we are going to reschedule.

[ Impact: inject less lock contention and tree job retries ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/reiserfs_fs.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 27f4ecc28180..508fb523863e 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1348,9 +1348,7 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
 #define __fs_changed(gen,s) (gen != get_generation (s))
 #define fs_changed(gen,s)		\
 ({					\
-	reiserfs_write_unlock(s);	\
-	cond_resched();			\
-	reiserfs_write_lock(s);		\
+	reiserfs_cond_resched(s);	\
 	__fs_changed(gen, s);		\
 })
 
-- 
cgit v1.2.3


From c72e05756b900b3be24cd73a16de52bab80984c0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 May 2009 18:12:08 +0200
Subject: kill-the-bkl/reiserfs: acquire the inode mutex safely

While searching a pathname, an inode mutex can be acquired
in do_lookup() which calls reiserfs_lookup() which in turn
acquires the write lock.

On the other side reiserfs_fill_super() can acquire the write_lock
and then call reiserfs_lookup_privroot() which can acquire an
inode mutex (the root of the mount point).

So we theoretically risk an AB - BA lock inversion that could lead
to a deadlock.

As for other lock dependencies found since the bkl to mutex
conversion, the fix is to use reiserfs_mutex_lock_safe() which
drops the lock dependency to the write lock.

[ Impact: fix a possible deadlock with reiserfs ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 fs/reiserfs/journal.c       | 34 ----------------------------------
 fs/reiserfs/xattr.c         |  4 ++--
 include/linux/reiserfs_fs.h | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e9a972bd0323..d23d6d7a45a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -537,40 +537,6 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 	journal_hash(table, cn->sb, cn->blocknr) = cn;
 }
 
-/*
- * Several mutexes depend on the write lock.
- * However sometimes we want to relax the write lock while we hold
- * these mutexes, according to the release/reacquire on schedule()
- * properties of the Bkl that were used.
- * Reiserfs performances and locking were based on this scheme.
- * Now that the write lock is a mutex and not the bkl anymore, doing so
- * may result in a deadlock:
- *
- * A acquire write_lock
- * A acquire j_commit_mutex
- * A release write_lock and wait for something
- * B acquire write_lock
- * B can't acquire j_commit_mutex and sleep
- * A can't acquire write lock anymore
- * deadlock
- *
- * What we do here is avoiding such deadlock by playing the same game
- * than the Bkl: if we can't acquire a mutex that depends on the write lock,
- * we release the write lock, wait a bit and then retry.
- *
- * The mutexes concerned by this hack are:
- * - The commit mutex of a journal list
- * - The flush mutex
- * - The journal lock
- */
-static inline void reiserfs_mutex_lock_safe(struct mutex *m,
-			       struct super_block *s)
-{
-	reiserfs_write_unlock(s);
-	mutex_lock(m);
-	reiserfs_write_lock(s);
-}
-
 /* lock the current transaction */
 static inline void lock_journal(struct super_block *sb)
 {
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6925b835a43b..59870a4751cc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 	int err = 0;
 
 	/* If we don't have the privroot located yet - go find it */
-	mutex_lock(&s->s_root->d_inode->i_mutex);
+	reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
 	dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
@@ -1011,7 +1011,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 
 	if (privroot->d_inode) {
 		s->s_xattr = reiserfs_xattr_handlers;
-		mutex_lock(&privroot->d_inode->i_mutex);
+		reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
 		if (!REISERFS_SB(s)->xattr_root) {
 			struct dentry *dentry;
 			dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 508fb523863e..a498d9266d8c 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -62,6 +62,41 @@ void reiserfs_write_unlock(struct super_block *s);
 int reiserfs_write_lock_once(struct super_block *s);
 void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
+/*
+ * Several mutexes depend on the write lock.
+ * However sometimes we want to relax the write lock while we hold
+ * these mutexes, according to the release/reacquire on schedule()
+ * properties of the Bkl that were used.
+ * Reiserfs performances and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquire write_lock
+ * A acquire j_commit_mutex
+ * A release write_lock and wait for something
+ * B acquire write_lock
+ * B can't acquire j_commit_mutex and sleep
+ * A can't acquire write lock anymore
+ * deadlock
+ *
+ * What we do here is avoiding such deadlock by playing the same game
+ * than the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ * - The inode mutex
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+			       struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	mutex_lock(m);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From 08f14fc8963e585e65b71212ce8050607b9b6c36 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 May 2009 19:10:38 +0200
Subject: kill-the-bkl/reiserfs: move the concurrent tree accesses checks per
 superblock

When do_balance() balances the tree, a trick is performed to
provide the ability for other tree writers/readers to check whether
do_balance() is executing concurrently (requires CONFIG_REISERFS_CHECK).

This is done to protect concurrent accesses to the tree. The trick
is the following:

When do_balance is called, a unique global variable called cur_tb
takes a pointer to the current tree to be rebalanced.
Once do_balance finishes its work, cur_tb takes the NULL value.

Then, concurrent tree readers/writers just have to check the value
of cur_tb to ensure do_balance isn't executing concurrently.
If it is, then it proves that schedule() occured on do_balance(),
which then relaxed the bkl that protected the tree.

Now that the bkl has be turned into a mutex, this check is still
fine even though do_balance() becomes preemptible: the write lock
will not be automatically released on schedule(), so the tree is
still protected.

But this is only fine if we have a single reiserfs mountpoint.
Indeed, because the bkl is a global lock, it didn't allowed
concurrent executions between a tree reader/writer in a mount point
and a do_balance() on another tree from another mountpoint.

So assuming all these readers/writers weren't supposed to be
reentrant, the current check now sometimes detect false positives with
the current per-superblock mutex which allows this reentrancy.

This patch keeps the concurrent tree accesses check but moves it
per superblock, so that only trees from a same mount point are
checked to be not accessed concurrently.

[ Impact: fix spurious panic while running several reiserfs mount-points ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 fs/reiserfs/do_balan.c         | 17 +++++------------
 fs/reiserfs/fix_node.c         |  5 +----
 fs/reiserfs/prints.c           |  4 ----
 fs/reiserfs/stree.c            |  5 +----
 include/linux/reiserfs_fs_sb.h | 11 +++++++++++
 5 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 128d3f7c8aa5..60c080440661 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,14 +21,6 @@
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
 
-#ifdef CONFIG_REISERFS_CHECK
-
-struct tree_balance *cur_tb = NULL;	/* detects whether more than one
-					   copy of tb exists as a means
-					   of checking whether schedule
-					   is interrupting do_balance */
-#endif
-
 static inline void buffer_info_init_left(struct tree_balance *tb,
                                          struct buffer_info *bi)
 {
@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb)
 {
 	int retval = 0;
 
-	if (cur_tb) {
+	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
 		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
 			       "occurred based on cur_tb not being null at "
 			       "this point in code. do_balance cannot properly "
-			       "handle schedule occurring while it runs.");
+			       "handle concurrent tree accesses on a same "
+			       "mount point.");
 	}
 
 	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
 	     "check");*/
 	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
 #ifdef CONFIG_REISERFS_CHECK
-	cur_tb = tb;
+	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
 #endif
 }
 
@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb)
 #ifdef CONFIG_REISERFS_CHECK
 	check_leaf_level(tb);
 	check_internal_levels(tb);
-	cur_tb = NULL;
+	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
 #endif
 
 	/* reiserfs_free_block is no longer schedule safe.  So, we need to
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 3a685e3f754f..d2f31330dcae 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 	return needed_nodes;
 }
 
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
 
 /* Set parameters for balancing.
  * Performs write of results of analysis of balancing into structure tb,
@@ -2368,7 +2365,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
 			return REPEAT_SEARCH;
 	}
 #ifdef CONFIG_REISERFS_CHECK
-	if (cur_tb) {
+	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
 		print_cur_tb("fix_nodes");
 		reiserfs_panic(tb->tb_sb, "PAP-8305",
 			       "there is pending do_balance");
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 536eacaeb710..adbc6f538515 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
 
    .  */
 
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
-
 void __reiserfs_panic(struct super_block *sb, const char *id,
 		      const char *function, const char *fmt, ...)
 {
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 6b025a42d510..5fa7118f04e1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -222,9 +222,6 @@ static inline int bin_search(const void *key,	/* Key to search for. */
 	return ITEM_NOT_FOUND;
 }
 
-#ifdef CONFIG_REISERFS_CHECK
-extern struct tree_balance *cur_tb;
-#endif
 
 /* Minimal possible key. It is never in the tree. */
 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
@@ -711,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,	/* Key to s
 		       !key_in_buffer(search_path, key, sb),
 		       "PAP-5130: key is not in the buffer");
 #ifdef CONFIG_REISERFS_CHECK
-		if (cur_tb) {
+		if (REISERFS_SB(sb)->cur_tb) {
 			print_cur_tb("5140");
 			reiserfs_panic(sb, "PAP-5140",
 				       "schedule occurred in do_balance!");
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 045c37213675..52c83b6a758a 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -417,6 +417,17 @@ struct reiserfs_sb_info {
 	char *s_qf_names[MAXQUOTAS];
 	int s_jquota_fmt;
 #endif
+#ifdef CONFIG_REISERFS_CHECK
+
+	struct tree_balance *cur_tb;	/*
+					 * Detects whether more than one
+					 * copy of tb exists per superblock
+					 * as a means of checking whether
+					 * do_balance is executing concurrently
+					 * against another tree reader/writer
+					 * on a same mount point.
+					 */
+#endif
 };
 
 /* Definitions of reiserfs on-disk properties: */
-- 
cgit v1.2.3


From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 22 Sep 2009 14:44:11 +0200
Subject: ratelimit: Use per ratelimit context locking

I'd like to use printk_ratelimit() in atomic context, but that's
not possible right now due to the spinlock usage this commit
introduced more than a year ago:

  717115e: printk ratelimiting rewrite

As a first step push the lock into the ratelimit state structure.
This allows us to deal with locking failures to be considered as an
event related to that state being too busy.

Also clean up the code a bit (without changing functionality):

 - tidy up the definitions

 - clean up the code flow

This also shrinks the code a tiny bit:

   text	   data	    bss	    dec	    hex	filename
    264	      0	      4	    268	    10c	ratelimit.o.before
    255	      0	      0	    255	     ff	ratelimit.o.after

( Whole-kernel data size got a bit larger, because we have
  two ratelimit-state data structures right now. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ratelimit.h | 30 ++++++++++++++++++++----------
 lib/ratelimit.c           | 29 +++++++++++++----------------
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 00044b856453..187bc16c1f15 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -1,20 +1,30 @@
 #ifndef _LINUX_RATELIMIT_H
 #define _LINUX_RATELIMIT_H
+
 #include <linux/param.h>
+#include <linux/spinlock_types.h>
 
-#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
-#define DEFAULT_RATELIMIT_BURST 10
+#define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
+#define DEFAULT_RATELIMIT_BURST		10
 
 struct ratelimit_state {
-	int interval;
-	int burst;
-	int printed;
-	int missed;
-	unsigned long begin;
+	spinlock_t	lock;		/* protect the state */
+
+	int		interval;
+	int		burst;
+	int		printed;
+	int		missed;
+	unsigned long	begin;
 };
 
-#define DEFINE_RATELIMIT_STATE(name, interval, burst)		\
-		struct ratelimit_state name = {interval, burst,}
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)		\
+									\
+	struct ratelimit_state name = {					\
+		.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
+		.interval	= interval_init,			\
+		.burst		= burst_init,				\
+	}
 
 extern int __ratelimit(struct ratelimit_state *rs);
-#endif
+
+#endif /* _LINUX_RATELIMIT_H */
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 26187edcc7ea..0e2c28e8a0ca 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -7,15 +7,12 @@
  * parameter. Now every user can use their own standalone ratelimit_state.
  *
  * This file is released under the GPLv2.
- *
  */
 
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
-static DEFINE_SPINLOCK(ratelimit_lock);
-
 /*
  * __ratelimit - rate limiting
  * @rs: ratelimit_state data
@@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock);
 int __ratelimit(struct ratelimit_state *rs)
 {
 	unsigned long flags;
+	int ret;
 
 	if (!rs->interval)
 		return 1;
 
-	spin_lock_irqsave(&ratelimit_lock, flags);
+	spin_lock_irqsave(&rs->lock, flags);
 	if (!rs->begin)
 		rs->begin = jiffies;
 
@@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs)
 		if (rs->missed)
 			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
 				__func__, rs->missed);
-		rs->begin = 0;
+		rs->begin   = 0;
 		rs->printed = 0;
-		rs->missed = 0;
+		rs->missed  = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed)
-		goto print;
-
-	rs->missed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 0;
+	if (rs->burst && rs->burst > rs->printed) {
+		rs->printed++;
+		ret = 1;
+	} else {
+		rs->missed++;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&rs->lock, flags);
 
-print:
-	rs->printed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 1;
+	return ret;
 }
 EXPORT_SYMBOL(__ratelimit);
-- 
cgit v1.2.3


From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 22 Sep 2009 16:18:09 +0200
Subject: printk: Remove ratelimit.h from kernel.h

Decouple kernel.h from ratelimit.h: the global declaration of
printk's ratelimit_state is not needed, and it leads to messy
circular dependencies due to ratelimit.h's (new) adding of a
spinlock_types.h include.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h     | 2 --
 include/linux/net.h        | 1 +
 kernel/printk.c            | 1 +
 kernel/sysctl.c            | 3 +++
 lib/ratelimit.c            | 2 +-
 net/core/sysctl_net_core.c | 2 ++
 net/core/utils.c           | 2 ++
 7 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b5b1e0899a8..3305f33201be 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,7 +15,6 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
-#include <linux/ratelimit.h>
 #include <linux/dynamic_debug.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
@@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
diff --git a/include/linux/net.h b/include/linux/net.h
index 9040a10584f7..df20f680f455 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = {			\
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
+#include <linux/ratelimit.h>
 extern struct ratelimit_state net_ratelimit_state;
 #endif
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 602033acd6c7..b997c893cdcf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
+#include <linux/ratelimit.h>
 
 #include <asm/uaccess.h>
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a631ba684a4..6c37048b9db9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -37,6 +37,7 @@
 #include <linux/sysrq.h>
 #include <linux/highuid.h>
 #include <linux/writeback.h>
+#include <linux/ratelimit.h>
 #include <linux/hugetlb.h>
 #include <linux/initrd.h>
 #include <linux/key.h>
@@ -155,6 +156,8 @@ extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
 #endif
 
+extern struct ratelimit_state printk_ratelimit_state;
+
 #ifdef CONFIG_RT_MUTEXES
 extern int max_lock_depth;
 #endif
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 69bfcacda16d..5551731ae1d4 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -9,7 +9,7 @@
  * This file is released under the GPLv2.
  */
 
-#include <linux/kernel.h>
+#include <linux/ratelimit.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7db1de0497c6..887c03c4e3c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,7 +10,9 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/netdevice.h>
+#include <linux/ratelimit.h>
 #include <linux/init.h>
+
 #include <net/ip.h>
 #include <net/sock.h>
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 83221aee7084..838250241d26 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -24,6 +24,8 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
+#include <linux/ratelimit.h>
+
 #include <net/sock.h>
 
 #include <asm/byteorder.h>
-- 
cgit v1.2.3


From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 1 Aug 2009 01:34:24 +0200
Subject: tracing/bkl: Add bkl ftrace events

Add two events lock_kernel and unlock_kernel() to trace the bkl uses.
This opens the door for userspace tools to perform statistics about
the callsites that use it, dependencies with other locks (by pairing
the trace with lock events), use with recursivity and so on...

The {__reacquire,release}_kernel_lock() events are not traced because
these are called from schedule, thus the sched events are sufficient
to trace them.

Example of a trace:

hald-addon-stor-4152  [000]   165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put()
hald-addon-stor-4152  [000]   167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get()

How to get the callsites that acquire it recursively:

cd /debug/tracing/events/bkl
echo "lock_depth > 0" > filter

firefox-4951  [001]   206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode()

You can also filter by file and/or line.

v2: Use of FILTER_PTR_STRING attribute for files and lines fields to
    make them traceable.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/smp_lock.h   | 19 ++++++++++++---
 include/trace/events/bkl.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/kernel_lock.c          | 11 +++++----
 3 files changed, 82 insertions(+), 9 deletions(-)
 create mode 100644 include/trace/events/bkl.h

(limited to 'include/linux')

diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 813be59bf345..d48cc77ba70d 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,6 +3,7 @@
 
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
+#include <trace/events/bkl.h>
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
@@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task)
 	return 0;
 }
 
-extern void __lockfunc lock_kernel(void)	__acquires(kernel_lock);
-extern void __lockfunc unlock_kernel(void)	__releases(kernel_lock);
+extern void __lockfunc _lock_kernel(void)	__acquires(kernel_lock);
+extern void __lockfunc _unlock_kernel(void)	__releases(kernel_lock);
+
+#define lock_kernel()	{					\
+	trace_lock_kernel(__func__, __FILE__, __LINE__);	\
+	_lock_kernel();						\
+}
+
+#define unlock_kernel()	{					\
+	trace_unlock_kernel(__func__, __FILE__, __LINE__);	\
+	_unlock_kernel();					\
+}
 
 /*
  * Various legacy drivers don't really need the BKL in a specific
@@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void)
 
 #else
 
-#define lock_kernel()				do { } while(0)
-#define unlock_kernel()				do { } while(0)
+#define lock_kernel()	   trace_lock_kernel(__func__, __FILE__, __LINE__);
+#define unlock_kernel()    trace_unlock_kernel(__func__, __FILE__, __LINE__);
 #define release_kernel_lock(task)		do { } while(0)
 #define cycle_kernel_lock()			do { } while(0)
 #define reacquire_kernel_lock(task)		0
diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
new file mode 100644
index 000000000000..8abd620a490e
--- /dev/null
+++ b/include/trace/events/bkl.h
@@ -0,0 +1,61 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bkl
+
+#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BKL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(lock_kernel,
+
+	TP_PROTO(const char *func, const char *file, int line),
+
+	TP_ARGS(func, file, line),
+
+	TP_STRUCT__entry(
+		__field(	int,		lock_depth		)
+		__field_ext(	const char *,	func, FILTER_PTR_STRING	)
+		__field_ext(	const char *,	file, FILTER_PTR_STRING	)
+		__field(	int,		line			)
+	),
+
+	TP_fast_assign(
+		/* We want to record the lock_depth after lock is acquired */
+		__entry->lock_depth = current->lock_depth + 1;
+		__entry->func = func;
+		__entry->file = file;
+		__entry->line = line;
+	),
+
+	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+		  __entry->file, __entry->line, __entry->func)
+);
+
+TRACE_EVENT(unlock_kernel,
+
+	TP_PROTO(const char *func, const char *file, int line),
+
+	TP_ARGS(func, file, line),
+
+	TP_STRUCT__entry(
+		__field(int,		lock_depth)
+		__field(const char *,	func)
+		__field(const char *,	file)
+		__field(int,		line)
+	),
+
+	TP_fast_assign(
+		__entry->lock_depth = current->lock_depth;
+		__entry->func = func;
+		__entry->file = file;
+		__entry->line = line;
+	),
+
+	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+		  __entry->file, __entry->line, __entry->func)
+);
+
+#endif /* _TRACE_BKL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 39f1029e3525..5c10b2e1fd08 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -5,10 +5,11 @@
  * relegated to obsolescence, but used by various less
  * important (or lazy) subsystems.
  */
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#define CREATE_TRACE_POINTS
+#include <linux/smp_lock.h>
 
 /*
  * The 'big kernel lock'
@@ -113,7 +114,7 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc lock_kernel(void)
+void __lockfunc _lock_kernel(void)
 {
 	int depth = current->lock_depth+1;
 	if (likely(!depth))
@@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void)
 	current->lock_depth = depth;
 }
 
-void __lockfunc unlock_kernel(void)
+void __lockfunc _unlock_kernel(void)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
 }
 
-EXPORT_SYMBOL(lock_kernel);
-EXPORT_SYMBOL(unlock_kernel);
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
 
-- 
cgit v1.2.3


From c62d81bcfe82526cc3da10cf4fc63faad368bc60 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Sun, 20 Sep 2009 23:28:04 +0200
Subject: mtd: use bbm.h in nand.h

This consolidates common code in nand.h and bbm.h. The
comments and data structures were the same, this keeps
the comment from nand.h as it fits 80 columns, while the one
in bbm.h did not.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/bbm.h  | 35 +++++++++++-----------
 include/linux/mtd/nand.h | 76 +-----------------------------------------------
 2 files changed, 19 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index fff8c53e5434..9c3757c5759d 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h
@@ -19,22 +19,21 @@
 
 /**
  * struct nand_bbt_descr - bad block table descriptor
- * @options:		options for this descriptor
- * @pages:		the page(s) where we find the bbt, used with
- * 			option BBT_ABSPAGE when bbt is searched,
- * 			then we store the found bbts pages here.
- *			Its an array and supports up to 8 chips now
- * @offs:		offset of the pattern in the oob area of the page
- * @veroffs:		offset of the bbt version counter in the oob area of the page
- * @version:		version read from the bbt page during scan
- * @len:		length of the pattern, if 0 no pattern check is performed
- * @maxblocks:		maximum number of blocks to search for a bbt. This
- *			number of blocks is reserved at the end of the device
- *			where the tables are written.
- * @reserved_block_code: if non-0, this pattern denotes a reserved
- *			(rather than bad) block in the stored bbt
- * @pattern:		pattern to identify bad block table or factory marked
- *			good / bad blocks, can be NULL, if len = 0
+ * @options:	options for this descriptor
+ * @pages:	the page(s) where we find the bbt, used with option BBT_ABSPAGE
+ *		when bbt is searched, then we store the found bbts pages here.
+ *		Its an array and supports up to 8 chips now
+ * @offs:	offset of the pattern in the oob area of the page
+ * @veroffs:	offset of the bbt version counter in the oob are of the page
+ * @version:	version read from the bbt page during scan
+ * @len:	length of the pattern, if 0 no pattern check is performed
+ * @maxblocks:	maximum number of blocks to search for a bbt. This number of
+ *		blocks is reserved at the end of the device where the tables are
+ *		written.
+ * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than
+ *              bad) block in the stored bbt
+ * @pattern:	pattern to identify bad block table or factory marked good /
+ *		bad blocks, can be NULL, if len = 0
  *
  * Descriptor for the bad block table marker and the descriptor for the
  * pattern which identifies good and bad blocks. The assumption is made
@@ -90,7 +89,9 @@ struct nand_bbt_descr {
 /*
  * Constants for oob configuration
  */
-#define ONENAND_BADBLOCK_POS	0
+#define NAND_SMALL_BADBLOCK_POS		5
+#define NAND_LARGE_BADBLOCK_POS		0
+#define ONENAND_BADBLOCK_POS		0
 
 /*
  * Bad block scanning errors
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a232a9bdd62..d87ada538d17 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -21,6 +21,7 @@
 #include <linux/wait.h>
 #include <linux/spinlock.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/bbm.h>
 
 struct mtd_info;
 /* Scan and identify a NAND device */
@@ -470,75 +471,6 @@ struct nand_manufacturers {
 extern struct nand_flash_dev nand_flash_ids[];
 extern struct nand_manufacturers nand_manuf_ids[];
 
-/**
- * struct nand_bbt_descr - bad block table descriptor
- * @options:	options for this descriptor
- * @pages:	the page(s) where we find the bbt, used with option BBT_ABSPAGE
- *		when bbt is searched, then we store the found bbts pages here.
- *		Its an array and supports up to 8 chips now
- * @offs:	offset of the pattern in the oob area of the page
- * @veroffs:	offset of the bbt version counter in the oob are of the page
- * @version:	version read from the bbt page during scan
- * @len:	length of the pattern, if 0 no pattern check is performed
- * @maxblocks:	maximum number of blocks to search for a bbt. This number of
- *		blocks is reserved at the end of the device where the tables are
- *		written.
- * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than
- *              bad) block in the stored bbt
- * @pattern:	pattern to identify bad block table or factory marked good /
- *		bad blocks, can be NULL, if len = 0
- *
- * Descriptor for the bad block table marker and the descriptor for the
- * pattern which identifies good and bad blocks. The assumption is made
- * that the pattern and the version count are always located in the oob area
- * of the first block.
- */
-struct nand_bbt_descr {
-	int	options;
-	int	pages[NAND_MAX_CHIPS];
-	int	offs;
-	int	veroffs;
-	uint8_t	version[NAND_MAX_CHIPS];
-	int	len;
-	int	maxblocks;
-	int	reserved_block_code;
-	uint8_t	*pattern;
-};
-
-/* Options for the bad block table descriptors */
-
-/* The number of bits used per block in the bbt on the device */
-#define NAND_BBT_NRBITS_MSK	0x0000000F
-#define NAND_BBT_1BIT		0x00000001
-#define NAND_BBT_2BIT		0x00000002
-#define NAND_BBT_4BIT		0x00000004
-#define NAND_BBT_8BIT		0x00000008
-/* The bad block table is in the last good block of the device */
-#define	NAND_BBT_LASTBLOCK	0x00000010
-/* The bbt is at the given page, else we must scan for the bbt */
-#define NAND_BBT_ABSPAGE	0x00000020
-/* The bbt is at the given page, else we must scan for the bbt */
-#define NAND_BBT_SEARCH		0x00000040
-/* bbt is stored per chip on multichip devices */
-#define NAND_BBT_PERCHIP	0x00000080
-/* bbt has a version counter at offset veroffs */
-#define NAND_BBT_VERSION	0x00000100
-/* Create a bbt if none axists */
-#define NAND_BBT_CREATE		0x00000200
-/* Search good / bad pattern through all pages of a block */
-#define NAND_BBT_SCANALLPAGES	0x00000400
-/* Scan block empty during good / bad block scan */
-#define NAND_BBT_SCANEMPTY	0x00000800
-/* Write bbt if neccecary */
-#define NAND_BBT_WRITE		0x00001000
-/* Read and write back block contents when writing bbt */
-#define NAND_BBT_SAVECONTENT	0x00002000
-/* Search good / bad pattern on the first and the second page */
-#define NAND_BBT_SCAN2NDPAGE	0x00004000
-
-/* The maximum number of blocks to scan for a bbt */
-#define NAND_BBT_SCAN_MAXBLOCKS	4
-
 extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
 extern int nand_update_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_default_bbt(struct mtd_info *mtd);
@@ -548,12 +480,6 @@ extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
 			size_t * retlen, uint8_t * buf);
 
-/*
-* Constants for oob configuration
-*/
-#define NAND_SMALL_BADBLOCK_POS		5
-#define NAND_LARGE_BADBLOCK_POS		0
-
 /**
  * struct platform_nand_chip - chip level device structure
  * @nr_chips:		max. number of chips to scan for
-- 
cgit v1.2.3


From 30631cb82d5c6c662d5ec682beaa834c1f9f0987 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Sun, 20 Sep 2009 23:28:14 +0200
Subject: mtd: unify status enum from three headers

nand.h, onenand.h and flashchip.h defined enumeration types
for chip status using the same symbolic names. This prevented
a board file to include more than one of them. In particular,
no nand and onenand platform devices could live in the same file.
This patch augments flashchip.h with a few status values in order
to cover all cases, so nand.h and onenand.h can use flstate_t
without declaring their own status enum.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/flashchip.h |  7 +++++++
 include/linux/mtd/nand.h      | 17 ++---------------
 include/linux/mtd/onenand.h   | 19 ++-----------------
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index d4f38c5fd44e..f350a4879f75 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -38,6 +38,13 @@ typedef enum {
 	FL_XIP_WHILE_ERASING,
 	FL_XIP_WHILE_WRITING,
 	FL_SHUTDOWN,
+	/* These 2 come from nand_state_t, which has been unified here */
+	FL_READING,
+	FL_CACHEDPRG,
+	/* These 2 come from onenand_state_t, which has been unified here */
+	FL_RESETING,
+	FL_OTPING,
+
 	FL_UNKNOWN
 } flstate_t;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index d87ada538d17..2476078a032f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -21,6 +21,7 @@
 #include <linux/wait.h>
 #include <linux/spinlock.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
 
 struct mtd_info;
@@ -203,20 +204,6 @@ typedef enum {
 #define NAND_CI_CHIPNR_MSK	0x03
 #define NAND_CI_CELLTYPE_MSK	0x0C
 
-/*
- * nand_state_t - chip states
- * Enumeration for NAND flash chip state
- */
-typedef enum {
-	FL_READY,
-	FL_READING,
-	FL_WRITING,
-	FL_ERASING,
-	FL_SYNCING,
-	FL_CACHEDPRG,
-	FL_PM_SUSPENDED,
-} nand_state_t;
-
 /* Keep gcc happy */
 struct nand_chip;
 
@@ -403,7 +390,7 @@ struct nand_chip {
 	uint8_t		cellinfo;
 	int		badblockpos;
 
-	nand_state_t	state;
+	flstate_t	state;
 
 	uint8_t		*oob_poi;
 	struct nand_hw_control  *controller;
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 4e49f3350678..f57e29e17bb0 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -14,6 +14,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/completion.h>
+#include <linux/mtd/flashchip.h>
 #include <linux/mtd/onenand_regs.h>
 #include <linux/mtd/bbm.h>
 
@@ -25,22 +26,6 @@ extern int onenand_scan(struct mtd_info *mtd, int max_chips);
 /* Free resources held by the OneNAND device */
 extern void onenand_release(struct mtd_info *mtd);
 
-/*
- * onenand_state_t - chip states
- * Enumeration for OneNAND flash chip state
- */
-typedef enum {
-	FL_READY,
-	FL_READING,
-	FL_WRITING,
-	FL_ERASING,
-	FL_SYNCING,
-	FL_LOCKING,
-	FL_RESETING,
-	FL_OTPING,
-	FL_PM_SUSPENDED,
-} onenand_state_t;
-
 /**
  * struct onenand_bufferram - OneNAND BufferRAM Data
  * @blockpage:		block & page address in BufferRAM
@@ -137,7 +122,7 @@ struct onenand_chip {
 
 	spinlock_t		chip_lock;
 	wait_queue_head_t	wq;
-	onenand_state_t		state;
+	flstate_t		state;
 	unsigned char		*page_buf;
 	unsigned char		*oob_buf;
 
-- 
cgit v1.2.3


From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sat, 26 Sep 2009 14:33:01 +0200
Subject: x86: Use __builtin_object_size() to validate the buffer size for
 copy_from_user()

gcc (4.x) supports the __builtin_object_size() builtin, which
reports the size of an object that a pointer point to, when known
at compile time. If the buffer size is not known at compile time, a
constant -1 is returned.

This patch uses this feature to add a sanity check to
copy_from_user(); if the target buffer is known to be smaller than
the copy size, the copy is aborted and a WARNing is emitted in
memory debug mode.

These extra checks compile away when the object size is not known,
or if both the buffer size and the copy length are constants.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20090926143301.2c396b94@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess_32.h | 19 ++++++++++++++++++-
 arch/x86/include/asm/uaccess_64.h | 19 ++++++++++++++++++-
 arch/x86/kernel/x8664_ksyms_64.c  |  2 +-
 arch/x86/lib/copy_user_64.S       |  4 ++--
 arch/x86/lib/usercopy_32.c        |  4 ++--
 include/linux/compiler-gcc4.h     |  2 ++
 include/linux/compiler.h          |  4 ++++
 7 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 632fb44b4cb5..582d6aef7417 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 
 unsigned long __must_check copy_to_user(void __user *to,
 					const void *from, unsigned long n);
-unsigned long __must_check copy_from_user(void *to,
+unsigned long __must_check _copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n);
+
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+	int ret = -EFAULT;
+
+	if (likely(sz == -1 || sz >= n))
+		ret = _copy_from_user(to, from, n);
+#ifdef CONFIG_DEBUG_VM
+	else
+		WARN(1, "Buffer overflow detected!\n");
+#endif
+	return ret;
+}
+
 long __must_check strncpy_from_user(char *dst, const char __user *src,
 				    long count);
 long __must_check __strncpy_from_user(char *dst,
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index db24b215fc50..ce6fec7ce38d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_to_user(void __user *to, const void *from, unsigned len);
 __must_check unsigned long
-copy_from_user(void *to, const void __user *from, unsigned len);
+_copy_from_user(void *to, const void __user *from, unsigned len);
 __must_check unsigned long
 copy_in_user(void __user *to, const void __user *from, unsigned len);
 
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+	int ret = -EFAULT;
+
+	if (likely(sz == -1 || sz >= n))
+		ret = _copy_from_user(to, from, n);
+#ifdef CONFIG_DEBUG_VM
+	else
+		WARN(1, "Buffer overflow detected!\n");
+#endif
+	return ret;
+}
+
+
 static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 3909e3ba5ce3..a0cdd8cc1d67 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(copy_user_generic);
 EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(__copy_from_user_inatomic);
 
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 6ba0f7bb85ea..4be3c415b3e9 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -78,7 +78,7 @@ ENTRY(copy_to_user)
 ENDPROC(copy_to_user)
 
 /* Standard copy_from_user with segment limit checking */
-ENTRY(copy_from_user)
+ENTRY(_copy_from_user)
 	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rsi,%rcx
@@ -88,7 +88,7 @@ ENTRY(copy_from_user)
 	jae bad_from_user
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
-ENDPROC(copy_from_user)
+ENDPROC(_copy_from_user)
 
 ENTRY(copy_user_generic)
 	CFI_STARTPROC
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 1f118d462acc..8498684e45b0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user);
  * data to the requested size using zero bytes.
  */
 unsigned long
-copy_from_user(void *to, const void __user *from, unsigned long n)
+_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
 		n = __copy_from_user(to, from, n);
@@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 		memset(to, 0, n);
 	return n;
 }
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(_copy_from_user);
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 450fa597c94d..a3aef5d55dba 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -37,3 +37,5 @@
 #define __cold			__attribute__((__cold__))
 
 #endif
+
+#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..8e54108688f9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Compile time object size, -1 for unknown */
+#ifndef __compiletime_object_size
+# define __compiletime_object_size(obj) -1
+#endif
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 28 Sep 2009 17:12:49 +0200
Subject: tracing: Pushdown the bkl tracepoints calls

Currently we are calling the bkl tracepoint callbacks just before the
bkl lock/unlock operations, ie the tracepoint call is not inside a
lock_kernel() function but inside a lock_kernel() macro. Hence the
bkl trace event header must be included from smp_lock.h. This raises
some nasty circular header dependencies:

linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h
-> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h
-> linux/smp_lock.h

This results in incomplete event declarations, spurious event
definitions and other kind of funny behaviours.

This is hardly fixable without ugly workarounds. So instead, we push
the file name, line number and function name as lock_kernel()
parameters, so that we only deal with the trace event header from
lib/kernel_lock.c

This adds two parameters to lock_kernel() and unlock_kernel() but
it should be fine wrt to performances because this pair dos not seem
to be called in fast paths.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/smp_lock.h | 28 +++++++++++++++-------------
 lib/kernel_lock.c        | 15 +++++++++++----
 2 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index d48cc77ba70d..2ea1dd1ba21c 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,7 +3,6 @@
 
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
-#include <trace/events/bkl.h>
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
@@ -25,18 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task)
 	return 0;
 }
 
-extern void __lockfunc _lock_kernel(void)	__acquires(kernel_lock);
-extern void __lockfunc _unlock_kernel(void)	__releases(kernel_lock);
+extern void __lockfunc
+_lock_kernel(const char *func, const char *file, int line)
+__acquires(kernel_lock);
 
-#define lock_kernel()	{					\
-	trace_lock_kernel(__func__, __FILE__, __LINE__);	\
-	_lock_kernel();						\
-}
+extern void __lockfunc
+_unlock_kernel(const char *func, const char *file, int line)
+__releases(kernel_lock);
 
-#define unlock_kernel()	{					\
-	trace_unlock_kernel(__func__, __FILE__, __LINE__);	\
-	_unlock_kernel();					\
-}
+#define lock_kernel() do {					\
+	_lock_kernel(__func__, __FILE__, __LINE__);		\
+} while (0)
+
+#define unlock_kernel()	do {					\
+	_unlock_kernel(__func__, __FILE__, __LINE__);		\
+} while (0)
 
 /*
  * Various legacy drivers don't really need the BKL in a specific
@@ -52,8 +54,8 @@ static inline void cycle_kernel_lock(void)
 
 #else
 
-#define lock_kernel()	   trace_lock_kernel(__func__, __FILE__, __LINE__);
-#define unlock_kernel()    trace_unlock_kernel(__func__, __FILE__, __LINE__);
+#define lock_kernel()
+#define unlock_kernel()
 #define release_kernel_lock(task)		do { } while(0)
 #define cycle_kernel_lock()			do { } while(0)
 #define reacquire_kernel_lock(task)		0
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5c10b2e1fd08..4ebfa5a164d7 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -8,9 +8,11 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
-#define CREATE_TRACE_POINTS
 #include <linux/smp_lock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
+
 /*
  * The 'big kernel lock'
  *
@@ -114,19 +116,24 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc _lock_kernel(void)
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
-	int depth = current->lock_depth+1;
+	int depth = current->lock_depth + 1;
+
+	trace_lock_kernel(func, file, line);
+
 	if (likely(!depth))
 		__lock_kernel();
 	current->lock_depth = depth;
 }
 
-void __lockfunc _unlock_kernel(void)
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
+
+	trace_unlock_kernel(func, file, line);
 }
 
 EXPORT_SYMBOL(_lock_kernel);
-- 
cgit v1.2.3


From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Sep 2009 13:05:23 +0200
Subject: x86: Turn the copy_from_user check into an (optional) compile time
 warning

A previous patch added the buffer size check to copy_from_user().

One of the things learned from analyzing the result of the previous
patch is that in general, gcc is really good at proving that the
code contains sufficient security checks to not need to do a
runtime check. But that for those cases where gcc could not prove
this, there was a relatively high percentage of real security
issues.

This patch turns the case of "gcc cannot prove" into a compile time
warning, as long as a sufficiently new gcc is in use that supports
this. The objective is that these warnings will trigger developers
checking new cases out before a security hole enters a linux kernel
release.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <20090930130523.348ae6c4@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess_32.h | 12 +++++++++---
 arch/x86/lib/usercopy_32.c        |  6 ++++++
 include/linux/compiler-gcc4.h     |  3 +++
 include/linux/compiler.h          |  4 ++++
 4 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 582d6aef7417..952f9e793c3e 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n);
 
+
+extern void copy_from_user_overflow(void)
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	__compiletime_warning("copy_from_user() buffer size is not provably correct")
+#endif
+;
+
 static inline unsigned long __must_check copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n)
@@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to,
 
 	if (likely(sz == -1 || sz >= n))
 		ret = _copy_from_user(to, from, n);
-#ifdef CONFIG_DEBUG_VM
 	else
-		WARN(1, "Buffer overflow detected!\n");
-#endif
+		copy_from_user_overflow();
+
 	return ret;
 }
 
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8498684e45b0..e218d5df85ff 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
 	return n;
 }
 EXPORT_SYMBOL(_copy_from_user);
+
+void copy_from_user_overflow(void)
+{
+	WARN(1, "Buffer overflow detected!\n");
+}
+EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index a3aef5d55dba..f1709c1f9eae 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -39,3 +39,6 @@
 #endif
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#if __GNUC_MINOR__ >= 4
+#define __compiletime_warning(message) __attribute__((warning(message)))
+#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8e54108688f9..950356311f12 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
 #endif
+#ifndef __compiletime_warning
+# define __compiletime_warning(message)
+#endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From 7c68af6e32c73992bad24107311f3433c89016e2 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 19 Sep 2009 09:40:22 +0300
Subject: core, x86: Add user return notifiers

Add a general per-cpu notifier that is called whenever the kernel is
about to return to userspace.  The notifier uses a thread_info flag
and existing checks, so there is no impact on user return or context
switch fast paths.

This will be used initially to speed up KVM task switching by lazily
updating MSRs.

Signed-off-by: Avi Kivity <avi@redhat.com>
LKML-Reference: <1253342422-13811-1-git-send-email-avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/Kconfig                         | 10 ++++++++
 arch/x86/Kconfig                     |  1 +
 arch/x86/include/asm/thread_info.h   |  7 ++++--
 arch/x86/kernel/process.c            |  2 ++
 arch/x86/kernel/signal.c             |  3 +++
 include/linux/user-return-notifier.h | 42 ++++++++++++++++++++++++++++++++
 kernel/user-return-notifier.c        | 46 ++++++++++++++++++++++++++++++++++++
 7 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/user-return-notifier.h
 create mode 100644 kernel/user-return-notifier.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..4e312fffbfd7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -83,6 +83,13 @@ config KRETPROBES
 	def_bool y
 	depends on KPROBES && HAVE_KRETPROBES
 
+config USER_RETURN_NOTIFIER
+	bool
+	depends on HAVE_USER_RETURN_NOTIFIER
+	help
+	  Provide a kernel-internal notification when a cpu is about to
+	  switch to user mode.
+
 config HAVE_IOREMAP_PROT
 	bool
 
@@ -126,4 +133,7 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_USER_RETURN_NOTIFIER
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8da93745c087..1df175d15aa8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select HAVE_ARCH_KMEMCHECK
+	select HAVE_USER_RETURN_NOTIFIER
 
 config OUTPUT_FORMAT
 	string
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2fec4c..375c917c37d2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -142,13 +144,14 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
+	 _TIF_USER_RETURN_NOTIFY)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
 	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
 
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 
 #define PREEMPT_ACTIVE		0x10000000
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..e51b056fc88f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
+#include <linux/user-return-notifier.h>
 #include <trace/events/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
@@ -224,6 +225,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
+	propagate_user_return_notify(prev_p, next_p);
 }
 
 int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..c49f90f7957a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/processor.h>
 #include <asm/ucontext.h>
@@ -872,6 +873,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		if (current->replacement_session_keyring)
 			key_replace_session_keyring();
 	}
+	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+		fire_user_return_notifiers();
 
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h
new file mode 100644
index 000000000000..b6ac056291d7
--- /dev/null
+++ b/include/linux/user-return-notifier.h
@@ -0,0 +1,42 @@
+#ifndef _LINUX_USER_RETURN_NOTIFIER_H
+#define _LINUX_USER_RETURN_NOTIFIER_H
+
+#ifdef CONFIG_USER_RETURN_NOTIFIER
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct user_return_notifier {
+	void (*on_user_return)(struct user_return_notifier *urn);
+	struct hlist_node link;
+};
+
+
+void user_return_notifier_register(struct user_return_notifier *urn);
+void user_return_notifier_unregister(struct user_return_notifier *urn);
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+						struct task_struct *next)
+{
+	if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) {
+		clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY);
+		set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY);
+	}
+}
+
+void fire_user_return_notifiers(void);
+
+#else
+
+struct user_return_notifier {};
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+						struct task_struct *next)
+{
+}
+
+static inline void fire_user_return_notifiers(void) {}
+
+#endif
+
+#endif
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
new file mode 100644
index 000000000000..530ccb816513
--- /dev/null
+++ b/kernel/user-return-notifier.c
@@ -0,0 +1,46 @@
+
+#include <linux/user-return-notifier.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
+
+#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id())
+
+/*
+ * Request a notification when the current cpu returns to userspace.  Must be
+ * called in atomic context.  The notifier will also be called in atomic
+ * context.
+ */
+void user_return_notifier_register(struct user_return_notifier *urn)
+{
+	set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	hlist_add_head(&urn->link, &URN_LIST_HEAD);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_register);
+
+/*
+ * Removes a registered user return notifier.  Must be called from atomic
+ * context, and from the same cpu registration occured in.
+ */
+void user_return_notifier_unregister(struct user_return_notifier *urn)
+{
+	hlist_del(&urn->link);
+	if (hlist_empty(&URN_LIST_HEAD))
+		clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
+
+/* Calls registered user return notifiers */
+void fire_user_return_notifiers(void)
+{
+	struct user_return_notifier *urn;
+	struct hlist_node *tmp1, *tmp2;
+	struct hlist_head *head;
+
+	head = &get_cpu_var(return_notifier_list);
+	hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+		urn->on_user_return(urn);
+	put_cpu_var();
+}
-- 
cgit v1.2.3


From 1122a26f2abe4245ccdaed95ec23f63fe086b332 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 30 Sep 2009 13:52:12 +0200
Subject: block: use normal I/O path for discard requests

prepare_discard_fn() was being called in a place where memory allocation
was effectively impossible.  This makes it inappropriate for all but
the most trivial translations of Linux's DISCARD operation to the block
command set.  Additionally adding a payload there makes the ownership
of the bio backing unclear as it's now allocated by the device driver
and not the submitter as usual.

It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether
the queue supports discard operations or not.  blkdev_issue_discard now
allocates a one-page, sector-length payload which is the right thing
for the common ATA and SCSI implementations.

The mtd implementation of prepare_discard_fn() is replaced with simply
checking for the request being a discard.

Largely based on a previous patch from Matthew Wilcox <matthew@wil.cx>
which did the prepare_discard_fn but not the different payload allocation
yet.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-barrier.c         | 35 ++++++++++++++++++++++++++++++-----
 block/blk-core.c            |  3 +--
 block/blk-settings.c        | 17 -----------------
 drivers/mtd/mtd_blkdevs.c   | 19 +++++--------------
 drivers/staging/dst/dcore.c |  2 +-
 include/linux/blkdev.h      |  6 ++----
 6 files changed, 39 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab39cfe9..21f5025c3945 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 
 	if (bio->bi_private)
 		complete(bio->bi_private);
+	__free_page(bio_page(bio));
 
 	bio_put(bio);
 }
@@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = flags & DISCARD_FL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	struct bio *bio;
+	struct page *page;
 	int ret = 0;
 
 	if (!q)
 		return -ENXIO;
 
-	if (!q->prepare_discard_fn)
+	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
 	while (nr_sects && !ret) {
-		struct bio *bio = bio_alloc(gfp_mask, 0);
-		if (!bio)
-			return -ENOMEM;
+		unsigned int sector_size = q->limits.logical_block_size;
 
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio)
+			goto out;
+		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
 		if (flags & DISCARD_FL_WAIT)
 			bio->bi_private = &wait;
 
-		bio->bi_sector = sector;
+		/*
+		 * Add a zeroed one-sector payload as that's what
+		 * our current implementations need.  If we'll ever need
+		 * more the interface will need revisiting.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto out_free_bio;
+		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+			goto out_free_page;
 
+		/*
+		 * And override the bio size - the way discard works we
+		 * touch many more blocks on disk than the actual payload
+		 * length.
+		 */
 		if (nr_sects > queue_max_hw_sectors(q)) {
 			bio->bi_size = queue_max_hw_sectors(q) << 9;
 			nr_sects -= queue_max_hw_sectors(q);
@@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_put(bio);
 	}
 	return ret;
+out_free_page:
+	__free_page(page);
+out_free_bio:
+	bio_put(bio);
+out:
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228e4b29..80a020dd1580 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_rw_flagged(bio, BIO_RW_BARRIER))
 			req->cmd_flags |= REQ_SOFTBARRIER;
-		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
 		req->cmd_flags |= REQ_HARDBARRIER;
 
@@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 
 		if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-		    !q->prepare_discard_fn) {
+		    !blk_queue_discard(q)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index eaf122ff5f16..d29498ef1eb5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -33,23 +33,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
 }
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
-/**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:		queue
- * @dfn:	prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-	q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
 /**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 0acbf4f5be50..8ca17a3e96ea 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,14 +32,6 @@ struct mtd_blkcore_priv {
 	spinlock_t queue_lock;
 };
 
-static int blktrans_discard_request(struct request_queue *q,
-				    struct request *req)
-{
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_DISCARD;
-	return 0;
-}
-
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 	buf = req->buffer;
 
-	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return tr->discard(dev, block, nsect);
-
 	if (!blk_fs_request(req))
 		return -EIO;
 
@@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	    get_capacity(req->rq_disk))
 		return -EIO;
 
+	if (blk_discard_rq(req))
+		return tr->discard(dev, block, nsect);
+
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
@@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
-		blk_queue_set_discard(tr->blkcore_priv->rq,
-				      blktrans_discard_request);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+					tr->blkcore_priv->rq);
 
 	tr->blkshift = ffs(tr->blksize) - 1;
 
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index ac8577358ba0..5e8db0677582 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio)
 	struct dst_node *n = q->queuedata;
 	int err = -EIO;
 
-	if (bio_empty_barrier(bio) && !q->prepare_discard_fn) {
+	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
 		/*
 		 * This is a dirty^Wnice hack, but if we complete this
 		 * operation with -EOPNOTSUPP like intended, XFS
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e23a86cae5ac..f62d45e87618 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -82,7 +82,6 @@ enum rq_cmd_type_bits {
 enum {
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
 	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
-	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
@@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -340,7 +338,6 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
-	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
@@ -460,6 +457,7 @@ struct request_queue
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
+#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -591,6 +589,7 @@ enum {
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
+#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-- 
cgit v1.2.3


From ca80650cfbde5b17a5fa957a261c7973f84599a7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 30 Sep 2009 13:54:20 +0200
Subject: block: allow large discard requests

Currently we set the bio size to the byte equivalent of the blocks to
be trimmed when submitting the initial DISCARD ioctl.  That means it
is subject to the max_hw_sectors limitation of the HBA which is
much lower than the size of a DISCARD request we can support.
Add a separate max_discard_sectors tunable to limit the size for discard
requests.

We limit the max discard request size in bytes to 32bit as that is the
limit for bio->bi_size.  This could be much larger if we had a way to pass
that information through the block layer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 10 ++++++----
 block/blk-core.c       |  3 ++-
 block/blk-settings.c   | 13 +++++++++++++
 include/linux/blkdev.h |  3 +++
 4 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 21f5025c3945..8873b9b439ff 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -385,6 +385,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	while (nr_sects && !ret) {
 		unsigned int sector_size = q->limits.logical_block_size;
+		unsigned int max_discard_sectors =
+			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio)
@@ -411,10 +413,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 * touch many more blocks on disk than the actual payload
 		 * length.
 		 */
-		if (nr_sects > queue_max_hw_sectors(q)) {
-			bio->bi_size = queue_max_hw_sectors(q) << 9;
-			nr_sects -= queue_max_hw_sectors(q);
-			sector += queue_max_hw_sectors(q);
+		if (nr_sects > max_discard_sectors) {
+			bio->bi_size = max_discard_sectors << 9;
+			nr_sects -= max_discard_sectors;
+			sector += max_discard_sectors;
 		} else {
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 80a020dd1580..34504f309728 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1436,7 +1436,8 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 		}
 
-		if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+		if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+			     nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
 			       bdevname(bio->bi_bdev, b),
 			       bio_sectors(bio),
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d29498ef1eb5..e0695bca7027 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_segment_size = MAX_SEGMENT_SIZE;
 	lim->max_sectors = BLK_DEF_MAX_SECTORS;
 	lim->max_hw_sectors = INT_MAX;
+	lim->max_discard_sectors = SAFE_MAX_SECTORS;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -238,6 +239,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
+/**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q:  the request queue for the device
+ * @max_discard: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+		unsigned int max_discard_sectors)
+{
+	q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
 /**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f62d45e87618..1a03b715dfad 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -311,6 +311,7 @@ struct queue_limits {
 	unsigned int		alignment_offset;
 	unsigned int		io_min;
 	unsigned int		io_opt;
+	unsigned int		max_discard_sectors;
 
 	unsigned short		logical_block_size;
 	unsigned short		max_hw_segments;
@@ -928,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_discard_sectors(struct request_queue *q,
+		unsigned int max_discard_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_alignment_offset(struct request_queue *q,
-- 
cgit v1.2.3


From 1a35e0f6443f4266dad4c569c55c57a9032596fa Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Thu, 1 Oct 2009 21:16:13 +0200
Subject: Add a tracepoint for block request remapping

Since 2.6.31 now has request-based device-mapper, it's useful to have
a tracepoint for request-remapping as well as bio-remapping.
This patch adds a tracepoint for request-remapping, trace_block_rq_remap().

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c             |  1 +
 include/linux/blktrace_api.h |  2 +-
 include/trace/events/block.h | 33 +++++++++++++++++++++++++++++++++
 kernel/trace/blktrace.c      | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 34504f309728..ddaaea4fdffc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include "blk.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 622939a23299..3b73b9992b26 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_startstop(q, start)			(-ENOTTY)
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
-# define blk_trace_remove_sysfs(struct device *dev)	do { } while (0)
+# define blk_trace_remove_sysfs(dev)			do { } while (0)
 static inline int blk_trace_init_sysfs(struct device *dev)
 {
 	return 0;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d86af94691c2..00405b5f624a 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -488,6 +488,39 @@ TRACE_EVENT(block_remap,
 		  (unsigned long long)__entry->old_sector)
 );
 
+TRACE_EVENT(block_rq_remap,
+
+	TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, rq, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= disk_devt(rq->rq_disk);
+		__entry->sector		= blk_rq_pos(rq);
+		__entry->nr_sector	= blk_rq_sectors(rq);
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  (unsigned long long)__entry->old_sector)
+);
+
 #endif /* _TRACE_BLOCK_H */
 
 /* This part must be outside protection */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 60b5c5a3d4b4..d9d6206e0b14 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -855,6 +855,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 			sizeof(r), &r);
 }
 
+/**
+ * blk_add_trace_rq_remap - Add a trace for a request-remap operation
+ * @q:		queue the io is for
+ * @rq:		the source request
+ * @dev:	target device
+ * @from:	source sector
+ *
+ * Description:
+ *     Device mapper remaps request to other devices.
+ *     Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_rq_remap(struct request_queue *q,
+				   struct request *rq, dev_t dev,
+				   sector_t from)
+{
+	struct blk_trace *bt = q->blk_trace;
+	struct blk_io_trace_remap r;
+
+	if (likely(!bt))
+		return;
+
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
+	r.sector_from = cpu_to_be64(from);
+
+	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+			rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
+			sizeof(r), &r);
+}
+
 /**
  * blk_add_driver_data - Add binary message with driver-specific data
  * @q:		queue the io is for
@@ -922,10 +953,13 @@ static void blk_register_tracepoints(void)
 	WARN_ON(ret);
 	ret = register_trace_block_remap(blk_add_trace_remap);
 	WARN_ON(ret);
+	ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
+	WARN_ON(ret);
 }
 
 static void blk_unregister_tracepoints(void)
 {
+	unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
 	unregister_trace_block_remap(blk_add_trace_remap);
 	unregister_trace_block_split(blk_add_trace_split);
 	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
-- 
cgit v1.2.3


From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Fri, 25 Sep 2009 16:07:19 -0700
Subject: The DRBD driver

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 .../blockdev/drbd/DRBD-8.3-data-packets.svg        |  588 +++
 Documentation/blockdev/drbd/DRBD-data-packets.svg  |  459 ++
 Documentation/blockdev/drbd/README.txt             |   16 +
 Documentation/blockdev/drbd/conn-states-8.dot      |   18 +
 Documentation/blockdev/drbd/disk-states-8.dot      |   16 +
 .../drbd/drbd-connection-state-overview.dot        |   85 +
 Documentation/blockdev/drbd/node-states-8.dot      |   14 +
 MAINTAINERS                                        |   13 +
 drivers/block/Kconfig                              |    2 +
 drivers/block/Makefile                             |    1 +
 drivers/block/drbd/Kconfig                         |   82 +
 drivers/block/drbd/Makefile                        |    8 +
 drivers/block/drbd/drbd_actlog.c                   | 1484 +++++++
 drivers/block/drbd/drbd_bitmap.c                   | 1327 ++++++
 drivers/block/drbd/drbd_int.h                      | 2258 ++++++++++
 drivers/block/drbd/drbd_main.c                     | 3735 ++++++++++++++++
 drivers/block/drbd/drbd_nl.c                       | 2365 +++++++++++
 drivers/block/drbd/drbd_proc.c                     |  266 ++
 drivers/block/drbd/drbd_receiver.c                 | 4456 ++++++++++++++++++++
 drivers/block/drbd/drbd_req.c                      | 1132 +++++
 drivers/block/drbd/drbd_req.h                      |  327 ++
 drivers/block/drbd/drbd_strings.c                  |  113 +
 drivers/block/drbd/drbd_tracing.c                  |  752 ++++
 drivers/block/drbd/drbd_tracing.h                  |   87 +
 drivers/block/drbd/drbd_vli.h                      |  351 ++
 drivers/block/drbd/drbd_worker.c                   | 1529 +++++++
 drivers/block/drbd/drbd_wrappers.h                 |   91 +
 include/linux/drbd.h                               |  349 ++
 include/linux/drbd_limits.h                        |  137 +
 include/linux/drbd_nl.h                            |  137 +
 include/linux/drbd_tag_magic.h                     |   83 +
 include/linux/lru_cache.h                          |  294 ++
 lib/Kconfig                                        |    3 +
 lib/Makefile                                       |    2 +
 lib/lru_cache.c                                    |  560 +++
 35 files changed, 23140 insertions(+)
 create mode 100644 Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
 create mode 100644 Documentation/blockdev/drbd/DRBD-data-packets.svg
 create mode 100644 Documentation/blockdev/drbd/README.txt
 create mode 100644 Documentation/blockdev/drbd/conn-states-8.dot
 create mode 100644 Documentation/blockdev/drbd/disk-states-8.dot
 create mode 100644 Documentation/blockdev/drbd/drbd-connection-state-overview.dot
 create mode 100644 Documentation/blockdev/drbd/node-states-8.dot
 create mode 100644 drivers/block/drbd/Kconfig
 create mode 100644 drivers/block/drbd/Makefile
 create mode 100644 drivers/block/drbd/drbd_actlog.c
 create mode 100644 drivers/block/drbd/drbd_bitmap.c
 create mode 100644 drivers/block/drbd/drbd_int.h
 create mode 100644 drivers/block/drbd/drbd_main.c
 create mode 100644 drivers/block/drbd/drbd_nl.c
 create mode 100644 drivers/block/drbd/drbd_proc.c
 create mode 100644 drivers/block/drbd/drbd_receiver.c
 create mode 100644 drivers/block/drbd/drbd_req.c
 create mode 100644 drivers/block/drbd/drbd_req.h
 create mode 100644 drivers/block/drbd/drbd_strings.c
 create mode 100644 drivers/block/drbd/drbd_tracing.c
 create mode 100644 drivers/block/drbd/drbd_tracing.h
 create mode 100644 drivers/block/drbd/drbd_vli.h
 create mode 100644 drivers/block/drbd/drbd_worker.c
 create mode 100644 drivers/block/drbd/drbd_wrappers.h
 create mode 100644 include/linux/drbd.h
 create mode 100644 include/linux/drbd_limits.h
 create mode 100644 include/linux/drbd_nl.h
 create mode 100644 include/linux/drbd_tag_magic.h
 create mode 100644 include/linux/lru_cache.h
 create mode 100644 lib/lru_cache.c

(limited to 'include/linux')

diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
new file mode 100644
index 000000000000..f87cfa0dc2fb
--- /dev/null
+++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
@@ -0,0 +1,588 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   version="1.0"
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 21000 29700"
+   id="svg2"
+   style="fill-rule:evenodd">
+  <defs
+     id="defs4" />
+  <g
+     id="Default"
+     style="visibility:visible">
+    <desc
+       id="desc180">Master slide</desc>
+  </g>
+  <path
+     d="M 11999,8601 L 11899,8301 L 12099,8301 L 11999,8601 z"
+     id="path193"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,7801 L 11999,8361"
+     id="path197"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 7999,10401 L 7899,10101 L 8099,10101 L 7999,10401 z"
+     id="path209"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,9601 L 7999,10161"
+     id="path213"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,7801 L 11685,7840 L 11724,7644 L 11999,7801 z"
+     id="path225"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,7001 L 11764,7754"
+     id="path229"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-1244.4792,1416.5139)"
+     id="g245"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text247">
+      <tspan
+         x="9139 9368 9579 9808 9986 10075 10252 10481 10659 10837 10909"
+         y="9284"
+         id="tspan249">RSDataReply</tspan>
+    </text>
+  </g>
+  <path
+     d="M 7999,9601 L 8281,9458 L 8311,9655 L 7999,9601 z"
+     id="path259"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,9001 L 8236,9565"
+     id="path263"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,1620.9382,-1639.4947)"
+     id="g279"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text281">
+      <tspan
+         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+         y="7023"
+         id="tspan283">CsumRSRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text297"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="5707"
+       id="tspan299">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text313"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="7806"
+       id="tspan315">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text329"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="8606"
+       id="tspan331">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text345"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+       y="9007"
+       id="tspan347">w_e_end_csum_rs_req()</tspan>
+  </text>
+  <text
+     id="text361"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+       y="9507"
+       id="tspan363">receive_RSDataReply()</tspan>
+  </text>
+  <text
+     id="text377"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+       y="10407"
+       id="tspan379">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text393"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+       y="10907"
+       id="tspan395">e_end_resync_block()</tspan>
+  </text>
+  <path
+     d="M 11999,11601 L 11685,11640 L 11724,11444 L 11999,11601 z"
+     id="path405"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 7999,10801 L 11764,11554"
+     id="path409"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,2434.7562,-1674.649)"
+     id="g425"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text427">
+      <tspan
+         x="9320 9621 9726 9798 9887 10065 10277 10438"
+         y="10943"
+         id="tspan429">WriteAck</tspan>
+    </text>
+  </g>
+  <text
+     id="text443"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+       y="11559"
+       id="tspan445">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text459"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14302 14540 14658 14777 14870 15107 15225 15437 15649 15886"
+       y="4877"
+       id="tspan461">Checksum based Resync, case not in sync</tspan>
+  </text>
+  <text
+     id="text475"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6961 7266 7571 7854 8159 8299 8536 8654 8891 9010 9247 9484 9603 9840 9958 10077 10170 10407"
+       y="2806"
+       id="tspan477">DRBD-8.3 data flow</tspan>
+  </text>
+  <text
+     id="text491"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+       y="7005"
+       id="tspan493">w_e_send_csum()</tspan>
+  </text>
+  <path
+     d="M 11999,17601 L 11899,17301 L 12099,17301 L 11999,17601 z"
+     id="path503"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,16801 L 11999,17361"
+     id="path507"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,16801 L 11685,16840 L 11724,16644 L 11999,16801 z"
+     id="path519"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,16001 L 11764,16754"
+     id="path523"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-2539.5806,1529.3491)"
+     id="g539"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text541">
+      <tspan
+         x="9269 9498 9709 9798 9959 10048 10226 10437 10598 10776"
+         y="18265"
+         id="tspan543">RSIsInSync</tspan>
+    </text>
+  </g>
+  <path
+     d="M 7999,18601 L 8281,18458 L 8311,18655 L 7999,18601 z"
+     id="path553"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 11999,18001 L 8236,18565"
+     id="path557"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,3461.4027,-1449.3012)"
+     id="g573"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text575">
+      <tspan
+         x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+         y="16023"
+         id="tspan577">CsumRSRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text591"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="16806"
+       id="tspan593">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text607"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="17606"
+       id="tspan609">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text623"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+       y="18007"
+       id="tspan625">w_e_end_csum_rs_req()</tspan>
+  </text>
+  <text
+     id="text639"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5735 5913 6091 6180 6357 6446 6607 6696 6874 7085 7246 7424 7585 7691"
+       y="18507"
+       id="tspan641">got_IsInSync()</tspan>
+  </text>
+  <text
+     id="text655"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14159 14396 14514 14726 14937 15175"
+       y="13877"
+       id="tspan657">Checksum based Resync, case in sync</tspan>
+  </text>
+  <path
+     d="M 12000,24601 L 11900,24301 L 12100,24301 L 12000,24601 z"
+     id="path667"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,23801 L 12000,24361"
+     id="path671"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 8000,26401 L 7900,26101 L 8100,26101 L 8000,26401 z"
+     id="path683"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,25601 L 8000,26161"
+     id="path687"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,23801 L 11686,23840 L 11725,23644 L 12000,23801 z"
+     id="path699"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,23001 L 11765,23754"
+     id="path703"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-3543.8452,1630.5143)"
+     id="g719"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text721">
+      <tspan
+         x="9464 9710 9921 10150 10328 10505 10577"
+         y="25236"
+         id="tspan723">OVReply</tspan>
+    </text>
+  </g>
+  <path
+     d="M 8000,25601 L 8282,25458 L 8312,25655 L 8000,25601 z"
+     id="path733"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,25001 L 8237,25565"
+     id="path737"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,4918.2801,-1381.2128)"
+     id="g753"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text755">
+      <tspan
+         x="9142 9388 9599 9828 10006 10183 10361 10539 10700"
+         y="23106"
+         id="tspan757">OVRequest</tspan>
+    </text>
+  </g>
+  <text
+     id="text771"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13656 13868 14097 14274 14452 14630 14808 14969 15058 15163"
+       y="23806"
+       id="tspan773">receive_OVRequest()</tspan>
+  </text>
+  <text
+     id="text787"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+       y="24606"
+       id="tspan789">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text803"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14004 14182 14288 14465 14643 14749"
+       y="25007"
+       id="tspan805">w_e_end_ov_req()</tspan>
+  </text>
+  <text
+     id="text819"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5101 5207 5385 5546 5723 5795 5956 6134 6312 6557 6769 6998 7175 7353 7425 7586 7692"
+       y="25507"
+       id="tspan821">receive_OVReply()</tspan>
+  </text>
+  <text
+     id="text835"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="26407"
+       id="tspan837">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text851"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4902 5131 5308 5486 5664 5842 6020 6197 6375 6553 6714 6892 6998 7175 7353 7425 7586 7692"
+       y="26907"
+       id="tspan853">w_e_end_ov_reply()</tspan>
+  </text>
+  <path
+     d="M 12000,27601 L 11686,27640 L 11725,27444 L 12000,27601 z"
+     id="path863"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 8000,26801 L 11765,27554"
+     id="path867"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <g
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,5704.1907,-1328.312)"
+     id="g883"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <text
+       id="text885">
+      <tspan
+         x="9279 9525 9736 9965 10143 10303 10481 10553"
+         y="26935"
+         id="tspan887">OVResult</tspan>
+    </text>
+  </g>
+  <text
+     id="text901"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12556 12645 12822 13068 13280 13508 13686 13847 14025 14097 14185 14291"
+       y="27559"
+       id="tspan903">got_OVResult()</tspan>
+  </text>
+  <text
+     id="text917"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8330 8567 8660 8754 8991 9228 9346 9558 9795 9935 10028 10146"
+       y="21877"
+       id="tspan919">Online verify</tspan>
+  </text>
+  <text
+     id="text933"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4641 4870 5047 5310 5488 5649 5826 6004 6182 6343 6521 6626 6804 6982 7160 7338 7499 7587 7693"
+       y="23005"
+       id="tspan935">w_make_ov_request()</tspan>
+  </text>
+  <path
+     d="M 8000,6500 L 7900,6200 L 8100,6200 L 8000,6500 z"
+     id="path945"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,5700 L 8000,6260"
+     id="path949"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 3900,5500 L 3700,5500 L 3700,11000 L 3900,11000"
+     id="path961"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,14500 L 3700,14500 L 3700,18600 L 3900,18600"
+     id="path973"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,22800 L 3700,22800 L 3700,26900 L 3900,26900"
+     id="path985"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text1001"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="6506"
+       id="tspan1003">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text1017"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="14708"
+       id="tspan1019">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text1033"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+       y="16006"
+       id="tspan1035">w_e_send_csum()</tspan>
+  </text>
+  <path
+     d="M 8000,15501 L 7900,15201 L 8100,15201 L 8000,15501 z"
+     id="path1045"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,14701 L 8000,15261"
+     id="path1049"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     id="text1065"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+       y="15507"
+       id="tspan1067">drbd_endio_read_sec()</tspan>
+  </text>
+  <path
+     d="M 16100,9000 L 16300,9000 L 16300,7500 L 16100,7500"
+     id="path1077"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,18000 L 16300,18000 L 16300,16500 L 16100,16500"
+     id="path1089"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,25000 L 16300,25000 L 16300,23500 L 16100,23500"
+     id="path1101"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text1117"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+       y="5402"
+       id="tspan1119">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1133"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2027 2133 2294 2472 2649 2827 3005 3077 3255 3432 3504 3682 3788"
+       y="14402"
+       id="tspan1135">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1149"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+       y="22602"
+       id="tspan1151">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1165"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1426 1532 1693 1871 2031 2209 2472 2649 2721 2899 2988 3166 3344 3416 3593 3699"
+       y="11302"
+       id="tspan1167">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1181"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+       y="18931"
+       id="tspan1183">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1197"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+       y="27231"
+       id="tspan1199">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1213"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16126 16232 16393 16571 16748 16926 17104 17176 17354 17531 17603 17781 17887"
+       y="7402"
+       id="tspan1215">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1229"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+       y="16331"
+       id="tspan1231">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1245"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+       y="23302"
+       id="tspan1247">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1261"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="9302"
+       id="tspan1263">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1277"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="18331"
+       id="tspan1279">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1293"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16126 16232 16393 16571 16731 16909 17172 17349 17421 17599 17688 17866 18044 18116 18293 18399"
+       y="25302"
+       id="tspan1295">rs_complete_io()</tspan>
+  </text>
+</svg>
diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg
new file mode 100644
index 000000000000..48a1e2165fec
--- /dev/null
+++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg
@@ -0,0 +1,459 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   version="1.0"
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 21000 29700"
+   id="svg2"
+   style="fill-rule:evenodd">
+  <defs
+     id="defs4" />
+  <g
+     id="Default"
+     style="visibility:visible">
+    <desc
+       id="desc176">Master slide</desc>
+  </g>
+  <path
+     d="M 11999,19601 L 11899,19301 L 12099,19301 L 11999,19601 z"
+     id="path189"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,18801 L 11999,19361"
+     id="path193"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 7999,21401 L 7899,21101 L 8099,21101 L 7999,21401 z"
+     id="path205"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,20601 L 7999,21161"
+     id="path209"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 11999,18801 L 11685,18840 L 11724,18644 L 11999,18801 z"
+     id="path221"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 7999,18001 L 11764,18754"
+     id="path225"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-3023.845"
+     y="1106.8124"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text243"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6115.1553 6344.1553 6555.1553 6784.1553 6962.1553 7051.1553 7228.1553 7457.1553 7635.1553 7813.1553 7885.1553"
+       y="21390.812"
+       id="tspan245">RSDataReply</tspan>
+  </text>
+  <path
+     d="M 7999,20601 L 8281,20458 L 8311,20655 L 7999,20601 z"
+     id="path255"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 11999,20001 L 8236,20565"
+     id="path259"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="3502.5356"
+     y="-2184.6621"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text277"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12321.536 12550.536 12761.536 12990.536 13168.536 13257.536 13434.536 13663.536 13841.536 14019.536 14196.536 14374.536 14535.536"
+       y="15854.338"
+       id="tspan279">RSDataRequest</tspan>
+  </text>
+  <text
+     id="text293"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+       y="17807"
+       id="tspan295">w_make_resync_request()</tspan>
+  </text>
+  <text
+     id="text309"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+       y="18806"
+       id="tspan311">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text325"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+       y="19606"
+       id="tspan327">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text341"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13770 13931 14109 14287 14375 14553 14731 14837 15015 15192 15298"
+       y="20007"
+       id="tspan343">w_e_end_rsdata_req()</tspan>
+  </text>
+  <text
+     id="text357"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+       y="20507"
+       id="tspan359">receive_RSDataReply()</tspan>
+  </text>
+  <text
+     id="text373"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+       y="21407"
+       id="tspan375">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text389"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+       y="21907"
+       id="tspan391">e_end_resync_block()</tspan>
+  </text>
+  <path
+     d="M 11999,22601 L 11685,22640 L 11724,22444 L 11999,22601 z"
+     id="path401"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 7999,21801 L 11764,22554"
+     id="path405"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <text
+     x="4290.3008"
+     y="-2369.6162"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text423"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="13610.301 13911.301 14016.301 14088.301 14177.301 14355.301 14567.301 14728.301"
+       y="19573.385"
+       id="tspan425">WriteAck</tspan>
+  </text>
+  <text
+     id="text439"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+       y="22559"
+       id="tspan441">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text455"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7999 8304 8541 8753 8964 9201 9413 9531 9769 9862 10099 10310 10522 10734 10852 10971 11208 11348 11585 11822"
+       y="16877"
+       id="tspan457">Resync blocks, 4-32K</tspan>
+  </text>
+  <path
+     d="M 12000,7601 L 11900,7301 L 12100,7301 L 12000,7601 z"
+     id="path467"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,6801 L 12000,7361"
+     id="path471"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,6801 L 11686,6840 L 11725,6644 L 12000,6801 z"
+     id="path483"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,6001 L 11765,6754"
+     id="path487"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-1288.1796"
+     y="1279.7666"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text505"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8174.8208 8475.8203 8580.8203 8652.8203 8741.8203 8919.8203 9131.8203 9292.8203"
+       y="9516.7666"
+       id="tspan507">WriteAck</tspan>
+  </text>
+  <path
+     d="M 8000,8601 L 8282,8458 L 8312,8655 L 8000,8601 z"
+     id="path517"
+     style="fill:#000080;visibility:visible" />
+  <path
+     d="M 12000,8001 L 8237,8565"
+     id="path521"
+     style="fill:none;stroke:#000080;visibility:visible" />
+  <text
+     x="1065.6655"
+     y="-2097.7664"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text539"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="10682.666 10911.666 11088.666 11177.666"
+       y="4107.2339"
+       id="tspan541">Data</tspan>
+  </text>
+  <text
+     id="text555"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+       y="5505"
+       id="tspan557">drbd_make_request()</tspan>
+  </text>
+  <text
+     id="text571"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14190"
+       y="6806"
+       id="tspan573">receive_Data()</tspan>
+  </text>
+  <text
+     id="text587"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14207 14312 14384 14473 14651 14829 14990 15168 15328 15434"
+       y="7606"
+       id="tspan589">drbd_endio_write_sec()</tspan>
+  </text>
+  <text
+     id="text603"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12370 12548 12725 12903 13081 13259 13437 13509 13686 13847 14008 14114"
+       y="8007"
+       id="tspan605">e_end_block()</tspan>
+  </text>
+  <text
+     id="text619"
+     style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5647 5825 6003 6092 6269 6481 6553 6731 6892 7052 7264 7425 7586 7692"
+       y="8606"
+       id="tspan621">got_BlockAck()</tspan>
+  </text>
+  <text
+     id="text635"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8305 8542 8779 9016 9109 9346 9486 9604 9956 10049 10189 10328 10565 10705 10942 11179 11298 11603 11742 11835 11954 12191 12310 12428 12665 12902 13139 13279 13516 13753"
+       y="4877"
+       id="tspan637">Regular mirrored write, 512-32K</tspan>
+  </text>
+  <text
+     id="text651"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5381 5610 5787 5948 6126 6304 6482 6659 6837 7015 7087 7265 7426 7587 7692"
+       y="6003"
+       id="tspan653">w_send_dblock()</tspan>
+  </text>
+  <path
+     d="M 8000,6800 L 7900,6500 L 8100,6500 L 8000,6800 z"
+     id="path663"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,6000 L 8000,6560"
+     id="path667"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     id="text683"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4602 4780 4886 5063 5241 5419 5597 5775 5952 6024 6202 6380 6609 6714 6786 6875 7053 7231 7409 7515 7587 7692"
+       y="6905"
+       id="tspan685">drbd_endio_write_pri()</tspan>
+  </text>
+  <path
+     d="M 12000,13602 L 11900,13302 L 12100,13302 L 12000,13602 z"
+     id="path695"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,12802 L 12000,13362"
+     id="path699"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <path
+     d="M 12000,12802 L 11686,12841 L 11725,12645 L 12000,12802 z"
+     id="path711"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 8000,12002 L 11765,12755"
+     id="path715"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="-2155.5266"
+     y="1201.5964"
+     transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+     id="text733"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="7202.4736 7431.4736 7608.4736 7697.4736 7875.4736 8104.4736 8282.4736 8459.4736 8531.4736"
+       y="15454.597"
+       id="tspan735">DataReply</tspan>
+  </text>
+  <path
+     d="M 8000,14602 L 8282,14459 L 8312,14656 L 8000,14602 z"
+     id="path745"
+     style="fill:#008000;visibility:visible" />
+  <path
+     d="M 12000,14002 L 8237,14566"
+     id="path749"
+     style="fill:none;stroke:#008000;visibility:visible" />
+  <text
+     x="2280.3804"
+     y="-2103.2141"
+     transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+     id="text767"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="11316.381 11545.381 11722.381 11811.381 11989.381 12218.381 12396.381 12573.381 12751.381 12929.381 13090.381"
+       y="9981.7861"
+       id="tspan769">DataRequest</tspan>
+  </text>
+  <text
+     id="text783"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+       y="11506"
+       id="tspan785">drbd_make_request()</tspan>
+  </text>
+  <text
+     id="text799"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14312 14490 14668 14846 15024 15185 15273 15379"
+       y="12807"
+       id="tspan801">receive_DataRequest()</tspan>
+  </text>
+  <text
+     id="text815"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+       y="13607"
+       id="tspan817">drbd_endio_read_sec()</tspan>
+  </text>
+  <text
+     id="text831"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14021 14110 14288 14465 14571 14749 14927 15033"
+       y="14008"
+       id="tspan833">w_e_end_data_req()</tspan>
+  </text>
+  <g
+     id="g835"
+     style="visibility:visible">
+    <desc
+       id="desc837">Drawing</desc>
+    <text
+       id="text847"
+       style="font-size:318px;font-weight:400;fill:#008000;font-family:Helvetica embedded">
+      <tspan
+         x="4885 4991 5169 5330 5507 5579 5740 5918 6096 6324 6502 6591 6769 6997 7175 7353 7425 7586 7692"
+         y="14607"
+         id="tspan849">receive_DataReply()</tspan>
+    </text>
+  </g>
+  <text
+     id="text863"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="8000 8305 8398 8610 8821 8914 9151 9363 9575 9693 9833 10070 10307 10544 10663 10781 11018 11255 11493 11632 11869 12106"
+       y="10878"
+       id="tspan865">Diskless read, 512-32K</tspan>
+  </text>
+  <text
+     id="text879"
+     style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="5029 5258 5435 5596 5774 5952 6130 6307 6413 6591 6769 6947 7125 7230 7408 7586 7692"
+       y="12004"
+       id="tspan881">w_send_read_req()</tspan>
+  </text>
+  <text
+     id="text895"
+     style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="6961 7266 7571 7854 8159 8278 8515 8633 8870 9107 9226 9463 9581 9700 9793 10030"
+       y="2806"
+       id="tspan897">DRBD 8 data flow</tspan>
+  </text>
+  <path
+     d="M 3900,5300 L 3700,5300 L 3700,7000 L 3900,7000"
+     id="path907"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 3900,17600 L 3700,17600 L 3700,22000 L 3900,22000"
+     id="path919"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <path
+     d="M 16100,20000 L 16300,20000 L 16300,18500 L 16100,18500"
+     id="path931"
+     style="fill:none;stroke:#000000;visibility:visible" />
+  <text
+     id="text947"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2126 2304 2376 2554 2731 2909 3087 3159 3337 3515 3587 3764 3870"
+       y="5202"
+       id="tspan949">al_begin_io()</tspan>
+  </text>
+  <text
+     id="text963"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1632 1810 1882 2060 2220 2398 2661 2839 2910 3088 3177 3355 3533 3605 3783 3888"
+       y="7331"
+       id="tspan965">al_complete_io()</tspan>
+  </text>
+  <text
+     id="text979"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="2126 2232 2393 2571 2748 2926 3104 3176 3354 3531 3603 3781 3887"
+       y="17431"
+       id="tspan981">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text995"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="1626 1732 1893 2071 2231 2409 2672 2849 2921 3099 3188 3366 3544 3616 3793 3899"
+       y="22331"
+       id="tspan997">rs_complete_io()</tspan>
+  </text>
+  <text
+     id="text1011"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16027 16133 16294 16472 16649 16827 17005 17077 17255 17432 17504 17682 17788"
+       y="18402"
+       id="tspan1013">rs_begin_io()</tspan>
+  </text>
+  <text
+     id="text1027"
+     style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+    <tspan
+       x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+       y="20331"
+       id="tspan1029">rs_complete_io()</tspan>
+  </text>
+</svg>
diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt
new file mode 100644
index 000000000000..627b0a1bf35e
--- /dev/null
+++ b/Documentation/blockdev/drbd/README.txt
@@ -0,0 +1,16 @@
+Description
+
+  DRBD is a shared-nothing, synchronously replicated block device. It
+  is designed to serve as a building block for high availability
+  clusters and in this context, is a "drop-in" replacement for shared
+  storage. Simplistically, you could see it as a network RAID 1.
+
+  Please visit http://www.drbd.org to find out more.
+
+The here included files are intended to help understand the implementation
+
+DRBD-8.3-data-packets.svg, DRBD-data-packets.svg  
+  relates some functions, and write packets.
+
+conn-states-8.dot, disk-states-8.dot, node-states-8.dot
+  The sub graphs of DRBD's state transitions
diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot
new file mode 100644
index 000000000000..025e8cf5e64a
--- /dev/null
+++ b/Documentation/blockdev/drbd/conn-states-8.dot
@@ -0,0 +1,18 @@
+digraph conn_states {
+	StandAllone  -> WFConnection   [ label = "ioctl_set_net()" ]
+	WFConnection -> Unconnected    [ label = "unable to bind()" ]
+	WFConnection -> WFReportParams [ label = "in connect() after accept" ]
+	WFReportParams -> StandAllone  [ label = "checks in receive_param()" ]
+	WFReportParams -> Connected    [ label = "in receive_param()" ]
+	WFReportParams -> WFBitMapS    [ label = "sync_handshake()" ]
+	WFReportParams -> WFBitMapT    [ label = "sync_handshake()" ]
+	WFBitMapS -> SyncSource        [ label = "receive_bitmap()" ]
+	WFBitMapT -> SyncTarget        [ label = "receive_bitmap()" ]
+	SyncSource -> Connected
+	SyncTarget -> Connected
+	SyncSource -> PausedSyncS
+	SyncTarget -> PausedSyncT
+	PausedSyncS -> SyncSource
+	PausedSyncT -> SyncTarget
+	Connected   -> WFConnection    [ label = "* on network error" ]
+}
diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot
new file mode 100644
index 000000000000..d06cfb46fb98
--- /dev/null
+++ b/Documentation/blockdev/drbd/disk-states-8.dot
@@ -0,0 +1,16 @@
+digraph disk_states {
+	Diskless -> Inconsistent       [ label = "ioctl_set_disk()" ]
+	Diskless -> Consistent         [ label = "ioctl_set_disk()" ]
+	Diskless -> Outdated           [ label = "ioctl_set_disk()" ]
+	Consistent -> Outdated         [ label = "receive_param()" ]
+	Consistent -> UpToDate         [ label = "receive_param()" ]
+	Consistent -> Inconsistent     [ label = "start resync" ]
+	Outdated   -> Inconsistent     [ label = "start resync" ]
+	UpToDate   -> Inconsistent     [ label = "ioctl_replicate" ]
+	Inconsistent -> UpToDate       [ label = "resync completed" ]
+	Consistent -> Failed           [ label = "io completion error" ]
+	Outdated   -> Failed           [ label = "io completion error" ]
+	UpToDate   -> Failed           [ label = "io completion error" ]
+	Inconsistent -> Failed         [ label = "io completion error" ]
+	Failed -> Diskless             [ label = "sending notify to peer" ]
+}
diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
new file mode 100644
index 000000000000..6d9cf0a7b11d
--- /dev/null
+++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
@@ -0,0 +1,85 @@
+// vim: set sw=2 sts=2 :
+digraph {
+  rankdir=BT
+  bgcolor=white
+
+  node [shape=plaintext]
+  node [fontcolor=black]
+
+  StandAlone     [ style=filled,fillcolor=gray,label=StandAlone ]
+
+  node [fontcolor=lightgray]
+
+  Unconnected    [ label=Unconnected ]
+
+  CommTrouble [ shape=record,
+    label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ]
+
+  node [fontcolor=gray]
+
+  subgraph cluster_try_connect {
+    label="try to connect, handshake"
+    rank=max
+    WFConnection   [ label=WFConnection ]
+    WFReportParams [ label=WFReportParams ]
+  }
+
+  TearDown       [ label=TearDown ]
+
+  Connected      [ label=Connected,style=filled,fillcolor=green,fontcolor=black ]
+
+  node [fontcolor=lightblue]
+
+  StartingSyncS  [ label=StartingSyncS ]
+  StartingSyncT  [ label=StartingSyncT ]
+
+  subgraph cluster_bitmap_exchange {
+    node [fontcolor=red]
+    fontcolor=red
+    label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged"
+
+    WFBitMapT      [ label=WFBitMapT ]
+    WFSyncUUID     [ label=WFSyncUUID ]
+    WFBitMapS      [ label=WFBitMapS ]
+  }
+
+  node [fontcolor=blue]
+
+  cluster_resync [ shape=record,label="{<any>resynchronisation process running\l'concurrent' application requests allowed|{{<T>PausedSyncT\nSyncTarget}|{<S>PausedSyncS\nSyncSource}}}" ]
+
+  node [shape=box,fontcolor=black]
+
+  // drbdadm [label="drbdadm connect"]
+  // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."]
+  // comm_error [label="communication trouble"]
+
+  //
+  // edges
+  // --------------------------------------
+
+  StandAlone -> Unconnected [ label="drbdadm connect" ]
+  Unconnected -> StandAlone  [ label="drbdadm disconnect\lor serious communication trouble" ]
+  Unconnected -> WFConnection [ label="receiver thread is started" ]
+  WFConnection -> WFReportParams [ headlabel="accept()\land/or                        \lconnect()\l" ]
+
+  WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ]
+  WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ]
+
+    WFReportParams -> WFBitMapS
+    WFReportParams -> WFBitMapT
+    WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false]
+
+      WFBitMapS -> cluster_resync:S
+      WFSyncUUID -> cluster_resync:T
+
+  edge [color=green]
+  cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ]
+
+  edge [color=red]
+  WFReportParams -> CommTrouble
+  Connected -> CommTrouble
+  cluster_resync:any -> CommTrouble
+  edge [color=black]
+  CommTrouble -> Unconnected [label="receiver thread is stopped" ]
+
+}
diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot
new file mode 100644
index 000000000000..4a2b00c23547
--- /dev/null
+++ b/Documentation/blockdev/drbd/node-states-8.dot
@@ -0,0 +1,14 @@
+digraph node_states {
+	Secondary -> Primary           [ label = "ioctl_set_state()" ]
+	Primary   -> Secondary 	       [ label = "ioctl_set_state()" ]
+}
+
+digraph peer_states {
+	Secondary -> Primary           [ label = "recv state packet" ]
+	Primary   -> Secondary 	       [ label = "recv state packet" ]
+	Primary   -> Unknown 	       [ label = "connection lost" ]
+	Secondary  -> Unknown  	       [ label = "connection lost" ]
+	Unknown   -> Primary           [ label = "connected" ]
+	Unknown   -> Secondary         [ label = "connected" ]
+}
+
diff --git a/MAINTAINERS b/MAINTAINERS
index c450f3abb8c9..ea56bd7a6cba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1758,6 +1758,19 @@ S:	Maintained
 F:	drivers/scsi/dpt*
 F:	drivers/scsi/dpt/
 
+DRBD DRIVER
+P:     Philipp Reisner
+P:     Lars Ellenberg
+M:     drbd-dev@lists.linbit.com
+L:     drbd-user@lists.linbit.com
+W:     http://www.drbd.org
+T:     git git://git.drbd.org/linux-2.6-drbd.git drbd
+T:     git git://git.drbd.org/drbd-8.3.git
+S:     Supported
+F:     drivers/block/drbd/
+F:     lib/lru_cache.c
+F:     Documentation/blockdev/drbd/
+
 DRIVER CORE, KOBJECTS, AND SYSFS
 M:	Greg Kroah-Hartman <gregkh@suse.de>
 T:	quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1d886e079c58..77bfce52e9ca 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -271,6 +271,8 @@ config BLK_DEV_CRYPTOLOOP
 	  instead, which can be configured to be on-disk compatible with the
 	  cryptoloop device.
 
+source "drivers/block/drbd/Kconfig"
+
 config BLK_DEV_NBD
 	tristate "Network block device support"
 	depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index cdaa3f8fddf0..aff5ac925c34 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_BLK_DEV_UB)	+= ub.o
 obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
+obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
 
 swim_mod-objs	:= swim.o swim_asm.o
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
new file mode 100644
index 000000000000..4e6f90f487c2
--- /dev/null
+++ b/drivers/block/drbd/Kconfig
@@ -0,0 +1,82 @@
+#
+# DRBD device driver configuration
+#
+
+comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
+	depends on !PROC_FS || !INET || !CONNECTOR
+
+config BLK_DEV_DRBD
+	tristate "DRBD Distributed Replicated Block Device support"
+	depends on PROC_FS && INET && CONNECTOR
+	select LRU_CACHE
+	default n
+	help
+
+	  NOTE: In order to authenticate connections you have to select
+	  CRYPTO_HMAC and a hash function as well.
+
+	  DRBD is a shared-nothing, synchronously replicated block device. It
+	  is designed to serve as a building block for high availability
+	  clusters and in this context, is a "drop-in" replacement for shared
+	  storage. Simplistically, you could see it as a network RAID 1.
+
+	  Each minor device has a role, which can be 'primary' or 'secondary'.
+	  On the node with the primary device the application is supposed to
+	  run and to access the device (/dev/drbdX). Every write is sent to
+	  the local 'lower level block device' and, across the network, to the
+	  node with the device in 'secondary' state.  The secondary device
+	  simply writes the data to its lower level block device.
+
+	  DRBD can also be used in dual-Primary mode (device writable on both
+	  nodes), which means it can exhibit shared disk semantics in a
+	  shared-nothing cluster.  Needless to say, on top of dual-Primary
+	  DRBD utilizing a cluster file system is necessary to maintain for
+	  cache coherency.
+
+	  For automatic failover you need a cluster manager (e.g. heartbeat).
+	  See also: http://www.drbd.org/, http://www.linux-ha.org
+
+	  If unsure, say N.
+
+config DRBD_TRACE
+	tristate "DRBD tracing"
+	depends on BLK_DEV_DRBD
+	select TRACEPOINTS
+	default n
+	help
+
+	  Say Y here if you want to be able to trace various events in DRBD.
+
+	  If unsure, say N.
+
+config DRBD_FAULT_INJECTION
+	bool "DRBD fault injection"
+	depends on BLK_DEV_DRBD
+	help
+
+	  Say Y here if you want to simulate IO errors, in order to test DRBD's
+	  behavior.
+
+	  The actual simulation of IO errors is done by writing 3 values to
+	  /sys/module/drbd/parameters/
+
+	  enable_faults: bitmask of...
+	  1	meta data write
+	  2               read
+	  4	resync data write
+	  8	            read
+	  16	data write
+	  32	data read
+	  64	read ahead
+	  128	kmalloc of bitmap
+	  256	allocation of EE (epoch_entries)
+
+	  fault_devs: bitmask of minor numbers
+	  fault_rate: frequency in percent
+
+	  Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
+		echo 16 > /sys/module/drbd/parameters/enable_faults
+		echo 1 > /sys/module/drbd/parameters/fault_devs
+		echo 5 > /sys/module/drbd/parameters/fault_rate
+
+	  If unsure, say N.
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
new file mode 100644
index 000000000000..7d86ef8a8b40
--- /dev/null
+++ b/drivers/block/drbd/Makefile
@@ -0,0 +1,8 @@
+drbd-y := drbd_bitmap.o drbd_proc.o
+drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
+drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
+
+drbd_trace-y := drbd_tracing.o
+
+obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o
+obj-$(CONFIG_DRBD_TRACE)       += drbd_trace.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
new file mode 100644
index 000000000000..74b4835d3107
--- /dev/null
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -0,0 +1,1484 @@
+/*
+   drbd_actlog.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include "drbd_wrappers.h"
+
+/* We maintain a trivial check sum in our on disk activity log.
+ * With that we can ensure correct operation even when the storage
+ * device might do a partial (last) sector write while loosing power.
+ */
+struct __packed al_transaction {
+	u32       magic;
+	u32       tr_number;
+	struct __packed {
+		u32 pos;
+		u32 extent; } updates[1 + AL_EXTENTS_PT];
+	u32       xor_sum;
+};
+
+struct update_odbm_work {
+	struct drbd_work w;
+	unsigned int enr;
+};
+
+struct update_al_work {
+	struct drbd_work w;
+	struct lc_element *al_ext;
+	struct completion event;
+	unsigned int enr;
+	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
+	unsigned int old_enr;
+};
+
+struct drbd_atodb_wait {
+	atomic_t           count;
+	struct completion  io_done;
+	struct drbd_conf   *mdev;
+	int                error;
+};
+
+
+int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
+
+/* The actual tracepoint needs to have constant number of known arguments...
+ */
+void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	trace__drbd_resync(mdev, level, fmt, ap);
+	va_end(ap);
+}
+
+static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
+				 struct drbd_backing_dev *bdev,
+				 struct page *page, sector_t sector,
+				 int rw, int size)
+{
+	struct bio *bio;
+	struct drbd_md_io md_io;
+	int ok;
+
+	md_io.mdev = mdev;
+	init_completion(&md_io.event);
+	md_io.error = 0;
+
+	if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
+		rw |= (1 << BIO_RW_BARRIER);
+	rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));
+
+ retry:
+	bio = bio_alloc(GFP_NOIO, 1);
+	bio->bi_bdev = bdev->md_bdev;
+	bio->bi_sector = sector;
+	ok = (bio_add_page(bio, page, size, 0) == size);
+	if (!ok)
+		goto out;
+	bio->bi_private = &md_io;
+	bio->bi_end_io = drbd_md_io_complete;
+	bio->bi_rw = rw;
+
+	trace_drbd_bio(mdev, "Md", bio, 0, NULL);
+
+	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+		bio_endio(bio, -EIO);
+	else
+		submit_bio(rw, bio);
+	wait_for_completion(&md_io.event);
+	ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
+
+	/* check for unsupported barrier op.
+	 * would rather check on EOPNOTSUPP, but that is not reliable.
+	 * don't try again for ANY return value != 0 */
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
+		/* Try again with no barrier */
+		dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
+		set_bit(MD_NO_BARRIER, &mdev->flags);
+		rw &= ~(1 << BIO_RW_BARRIER);
+		bio_put(bio);
+		goto retry;
+	}
+ out:
+	bio_put(bio);
+	return ok;
+}
+
+int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+			 sector_t sector, int rw)
+{
+	int logical_block_size, mask, ok;
+	int offset = 0;
+	struct page *iop = mdev->md_io_page;
+
+	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+
+	BUG_ON(!bdev->md_bdev);
+
+	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
+	if (logical_block_size == 0)
+		logical_block_size = MD_SECTOR_SIZE;
+
+	/* in case logical_block_size != 512 [ s390 only? ] */
+	if (logical_block_size != MD_SECTOR_SIZE) {
+		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
+		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
+		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
+		offset = sector & mask;
+		sector = sector & ~mask;
+		iop = mdev->md_io_tmpp;
+
+		if (rw & WRITE) {
+			/* these are GFP_KERNEL pages, pre-allocated
+			 * on device initialization */
+			void *p = page_address(mdev->md_io_page);
+			void *hp = page_address(mdev->md_io_tmpp);
+
+			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
+					READ, logical_block_size);
+
+			if (unlikely(!ok)) {
+				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
+				    "READ [logical_block_size!=512]) failed!\n",
+				    (unsigned long long)sector);
+				return 0;
+			}
+
+			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
+		}
+	}
+
+	if (sector < drbd_md_first_sector(bdev) ||
+	    sector > drbd_md_last_sector(bdev))
+		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
+		     current->comm, current->pid, __func__,
+		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+
+	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
+	if (unlikely(!ok)) {
+		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
+		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+		return 0;
+	}
+
+	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
+		void *p = page_address(mdev->md_io_page);
+		void *hp = page_address(mdev->md_io_tmpp);
+
+		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
+	}
+
+	return ok;
+}
+
+static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
+{
+	struct lc_element *al_ext;
+	struct lc_element *tmp;
+	unsigned long     al_flags = 0;
+
+	spin_lock_irq(&mdev->al_lock);
+	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+	if (unlikely(tmp != NULL)) {
+		struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+			spin_unlock_irq(&mdev->al_lock);
+			return NULL;
+		}
+	}
+	al_ext   = lc_get(mdev->act_log, enr);
+	al_flags = mdev->act_log->flags;
+	spin_unlock_irq(&mdev->al_lock);
+
+	/*
+	if (!al_ext) {
+		if (al_flags & LC_STARVING)
+			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
+		if (al_flags & LC_DIRTY)
+			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
+	}
+	*/
+
+	return al_ext;
+}
+
+void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+	struct lc_element *al_ext;
+	struct update_al_work al_work;
+
+	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
+
+	trace_drbd_actlog(mdev, sector, "al_begin_io");
+
+	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));
+
+	if (al_ext->lc_number != enr) {
+		/* drbd_al_write_transaction(mdev,al_ext,enr);
+		 * recurses into generic_make_request(), which
+		 * disallows recursion, bios being serialized on the
+		 * current->bio_tail list now.
+		 * we have to delegate updates to the activity log
+		 * to the worker thread. */
+		init_completion(&al_work.event);
+		al_work.al_ext = al_ext;
+		al_work.enr = enr;
+		al_work.old_enr = al_ext->lc_number;
+		al_work.w.cb = w_al_write_transaction;
+		drbd_queue_work_front(&mdev->data.work, &al_work.w);
+		wait_for_completion(&al_work.event);
+
+		mdev->al_writ_cnt++;
+
+		spin_lock_irq(&mdev->al_lock);
+		lc_changed(mdev->act_log, al_ext);
+		spin_unlock_irq(&mdev->al_lock);
+		wake_up(&mdev->al_wait);
+	}
+}
+
+void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+	struct lc_element *extent;
+	unsigned long flags;
+
+	trace_drbd_actlog(mdev, sector, "al_complete_io");
+
+	spin_lock_irqsave(&mdev->al_lock, flags);
+
+	extent = lc_find(mdev->act_log, enr);
+
+	if (!extent) {
+		spin_unlock_irqrestore(&mdev->al_lock, flags);
+		dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
+		return;
+	}
+
+	if (lc_put(mdev->act_log, extent) == 0)
+		wake_up(&mdev->al_wait);
+
+	spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+int
+w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct update_al_work *aw = container_of(w, struct update_al_work, w);
+	struct lc_element *updated = aw->al_ext;
+	const unsigned int new_enr = aw->enr;
+	const unsigned int evicted = aw->old_enr;
+	struct al_transaction *buffer;
+	sector_t sector;
+	int i, n, mx;
+	unsigned int extent_nr;
+	u32 xor_sum = 0;
+
+	if (!get_ldev(mdev)) {
+		dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n");
+		complete(&((struct update_al_work *)w)->event);
+		return 1;
+	}
+	/* do we have to do a bitmap write, first?
+	 * TODO reduce maximum latency:
+	 * submit both bios, then wait for both,
+	 * instead of doing two synchronous sector writes. */
+	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
+		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
+
+	mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */
+	buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+
+	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
+
+	n = lc_index_of(mdev->act_log, updated);
+
+	buffer->updates[0].pos = cpu_to_be32(n);
+	buffer->updates[0].extent = cpu_to_be32(new_enr);
+
+	xor_sum ^= new_enr;
+
+	mx = min_t(int, AL_EXTENTS_PT,
+		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
+	for (i = 0; i < mx; i++) {
+		unsigned idx = mdev->al_tr_cycle + i;
+		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
+		buffer->updates[i+1].pos = cpu_to_be32(idx);
+		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
+		xor_sum ^= extent_nr;
+	}
+	for (; i < AL_EXTENTS_PT; i++) {
+		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
+		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
+		xor_sum ^= LC_FREE;
+	}
+	mdev->al_tr_cycle += AL_EXTENTS_PT;
+	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
+		mdev->al_tr_cycle = 0;
+
+	buffer->xor_sum = cpu_to_be32(xor_sum);
+
+	sector =  mdev->ldev->md.md_offset
+		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
+
+	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
+		drbd_chk_io_error(mdev, 1, TRUE);
+
+	if (++mdev->al_tr_pos >
+	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+		mdev->al_tr_pos = 0;
+
+	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
+	mdev->al_tr_number++;
+
+	mutex_unlock(&mdev->md_io_mutex);
+
+	complete(&((struct update_al_work *)w)->event);
+	put_ldev(mdev);
+
+	return 1;
+}
+
+/**
+ * drbd_al_read_tr() - Read a single transaction from the on disk activity log
+ * @mdev:	DRBD device.
+ * @bdev:	Block device to read form.
+ * @b:		pointer to an al_transaction.
+ * @index:	On disk slot of the transaction to read.
+ *
+ * Returns -1 on IO error, 0 on checksum error and 1 upon success.
+ */
+static int drbd_al_read_tr(struct drbd_conf *mdev,
+			   struct drbd_backing_dev *bdev,
+			   struct al_transaction *b,
+			   int index)
+{
+	sector_t sector;
+	int rv, i;
+	u32 xor_sum = 0;
+
+	sector = bdev->md.md_offset + bdev->md.al_offset + index;
+
+	/* Dont process error normally,
+	 * as this is done before disk is attached! */
+	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
+		return -1;
+
+	rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);
+
+	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
+		xor_sum ^= be32_to_cpu(b->updates[i].extent);
+	rv &= (xor_sum == be32_to_cpu(b->xor_sum));
+
+	return rv;
+}
+
+/**
+ * drbd_al_read_log() - Restores the activity log from its on disk representation.
+ * @mdev:	DRBD device.
+ * @bdev:	Block device to read form.
+ *
+ * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
+ */
+int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	struct al_transaction *buffer;
+	int i;
+	int rv;
+	int mx;
+	int active_extents = 0;
+	int transactions = 0;
+	int found_valid = 0;
+	int from = 0;
+	int to = 0;
+	u32 from_tnr = 0;
+	u32 to_tnr = 0;
+	u32 cnr;
+
+	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+
+	/* lock out all other meta data io for now,
+	 * and make sure the page is mapped.
+	 */
+	mutex_lock(&mdev->md_io_mutex);
+	buffer = page_address(mdev->md_io_page);
+
+	/* Find the valid transaction in the log */
+	for (i = 0; i <= mx; i++) {
+		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+		if (rv == 0)
+			continue;
+		if (rv == -1) {
+			mutex_unlock(&mdev->md_io_mutex);
+			return 0;
+		}
+		cnr = be32_to_cpu(buffer->tr_number);
+
+		if (++found_valid == 1) {
+			from = i;
+			to = i;
+			from_tnr = cnr;
+			to_tnr = cnr;
+			continue;
+		}
+		if ((int)cnr - (int)from_tnr < 0) {
+			D_ASSERT(from_tnr - cnr + i - from == mx+1);
+			from = i;
+			from_tnr = cnr;
+		}
+		if ((int)cnr - (int)to_tnr > 0) {
+			D_ASSERT(cnr - to_tnr == i - to);
+			to = i;
+			to_tnr = cnr;
+		}
+	}
+
+	if (!found_valid) {
+		dev_warn(DEV, "No usable activity log found.\n");
+		mutex_unlock(&mdev->md_io_mutex);
+		return 1;
+	}
+
+	/* Read the valid transactions.
+	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
+	i = from;
+	while (1) {
+		int j, pos;
+		unsigned int extent_nr;
+		unsigned int trn;
+
+		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+		ERR_IF(rv == 0) goto cancel;
+		if (rv == -1) {
+			mutex_unlock(&mdev->md_io_mutex);
+			return 0;
+		}
+
+		trn = be32_to_cpu(buffer->tr_number);
+
+		spin_lock_irq(&mdev->al_lock);
+
+		/* This loop runs backwards because in the cyclic
+		   elements there might be an old version of the
+		   updated element (in slot 0). So the element in slot 0
+		   can overwrite old versions. */
+		for (j = AL_EXTENTS_PT; j >= 0; j--) {
+			pos = be32_to_cpu(buffer->updates[j].pos);
+			extent_nr = be32_to_cpu(buffer->updates[j].extent);
+
+			if (extent_nr == LC_FREE)
+				continue;
+
+			lc_set(mdev->act_log, extent_nr, pos);
+			active_extents++;
+		}
+		spin_unlock_irq(&mdev->al_lock);
+
+		transactions++;
+
+cancel:
+		if (i == to)
+			break;
+		i++;
+		if (i > mx)
+			i = 0;
+	}
+
+	mdev->al_tr_number = to_tnr+1;
+	mdev->al_tr_pos = to;
+	if (++mdev->al_tr_pos >
+	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+		mdev->al_tr_pos = 0;
+
+	/* ok, we are done with it */
+	mutex_unlock(&mdev->md_io_mutex);
+
+	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
+	     transactions, active_extents);
+
+	return 1;
+}
+
+static void atodb_endio(struct bio *bio, int error)
+{
+	struct drbd_atodb_wait *wc = bio->bi_private;
+	struct drbd_conf *mdev = wc->mdev;
+	struct page *page;
+	int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+	/* strange behavior of some lower level drivers...
+	 * fail the request by clearing the uptodate flag,
+	 * but do not return any error?! */
+	if (!error && !uptodate)
+		error = -EIO;
+
+	drbd_chk_io_error(mdev, error, TRUE);
+	if (error && wc->error == 0)
+		wc->error = error;
+
+	if (atomic_dec_and_test(&wc->count))
+		complete(&wc->io_done);
+
+	page = bio->bi_io_vec[0].bv_page;
+	put_page(page);
+	bio_put(bio);
+	mdev->bm_writ_cnt++;
+	put_ldev(mdev);
+}
+
+#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+/* activity log to on disk bitmap -- prepare bio unless that sector
+ * is already covered by previously prepared bios */
+static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
+					struct bio **bios,
+					unsigned int enr,
+					struct drbd_atodb_wait *wc) __must_hold(local)
+{
+	struct bio *bio;
+	struct page *page;
+	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+				      + mdev->ldev->md.bm_offset;
+	unsigned int page_offset = PAGE_SIZE;
+	int offset;
+	int i = 0;
+	int err = -ENOMEM;
+
+	/* Check if that enr is already covered by an already created bio.
+	 * Caution, bios[] is not NULL terminated,
+	 * but only initialized to all NULL.
+	 * For completely scattered activity log,
+	 * the last invocation iterates over all bios,
+	 * and finds the last NULL entry.
+	 */
+	while ((bio = bios[i])) {
+		if (bio->bi_sector == on_disk_sector)
+			return 0;
+		i++;
+	}
+	/* bios[i] == NULL, the next not yet used slot */
+
+	/* GFP_KERNEL, we are not in the write-out path */
+	bio = bio_alloc(GFP_KERNEL, 1);
+	if (bio == NULL)
+		return -ENOMEM;
+
+	if (i > 0) {
+		const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
+		page_offset = prev_bv->bv_offset + prev_bv->bv_len;
+		page = prev_bv->bv_page;
+	}
+	if (page_offset == PAGE_SIZE) {
+		page = alloc_page(__GFP_HIGHMEM);
+		if (page == NULL)
+			goto out_bio_put;
+		page_offset = 0;
+	} else {
+		get_page(page);
+	}
+
+	offset = S2W(enr);
+	drbd_bm_get_lel(mdev, offset,
+			min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
+			kmap(page) + page_offset);
+	kunmap(page);
+
+	bio->bi_private = wc;
+	bio->bi_end_io = atodb_endio;
+	bio->bi_bdev = mdev->ldev->md_bdev;
+	bio->bi_sector = on_disk_sector;
+
+	if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
+		goto out_put_page;
+
+	atomic_inc(&wc->count);
+	/* we already know that we may do this...
+	 * get_ldev_if_state(mdev,D_ATTACHING);
+	 * just get the extra reference, so that the local_cnt reflects
+	 * the number of pending IO requests DRBD at its backing device.
+	 */
+	atomic_inc(&mdev->local_cnt);
+
+	bios[i] = bio;
+
+	return 0;
+
+out_put_page:
+	err = -EINVAL;
+	put_page(page);
+out_bio_put:
+	bio_put(bio);
+	return err;
+}
+
+/**
+ * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
+ * @mdev:	DRBD device.
+ *
+ * Called when we detach (unconfigure) local storage,
+ * or when we go from R_PRIMARY to R_SECONDARY role.
+ */
+void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
+{
+	int i, nr_elements;
+	unsigned int enr;
+	struct bio **bios;
+	struct drbd_atodb_wait wc;
+
+	ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
+		return; /* sorry, I don't have any act_log etc... */
+
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+	nr_elements = mdev->act_log->nr_elements;
+
+	/* GFP_KERNEL, we are not in anyone's write-out path */
+	bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
+	if (!bios)
+		goto submit_one_by_one;
+
+	atomic_set(&wc.count, 0);
+	init_completion(&wc.io_done);
+	wc.mdev = mdev;
+	wc.error = 0;
+
+	for (i = 0; i < nr_elements; i++) {
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+		if (enr == LC_FREE)
+			continue;
+		/* next statement also does atomic_inc wc.count and local_cnt */
+		if (atodb_prepare_unless_covered(mdev, bios,
+						enr/AL_EXT_PER_BM_SECT,
+						&wc))
+			goto free_bios_submit_one_by_one;
+	}
+
+	/* unnecessary optimization? */
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+
+	/* all prepared, submit them */
+	for (i = 0; i < nr_elements; i++) {
+		if (bios[i] == NULL)
+			break;
+		if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
+			bios[i]->bi_rw = WRITE;
+			bio_endio(bios[i], -EIO);
+		} else {
+			submit_bio(WRITE, bios[i]);
+		}
+	}
+
+	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
+
+	/* always (try to) flush bitmap to stable storage */
+	drbd_md_flush(mdev);
+
+	/* In case we did not submit a single IO do not wait for
+	 * them to complete. ( Because we would wait forever here. )
+	 *
+	 * In case we had IOs and they are already complete, there
+	 * is not point in waiting anyways.
+	 * Therefore this if () ... */
+	if (atomic_read(&wc.count))
+		wait_for_completion(&wc.io_done);
+
+	put_ldev(mdev);
+
+	kfree(bios);
+	return;
+
+ free_bios_submit_one_by_one:
+	/* free everything by calling the endio callback directly. */
+	for (i = 0; i < nr_elements && bios[i]; i++)
+		bio_endio(bios[i], 0);
+
+	kfree(bios);
+
+ submit_one_by_one:
+	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
+
+	for (i = 0; i < mdev->act_log->nr_elements; i++) {
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+		if (enr == LC_FREE)
+			continue;
+		/* Really slow: if we have al-extents 16..19 active,
+		 * sector 4 will be written four times! Synchronous! */
+		drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
+	}
+
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+	put_ldev(mdev);
+}
+
+/**
+ * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
+ * @mdev:	DRBD device.
+ */
+void drbd_al_apply_to_bm(struct drbd_conf *mdev)
+{
+	unsigned int enr;
+	unsigned long add = 0;
+	char ppb[10];
+	int i;
+
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+	for (i = 0; i < mdev->act_log->nr_elements; i++) {
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+		if (enr == LC_FREE)
+			continue;
+		add += drbd_bm_ALe_set_all(mdev, enr);
+	}
+
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+
+	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
+	     ppsize(ppb, Bit2KB(add)));
+}
+
+static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
+{
+	int rv;
+
+	spin_lock_irq(&mdev->al_lock);
+	rv = (al_ext->refcnt == 0);
+	if (likely(rv))
+		lc_del(mdev->act_log, al_ext);
+	spin_unlock_irq(&mdev->al_lock);
+
+	return rv;
+}
+
+/**
+ * drbd_al_shrink() - Removes all active extents form the activity log
+ * @mdev:	DRBD device.
+ *
+ * Removes all active extents form the activity log, waiting until
+ * the reference count of each entry dropped to 0 first, of course.
+ *
+ * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
+ */
+void drbd_al_shrink(struct drbd_conf *mdev)
+{
+	struct lc_element *al_ext;
+	int i;
+
+	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+
+	for (i = 0; i < mdev->act_log->nr_elements; i++) {
+		al_ext = lc_element_by_index(mdev->act_log, i);
+		if (al_ext->lc_number == LC_FREE)
+			continue;
+		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
+	}
+
+	wake_up(&mdev->al_wait);
+}
+
+static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
+
+	if (!get_ldev(mdev)) {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
+		kfree(udw);
+		return 1;
+	}
+
+	drbd_bm_write_sect(mdev, udw->enr);
+	put_ldev(mdev);
+
+	kfree(udw);
+
+	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
+		switch (mdev->state.conn) {
+		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
+		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
+			drbd_resync_finished(mdev);
+		default:
+			/* nothing to do */
+			break;
+		}
+	}
+	drbd_bcast_sync_progress(mdev);
+
+	return 1;
+}
+
+
+/* ATTENTION. The AL's extents are 4MB each, while the extents in the
+ * resync LRU-cache are 16MB each.
+ * The caller of this function has to hold an get_ldev() reference.
+ *
+ * TODO will be obsoleted once we have a caching lru of the on disk bitmap
+ */
+static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
+				      int count, int success)
+{
+	struct lc_element *e;
+	struct update_odbm_work *udw;
+
+	unsigned int enr;
+
+	D_ASSERT(atomic_read(&mdev->local_cnt));
+
+	/* I simply assume that a sector/size pair never crosses
+	 * a 16 MB extent border. (Currently this is true...) */
+	enr = BM_SECT_TO_EXT(sector);
+
+	e = lc_get(mdev->resync, enr);
+	if (e) {
+		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
+		if (ext->lce.lc_number == enr) {
+			if (success)
+				ext->rs_left -= count;
+			else
+				ext->rs_failed += count;
+			if (ext->rs_left < ext->rs_failed) {
+				dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
+				    "rs_failed=%d count=%d\n",
+				     (unsigned long long)sector,
+				     ext->lce.lc_number, ext->rs_left,
+				     ext->rs_failed, count);
+				dump_stack();
+
+				lc_put(mdev->resync, &ext->lce);
+				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+				return;
+			}
+		} else {
+			/* Normally this element should be in the cache,
+			 * since drbd_rs_begin_io() pulled it already in.
+			 *
+			 * But maybe an application write finished, and we set
+			 * something outside the resync lru_cache in sync.
+			 */
+			int rs_left = drbd_bm_e_weight(mdev, enr);
+			if (ext->flags != 0) {
+				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
+				     " -> %d[%u;00]\n",
+				     ext->lce.lc_number, ext->rs_left,
+				     ext->flags, enr, rs_left);
+				ext->flags = 0;
+			}
+			if (ext->rs_failed) {
+				dev_warn(DEV, "Kicking resync_lru element enr=%u "
+				     "out with rs_failed=%d\n",
+				     ext->lce.lc_number, ext->rs_failed);
+				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+			}
+			ext->rs_left = rs_left;
+			ext->rs_failed = success ? 0 : count;
+			lc_changed(mdev->resync, &ext->lce);
+		}
+		lc_put(mdev->resync, &ext->lce);
+		/* no race, we are within the al_lock! */
+
+		if (ext->rs_left == ext->rs_failed) {
+			ext->rs_failed = 0;
+
+			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
+			if (udw) {
+				udw->enr = ext->lce.lc_number;
+				udw->w.cb = w_update_odbm;
+				drbd_queue_work_front(&mdev->data.work, &udw->w);
+			} else {
+				dev_warn(DEV, "Could not kmalloc an udw\n");
+				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+			}
+		}
+	} else {
+		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
+		    mdev->resync_locked,
+		    mdev->resync->nr_elements,
+		    mdev->resync->flags);
+	}
+}
+
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector.  Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
+ *
+ */
+void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
+		       const char *file, const unsigned int line)
+{
+	/* Is called from worker and receiver context _only_ */
+	unsigned long sbnr, ebnr, lbnr;
+	unsigned long count = 0;
+	sector_t esector, nr_sectors;
+	int wake_up = 0;
+	unsigned long flags;
+
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
+				(unsigned long long)sector, size);
+		return;
+	}
+	nr_sectors = drbd_get_capacity(mdev->this_bdev);
+	esector = sector + (size >> 9) - 1;
+
+	ERR_IF(sector >= nr_sectors) return;
+	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+	/* we clear it (in sync).
+	 * round up start sector, round down end sector.  we make sure we only
+	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
+	if (unlikely(esector < BM_SECT_PER_BIT-1))
+		return;
+	if (unlikely(esector == (nr_sectors-1)))
+		ebnr = lbnr;
+	else
+		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+	trace_drbd_resync(mdev, TRACE_LVL_METRICS,
+			  "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n",
+			  (unsigned long long)sector, size, sbnr, ebnr);
+
+	if (sbnr > ebnr)
+		return;
+
+	/*
+	 * ok, (capacity & 7) != 0 sometimes, but who cares...
+	 * we count rs_{total,left} in bits, not sectors.
+	 */
+	spin_lock_irqsave(&mdev->al_lock, flags);
+	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
+	if (count) {
+		/* we need the lock for drbd_try_clear_on_disk_bm */
+		if (jiffies - mdev->rs_mark_time > HZ*10) {
+			/* should be rolling marks,
+			 * but we estimate only anyways. */
+			if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
+			    mdev->state.conn != C_PAUSED_SYNC_T &&
+			    mdev->state.conn != C_PAUSED_SYNC_S) {
+				mdev->rs_mark_time = jiffies;
+				mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+			}
+		}
+		if (get_ldev(mdev)) {
+			drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+			put_ldev(mdev);
+		}
+		/* just wake_up unconditional now, various lc_chaged(),
+		 * lc_put() in drbd_try_clear_on_disk_bm(). */
+		wake_up = 1;
+	}
+	spin_unlock_irqrestore(&mdev->al_lock, flags);
+	if (wake_up)
+		wake_up(&mdev->al_wait);
+}
+
+/*
+ * this is intended to set one request worth of data out of sync.
+ * affects at least 1 bit,
+ * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
+ *
+ * called by tl_clear and drbd_send_dblock (==drbd_make_request).
+ * so this can be _any_ process.
+ */
+void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+			    const char *file, const unsigned int line)
+{
+	unsigned long sbnr, ebnr, lbnr, flags;
+	sector_t esector, nr_sectors;
+	unsigned int enr, count;
+	struct lc_element *e;
+
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+		dev_err(DEV, "sector: %llus, size: %d\n",
+			(unsigned long long)sector, size);
+		return;
+	}
+
+	if (!get_ldev(mdev))
+		return; /* no disk, no metadata, no bitmap to set bits in */
+
+	nr_sectors = drbd_get_capacity(mdev->this_bdev);
+	esector = sector + (size >> 9) - 1;
+
+	ERR_IF(sector >= nr_sectors)
+		goto out;
+	ERR_IF(esector >= nr_sectors)
+		esector = (nr_sectors-1);
+
+	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+	/* we set it out of sync,
+	 * we do not need to round anything here */
+	sbnr = BM_SECT_TO_BIT(sector);
+	ebnr = BM_SECT_TO_BIT(esector);
+
+	trace_drbd_resync(mdev, TRACE_LVL_METRICS,
+			  "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n",
+			  (unsigned long long)sector, size, sbnr, ebnr);
+
+	/* ok, (capacity & 7) != 0 sometimes, but who cares...
+	 * we count rs_{total,left} in bits, not sectors.  */
+	spin_lock_irqsave(&mdev->al_lock, flags);
+	count = drbd_bm_set_bits(mdev, sbnr, ebnr);
+
+	enr = BM_SECT_TO_EXT(sector);
+	e = lc_find(mdev->resync, enr);
+	if (e)
+		lc_entry(e, struct bm_extent, lce)->rs_left += count;
+	spin_unlock_irqrestore(&mdev->al_lock, flags);
+
+out:
+	put_ldev(mdev);
+}
+
+static
+struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
+{
+	struct lc_element *e;
+	struct bm_extent *bm_ext;
+	int wakeup = 0;
+	unsigned long rs_flags;
+
+	spin_lock_irq(&mdev->al_lock);
+	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
+		spin_unlock_irq(&mdev->al_lock);
+		return NULL;
+	}
+	e = lc_get(mdev->resync, enr);
+	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+	if (bm_ext) {
+		if (bm_ext->lce.lc_number != enr) {
+			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+			bm_ext->rs_failed = 0;
+			lc_changed(mdev->resync, &bm_ext->lce);
+			wakeup = 1;
+		}
+		if (bm_ext->lce.refcnt == 1)
+			mdev->resync_locked++;
+		set_bit(BME_NO_WRITES, &bm_ext->flags);
+	}
+	rs_flags = mdev->resync->flags;
+	spin_unlock_irq(&mdev->al_lock);
+	if (wakeup)
+		wake_up(&mdev->al_wait);
+
+	if (!bm_ext) {
+		if (rs_flags & LC_STARVING)
+			dev_warn(DEV, "Have to wait for element"
+			     " (resync LRU too small?)\n");
+		BUG_ON(rs_flags & LC_DIRTY);
+	}
+
+	return bm_ext;
+}
+
+static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
+{
+	struct lc_element *al_ext;
+	int rv = 0;
+
+	spin_lock_irq(&mdev->al_lock);
+	if (unlikely(enr == mdev->act_log->new_number))
+		rv = 1;
+	else {
+		al_ext = lc_find(mdev->act_log, enr);
+		if (al_ext) {
+			if (al_ext->refcnt)
+				rv = 1;
+		}
+	}
+	spin_unlock_irq(&mdev->al_lock);
+
+	/*
+	if (unlikely(rv)) {
+		dev_info(DEV, "Delaying sync read until app's write is done\n");
+	}
+	*/
+	return rv;
+}
+
+/**
+ * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
+ * @mdev:	DRBD device.
+ * @sector:	The sector number.
+ *
+ * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
+ */
+int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+	unsigned int enr = BM_SECT_TO_EXT(sector);
+	struct bm_extent *bm_ext;
+	int i, sig;
+
+	trace_drbd_resync(mdev, TRACE_LVL_ALL,
+			  "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n",
+			  (unsigned long long)sector, enr);
+
+	sig = wait_event_interruptible(mdev->al_wait,
+			(bm_ext = _bme_get(mdev, enr)));
+	if (sig)
+		return 0;
+
+	if (test_bit(BME_LOCKED, &bm_ext->flags))
+		return 1;
+
+	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+		sig = wait_event_interruptible(mdev->al_wait,
+				!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
+		if (sig) {
+			spin_lock_irq(&mdev->al_lock);
+			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+				clear_bit(BME_NO_WRITES, &bm_ext->flags);
+				mdev->resync_locked--;
+				wake_up(&mdev->al_wait);
+			}
+			spin_unlock_irq(&mdev->al_lock);
+			return 0;
+		}
+	}
+
+	set_bit(BME_LOCKED, &bm_ext->flags);
+
+	return 1;
+}
+
+/**
+ * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
+ * @mdev:	DRBD device.
+ * @sector:	The sector number.
+ *
+ * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
+ * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
+ * if there is still application IO going on in this area.
+ */
+int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+	unsigned int enr = BM_SECT_TO_EXT(sector);
+	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
+	struct lc_element *e;
+	struct bm_extent *bm_ext;
+	int i;
+
+	trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n",
+			  (unsigned long long)sector);
+
+	spin_lock_irq(&mdev->al_lock);
+	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
+		/* in case you have very heavy scattered io, it may
+		 * stall the syncer undefined if we give up the ref count
+		 * when we try again and requeue.
+		 *
+		 * if we don't give up the refcount, but the next time
+		 * we are scheduled this extent has been "synced" by new
+		 * application writes, we'd miss the lc_put on the
+		 * extent we keep the refcount on.
+		 * so we remembered which extent we had to try again, and
+		 * if the next requested one is something else, we do
+		 * the lc_put here...
+		 * we also have to wake_up
+		 */
+
+		trace_drbd_resync(mdev, TRACE_LVL_ALL,
+				  "dropping %u, apparently got 'synced' by application io\n",
+				  mdev->resync_wenr);
+
+		e = lc_find(mdev->resync, mdev->resync_wenr);
+		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+		if (bm_ext) {
+			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+			clear_bit(BME_NO_WRITES, &bm_ext->flags);
+			mdev->resync_wenr = LC_FREE;
+			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
+				mdev->resync_locked--;
+			wake_up(&mdev->al_wait);
+		} else {
+			dev_alert(DEV, "LOGIC BUG\n");
+		}
+	}
+	/* TRY. */
+	e = lc_try_get(mdev->resync, enr);
+	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+	if (bm_ext) {
+		if (test_bit(BME_LOCKED, &bm_ext->flags))
+			goto proceed;
+		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
+			mdev->resync_locked++;
+		} else {
+			/* we did set the BME_NO_WRITES,
+			 * but then could not set BME_LOCKED,
+			 * so we tried again.
+			 * drop the extra reference. */
+			trace_drbd_resync(mdev, TRACE_LVL_ALL,
+					  "dropping extra reference on %u\n", enr);
+
+			bm_ext->lce.refcnt--;
+			D_ASSERT(bm_ext->lce.refcnt > 0);
+		}
+		goto check_al;
+	} else {
+		/* do we rather want to try later? */
+		if (mdev->resync_locked > mdev->resync->nr_elements-3) {
+			trace_drbd_resync(mdev, TRACE_LVL_ALL,
+					  "resync_locked = %u!\n", mdev->resync_locked);
+
+			goto try_again;
+		}
+		/* Do or do not. There is no try. -- Yoda */
+		e = lc_get(mdev->resync, enr);
+		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+		if (!bm_ext) {
+			const unsigned long rs_flags = mdev->resync->flags;
+			if (rs_flags & LC_STARVING)
+				dev_warn(DEV, "Have to wait for element"
+				     " (resync LRU too small?)\n");
+			BUG_ON(rs_flags & LC_DIRTY);
+			goto try_again;
+		}
+		if (bm_ext->lce.lc_number != enr) {
+			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+			bm_ext->rs_failed = 0;
+			lc_changed(mdev->resync, &bm_ext->lce);
+			wake_up(&mdev->al_wait);
+			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
+		}
+		set_bit(BME_NO_WRITES, &bm_ext->flags);
+		D_ASSERT(bm_ext->lce.refcnt == 1);
+		mdev->resync_locked++;
+		goto check_al;
+	}
+check_al:
+	trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr);
+
+	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+		if (unlikely(al_enr+i == mdev->act_log->new_number))
+			goto try_again;
+		if (lc_is_used(mdev->act_log, al_enr+i))
+			goto try_again;
+	}
+	set_bit(BME_LOCKED, &bm_ext->flags);
+proceed:
+	mdev->resync_wenr = LC_FREE;
+	spin_unlock_irq(&mdev->al_lock);
+	return 0;
+
+try_again:
+	trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr);
+	if (bm_ext)
+		mdev->resync_wenr = enr;
+	spin_unlock_irq(&mdev->al_lock);
+	return -EAGAIN;
+}
+
+void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+	unsigned int enr = BM_SECT_TO_EXT(sector);
+	struct lc_element *e;
+	struct bm_extent *bm_ext;
+	unsigned long flags;
+
+	trace_drbd_resync(mdev, TRACE_LVL_ALL,
+			  "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n",
+			  (long long)sector, enr);
+
+	spin_lock_irqsave(&mdev->al_lock, flags);
+	e = lc_find(mdev->resync, enr);
+	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+	if (!bm_ext) {
+		spin_unlock_irqrestore(&mdev->al_lock, flags);
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
+		return;
+	}
+
+	if (bm_ext->lce.refcnt == 0) {
+		spin_unlock_irqrestore(&mdev->al_lock, flags);
+		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
+		    "but refcnt is 0!?\n",
+		    (unsigned long long)sector, enr);
+		return;
+	}
+
+	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+		clear_bit(BME_LOCKED, &bm_ext->flags);
+		clear_bit(BME_NO_WRITES, &bm_ext->flags);
+		mdev->resync_locked--;
+		wake_up(&mdev->al_wait);
+	}
+
+	spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+/**
+ * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
+ * @mdev:	DRBD device.
+ */
+void drbd_rs_cancel_all(struct drbd_conf *mdev)
+{
+	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n");
+
+	spin_lock_irq(&mdev->al_lock);
+
+	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
+		lc_reset(mdev->resync);
+		put_ldev(mdev);
+	}
+	mdev->resync_locked = 0;
+	mdev->resync_wenr = LC_FREE;
+	spin_unlock_irq(&mdev->al_lock);
+	wake_up(&mdev->al_wait);
+}
+
+/**
+ * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
+ * @mdev:	DRBD device.
+ *
+ * Returns 0 upon success, -EAGAIN if at least one reference count was
+ * not zero.
+ */
+int drbd_rs_del_all(struct drbd_conf *mdev)
+{
+	struct lc_element *e;
+	struct bm_extent *bm_ext;
+	int i;
+
+	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n");
+
+	spin_lock_irq(&mdev->al_lock);
+
+	if (get_ldev_if_state(mdev, D_FAILED)) {
+		/* ok, ->resync is there. */
+		for (i = 0; i < mdev->resync->nr_elements; i++) {
+			e = lc_element_by_index(mdev->resync, i);
+			bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+			if (bm_ext->lce.lc_number == LC_FREE)
+				continue;
+			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
+				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
+				     " got 'synced' by application io\n",
+				     mdev->resync_wenr);
+				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+				clear_bit(BME_NO_WRITES, &bm_ext->flags);
+				mdev->resync_wenr = LC_FREE;
+				lc_put(mdev->resync, &bm_ext->lce);
+			}
+			if (bm_ext->lce.refcnt != 0) {
+				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
+				     "refcnt=%d\n", bm_ext->lce.refcnt);
+				put_ldev(mdev);
+				spin_unlock_irq(&mdev->al_lock);
+				return -EAGAIN;
+			}
+			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
+			lc_del(mdev->resync, &bm_ext->lce);
+		}
+		D_ASSERT(mdev->resync->used == 0);
+		put_ldev(mdev);
+	}
+	spin_unlock_irq(&mdev->al_lock);
+
+	return 0;
+}
+
+/**
+ * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
+ * @mdev:	DRBD device.
+ * @sector:	The sector number.
+ * @size:	Size of failed IO operation, in byte.
+ */
+void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
+{
+	/* Is called from worker and receiver context _only_ */
+	unsigned long sbnr, ebnr, lbnr;
+	unsigned long count;
+	sector_t esector, nr_sectors;
+	int wake_up = 0;
+
+	trace_drbd_resync(mdev, TRACE_LVL_SUMMARY,
+			  "drbd_rs_failed_io: sector=%llus, size=%u\n",
+			  (unsigned long long)sector, size);
+
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
+				(unsigned long long)sector, size);
+		return;
+	}
+	nr_sectors = drbd_get_capacity(mdev->this_bdev);
+	esector = sector + (size >> 9) - 1;
+
+	ERR_IF(sector >= nr_sectors) return;
+	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+	/*
+	 * round up start sector, round down end sector.  we make sure we only
+	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
+	if (unlikely(esector < BM_SECT_PER_BIT-1))
+		return;
+	if (unlikely(esector == (nr_sectors-1)))
+		ebnr = lbnr;
+	else
+		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+	if (sbnr > ebnr)
+		return;
+
+	/*
+	 * ok, (capacity & 7) != 0 sometimes, but who cares...
+	 * we count rs_{total,left} in bits, not sectors.
+	 */
+	spin_lock_irq(&mdev->al_lock);
+	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
+	if (count) {
+		mdev->rs_failed += count;
+
+		if (get_ldev(mdev)) {
+			drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
+			put_ldev(mdev);
+		}
+
+		/* just wake_up unconditional now, various lc_chaged(),
+		 * lc_put() in drbd_try_clear_on_disk_bm(). */
+		wake_up = 1;
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	if (wake_up)
+		wake_up(&mdev->al_wait);
+}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
new file mode 100644
index 000000000000..b61057e77882
--- /dev/null
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -0,0 +1,1327 @@
+/*
+   drbd_bitmap.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/bitops.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/drbd.h>
+#include <asm/kmap_types.h>
+#include "drbd_int.h"
+
+/* OPAQUE outside this file!
+ * interface defined in drbd_int.h
+
+ * convention:
+ * function name drbd_bm_... => used elsewhere, "public".
+ * function name      bm_... => internal to implementation, "private".
+
+ * Note that since find_first_bit returns int, at the current granularity of
+ * the bitmap (4KB per byte), this implementation "only" supports up to
+ * 1<<(32+12) == 16 TB...
+ */
+
+/*
+ * NOTE
+ *  Access to the *bm_pages is protected by bm_lock.
+ *  It is safe to read the other members within the lock.
+ *
+ *  drbd_bm_set_bits is called from bio_endio callbacks,
+ *  We may be called with irq already disabled,
+ *  so we need spin_lock_irqsave().
+ *  And we need the kmap_atomic.
+ */
+struct drbd_bitmap {
+	struct page **bm_pages;
+	spinlock_t bm_lock;
+	/* WARNING unsigned long bm_*:
+	 * 32bit number of bit offset is just enough for 512 MB bitmap.
+	 * it will blow up if we make the bitmap bigger...
+	 * not that it makes much sense to have a bitmap that large,
+	 * rather change the granularity to 16k or 64k or something.
+	 * (that implies other problems, however...)
+	 */
+	unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
+	unsigned long bm_bits;
+	size_t   bm_words;
+	size_t   bm_number_of_pages;
+	sector_t bm_dev_capacity;
+	struct semaphore bm_change; /* serializes resize operations */
+
+	atomic_t bm_async_io;
+	wait_queue_head_t bm_io_wait;
+
+	unsigned long  bm_flags;
+
+	/* debugging aid, in case we are still racy somewhere */
+	char          *bm_why;
+	struct task_struct *bm_task;
+};
+
+/* definition of bits in bm_flags */
+#define BM_LOCKED       0
+#define BM_MD_IO_ERROR  1
+#define BM_P_VMALLOCED  2
+
+static int bm_is_locked(struct drbd_bitmap *b)
+{
+	return test_bit(BM_LOCKED, &b->bm_flags);
+}
+
+#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
+static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	if (!__ratelimit(&drbd_ratelimit_state))
+		return;
+	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
+	    current == mdev->receiver.task ? "receiver" :
+	    current == mdev->asender.task  ? "asender"  :
+	    current == mdev->worker.task   ? "worker"   : current->comm,
+	    func, b->bm_why ?: "?",
+	    b->bm_task == mdev->receiver.task ? "receiver" :
+	    b->bm_task == mdev->asender.task  ? "asender"  :
+	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+}
+
+void drbd_bm_lock(struct drbd_conf *mdev, char *why)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	int trylock_failed;
+
+	if (!b) {
+		dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
+		return;
+	}
+
+	trylock_failed = down_trylock(&b->bm_change);
+
+	if (trylock_failed) {
+		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
+		    current == mdev->receiver.task ? "receiver" :
+		    current == mdev->asender.task  ? "asender"  :
+		    current == mdev->worker.task   ? "worker"   : current->comm,
+		    why, b->bm_why ?: "?",
+		    b->bm_task == mdev->receiver.task ? "receiver" :
+		    b->bm_task == mdev->asender.task  ? "asender"  :
+		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+		down(&b->bm_change);
+	}
+	if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
+		dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
+
+	b->bm_why  = why;
+	b->bm_task = current;
+}
+
+void drbd_bm_unlock(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	if (!b) {
+		dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
+		return;
+	}
+
+	if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
+		dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
+
+	b->bm_why  = NULL;
+	b->bm_task = NULL;
+	up(&b->bm_change);
+}
+
+/* word offset to long pointer */
+static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
+{
+	struct page *page;
+	unsigned long page_nr;
+
+	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
+	page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+	BUG_ON(page_nr >= b->bm_number_of_pages);
+	page = b->bm_pages[page_nr];
+
+	return (unsigned long *) kmap_atomic(page, km);
+}
+
+static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
+{
+	return __bm_map_paddr(b, offset, KM_IRQ1);
+}
+
+static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+{
+	kunmap_atomic(p_addr, km);
+};
+
+static void bm_unmap(unsigned long *p_addr)
+{
+	return __bm_unmap(p_addr, KM_IRQ1);
+}
+
+/* long word offset of _bitmap_ sector */
+#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+/* word offset from start of bitmap to word number _in_page_
+ * modulo longs per page
+#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
+ hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ so do it explicitly:
+ */
+#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
+
+/* Long words per page */
+#define LWPP (PAGE_SIZE/sizeof(long))
+
+/*
+ * actually most functions herein should take a struct drbd_bitmap*, not a
+ * struct drbd_conf*, but for the debug macros I like to have the mdev around
+ * to be able to report device specific.
+ */
+
+static void bm_free_pages(struct page **pages, unsigned long number)
+{
+	unsigned long i;
+	if (!pages)
+		return;
+
+	for (i = 0; i < number; i++) {
+		if (!pages[i]) {
+			printk(KERN_ALERT "drbd: bm_free_pages tried to free "
+					  "a NULL pointer; i=%lu n=%lu\n",
+					  i, number);
+			continue;
+		}
+		__free_page(pages[i]);
+		pages[i] = NULL;
+	}
+}
+
+static void bm_vk_free(void *ptr, int v)
+{
+	if (v)
+		vfree(ptr);
+	else
+		kfree(ptr);
+}
+
+/*
+ * "have" and "want" are NUMBER OF PAGES.
+ */
+static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
+{
+	struct page **old_pages = b->bm_pages;
+	struct page **new_pages, *page;
+	unsigned int i, bytes, vmalloced = 0;
+	unsigned long have = b->bm_number_of_pages;
+
+	BUG_ON(have == 0 && old_pages != NULL);
+	BUG_ON(have != 0 && old_pages == NULL);
+
+	if (have == want)
+		return old_pages;
+
+	/* Trying kmalloc first, falling back to vmalloc.
+	 * GFP_KERNEL is ok, as this is done when a lower level disk is
+	 * "attached" to the drbd.  Context is receiver thread or cqueue
+	 * thread.  As we have no disk yet, we are not in the IO path,
+	 * not even the IO path of the peer. */
+	bytes = sizeof(struct page *)*want;
+	new_pages = kmalloc(bytes, GFP_KERNEL);
+	if (!new_pages) {
+		new_pages = vmalloc(bytes);
+		if (!new_pages)
+			return NULL;
+		vmalloced = 1;
+	}
+
+	memset(new_pages, 0, bytes);
+	if (want >= have) {
+		for (i = 0; i < have; i++)
+			new_pages[i] = old_pages[i];
+		for (; i < want; i++) {
+			page = alloc_page(GFP_HIGHUSER);
+			if (!page) {
+				bm_free_pages(new_pages + have, i - have);
+				bm_vk_free(new_pages, vmalloced);
+				return NULL;
+			}
+			new_pages[i] = page;
+		}
+	} else {
+		for (i = 0; i < want; i++)
+			new_pages[i] = old_pages[i];
+		/* NOT HERE, we are outside the spinlock!
+		bm_free_pages(old_pages + want, have - want);
+		*/
+	}
+
+	if (vmalloced)
+		set_bit(BM_P_VMALLOCED, &b->bm_flags);
+	else
+		clear_bit(BM_P_VMALLOCED, &b->bm_flags);
+
+	return new_pages;
+}
+
+/*
+ * called on driver init only. TODO call when a device is created.
+ * allocates the drbd_bitmap, and stores it in mdev->bitmap.
+ */
+int drbd_bm_init(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	WARN_ON(b != NULL);
+	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+	spin_lock_init(&b->bm_lock);
+	init_MUTEX(&b->bm_change);
+	init_waitqueue_head(&b->bm_io_wait);
+
+	mdev->bitmap = b;
+
+	return 0;
+}
+
+sector_t drbd_bm_capacity(struct drbd_conf *mdev)
+{
+	ERR_IF(!mdev->bitmap) return 0;
+	return mdev->bitmap->bm_dev_capacity;
+}
+
+/* called on driver unload. TODO: call when a device is destroyed.
+ */
+void drbd_bm_cleanup(struct drbd_conf *mdev)
+{
+	ERR_IF (!mdev->bitmap) return;
+	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
+	bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
+	kfree(mdev->bitmap);
+	mdev->bitmap = NULL;
+}
+
+/*
+ * since (b->bm_bits % BITS_PER_LONG) != 0,
+ * this masks out the remaining bits.
+ * Returns the number of bits cleared.
+ */
+static int bm_clear_surplus(struct drbd_bitmap *b)
+{
+	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+	size_t w = b->bm_bits >> LN2_BPL;
+	int cleared = 0;
+	unsigned long *p_addr, *bm;
+
+	p_addr = bm_map_paddr(b, w);
+	bm = p_addr + MLPP(w);
+	if (w < b->bm_words) {
+		cleared = hweight_long(*bm & ~mask);
+		*bm &= mask;
+		w++; bm++;
+	}
+
+	if (w < b->bm_words) {
+		cleared += hweight_long(*bm);
+		*bm = 0;
+	}
+	bm_unmap(p_addr);
+	return cleared;
+}
+
+static void bm_set_surplus(struct drbd_bitmap *b)
+{
+	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+	size_t w = b->bm_bits >> LN2_BPL;
+	unsigned long *p_addr, *bm;
+
+	p_addr = bm_map_paddr(b, w);
+	bm = p_addr + MLPP(w);
+	if (w < b->bm_words) {
+		*bm |= ~mask;
+		bm++; w++;
+	}
+
+	if (w < b->bm_words) {
+		*bm = ~(0UL);
+	}
+	bm_unmap(p_addr);
+}
+
+static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
+{
+	unsigned long *p_addr, *bm, offset = 0;
+	unsigned long bits = 0;
+	unsigned long i, do_now;
+
+	while (offset < b->bm_words) {
+		i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
+		p_addr = __bm_map_paddr(b, offset, KM_USER0);
+		bm = p_addr + MLPP(offset);
+		while (i--) {
+#ifndef __LITTLE_ENDIAN
+			if (swap_endian)
+				*bm = lel_to_cpu(*bm);
+#endif
+			bits += hweight_long(*bm++);
+		}
+		__bm_unmap(p_addr, KM_USER0);
+		offset += do_now;
+		cond_resched();
+	}
+
+	return bits;
+}
+
+static unsigned long bm_count_bits(struct drbd_bitmap *b)
+{
+	return __bm_count_bits(b, 0);
+}
+
+static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
+{
+	return __bm_count_bits(b, 1);
+}
+
+/* offset and len in long words.*/
+static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
+{
+	unsigned long *p_addr, *bm;
+	size_t do_now, end;
+
+#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
+
+	end = offset + len;
+
+	if (end > b->bm_words) {
+		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+		return;
+	}
+
+	while (offset < end) {
+		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
+		p_addr = bm_map_paddr(b, offset);
+		bm = p_addr + MLPP(offset);
+		if (bm+do_now > p_addr + LWPP) {
+			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+			       p_addr, bm, (int)do_now);
+			break; /* breaks to after catch_oob_access_end() only! */
+		}
+		memset(bm, c, do_now * sizeof(long));
+		bm_unmap(p_addr);
+		offset += do_now;
+	}
+}
+
+/*
+ * make sure the bitmap has enough room for the attached storage,
+ * if necessary, resize.
+ * called whenever we may have changed the device size.
+ * returns -ENOMEM if we could not allocate enough memory, 0 on success.
+ * In case this is actually a resize, we copy the old bitmap into the new one.
+ * Otherwise, the bitmap is initialized to all bits set.
+ */
+int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long bits, words, owords, obits, *p_addr, *bm;
+	unsigned long want, have, onpages; /* number of pages */
+	struct page **npages, **opages = NULL;
+	int err = 0, growing;
+	int opages_vmalloced;
+
+	ERR_IF(!b) return -ENOMEM;
+
+	drbd_bm_lock(mdev, "resize");
+
+	dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
+			(unsigned long long)capacity);
+
+	if (capacity == b->bm_dev_capacity)
+		goto out;
+
+	opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);
+
+	if (capacity == 0) {
+		spin_lock_irq(&b->bm_lock);
+		opages = b->bm_pages;
+		onpages = b->bm_number_of_pages;
+		owords = b->bm_words;
+		b->bm_pages = NULL;
+		b->bm_number_of_pages =
+		b->bm_set   =
+		b->bm_bits  =
+		b->bm_words =
+		b->bm_dev_capacity = 0;
+		spin_unlock_irq(&b->bm_lock);
+		bm_free_pages(opages, onpages);
+		bm_vk_free(opages, opages_vmalloced);
+		goto out;
+	}
+	bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
+
+	/* if we would use
+	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
+	   a 32bit host could present the wrong number of words
+	   to a 64bit host.
+	*/
+	words = ALIGN(bits, 64) >> LN2_BPL;
+
+	if (get_ldev(mdev)) {
+		D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
+		put_ldev(mdev);
+	}
+
+	/* one extra long to catch off by one errors */
+	want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+	have = b->bm_number_of_pages;
+	if (want == have) {
+		D_ASSERT(b->bm_pages != NULL);
+		npages = b->bm_pages;
+	} else {
+		if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
+			npages = NULL;
+		else
+			npages = bm_realloc_pages(b, want);
+	}
+
+	if (!npages) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock_irq(&b->bm_lock);
+	opages = b->bm_pages;
+	owords = b->bm_words;
+	obits  = b->bm_bits;
+
+	growing = bits > obits;
+	if (opages)
+		bm_set_surplus(b);
+
+	b->bm_pages = npages;
+	b->bm_number_of_pages = want;
+	b->bm_bits  = bits;
+	b->bm_words = words;
+	b->bm_dev_capacity = capacity;
+
+	if (growing) {
+		bm_memset(b, owords, 0xff, words-owords);
+		b->bm_set += bits - obits;
+	}
+
+	if (want < have) {
+		/* implicit: (opages != NULL) && (opages != npages) */
+		bm_free_pages(opages + want, have - want);
+	}
+
+	p_addr = bm_map_paddr(b, words);
+	bm = p_addr + MLPP(words);
+	*bm = DRBD_MAGIC;
+	bm_unmap(p_addr);
+
+	(void)bm_clear_surplus(b);
+
+	spin_unlock_irq(&b->bm_lock);
+	if (opages != npages)
+		bm_vk_free(opages, opages_vmalloced);
+	if (!growing)
+		b->bm_set = bm_count_bits(b);
+	dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);
+
+ out:
+	drbd_bm_unlock(mdev);
+	return err;
+}
+
+/* inherently racy:
+ * if not protected by other means, return value may be out of date when
+ * leaving this function...
+ * we still need to lock it, since it is important that this returns
+ * bm_set == 0 precisely.
+ *
+ * maybe bm_set should be atomic_t ?
+ */
+static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long s;
+	unsigned long flags;
+
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	s = b->bm_set;
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+
+	return s;
+}
+
+unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+	unsigned long s;
+	/* if I don't have a disk, I don't know about out-of-sync status */
+	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+		return 0;
+	s = _drbd_bm_total_weight(mdev);
+	put_ldev(mdev);
+	return s;
+}
+
+size_t drbd_bm_words(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	return b->bm_words;
+}
+
+unsigned long drbd_bm_bits(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	ERR_IF(!b) return 0;
+
+	return b->bm_bits;
+}
+
+/* merge number words from buffer into the bitmap starting at offset.
+ * buffer[i] is expected to be little endian unsigned long.
+ * bitmap must be locked by drbd_bm_lock.
+ * currently only used from receive_bitmap.
+ */
+void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+			unsigned long *buffer)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr, *bm;
+	unsigned long word, bits;
+	size_t end, do_now;
+
+	end = offset + number;
+
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm_pages) return;
+	if (number == 0)
+		return;
+	WARN_ON(offset >= b->bm_words);
+	WARN_ON(end    >  b->bm_words);
+
+	spin_lock_irq(&b->bm_lock);
+	while (offset < end) {
+		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+		p_addr = bm_map_paddr(b, offset);
+		bm = p_addr + MLPP(offset);
+		offset += do_now;
+		while (do_now--) {
+			bits = hweight_long(*bm);
+			word = *bm | lel_to_cpu(*buffer++);
+			*bm++ = word;
+			b->bm_set += hweight_long(word) - bits;
+		}
+		bm_unmap(p_addr);
+	}
+	/* with 32bit <-> 64bit cross-platform connect
+	 * this is only correct for current usage,
+	 * where we _know_ that we are 64 bit aligned,
+	 * and know that this function is used in this way, too...
+	 */
+	if (end == b->bm_words)
+		b->bm_set -= bm_clear_surplus(b);
+
+	spin_unlock_irq(&b->bm_lock);
+}
+
+/* copy number words from the bitmap starting at offset into the buffer.
+ * buffer[i] will be little endian unsigned long.
+ */
+void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+		     unsigned long *buffer)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr, *bm;
+	size_t end, do_now;
+
+	end = offset + number;
+
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm_pages) return;
+
+	spin_lock_irq(&b->bm_lock);
+	if ((offset >= b->bm_words) ||
+	    (end    >  b->bm_words) ||
+	    (number <= 0))
+		dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
+			(unsigned long)	offset,
+			(unsigned long)	number,
+			(unsigned long) b->bm_words);
+	else {
+		while (offset < end) {
+			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+			p_addr = bm_map_paddr(b, offset);
+			bm = p_addr + MLPP(offset);
+			offset += do_now;
+			while (do_now--)
+				*buffer++ = cpu_to_lel(*bm++);
+			bm_unmap(p_addr);
+		}
+	}
+	spin_unlock_irq(&b->bm_lock);
+}
+
+/* set all bits in the bitmap */
+void drbd_bm_set_all(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm_pages) return;
+
+	spin_lock_irq(&b->bm_lock);
+	bm_memset(b, 0, 0xff, b->bm_words);
+	(void)bm_clear_surplus(b);
+	b->bm_set = b->bm_bits;
+	spin_unlock_irq(&b->bm_lock);
+}
+
+/* clear all bits in the bitmap */
+void drbd_bm_clear_all(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm_pages) return;
+
+	spin_lock_irq(&b->bm_lock);
+	bm_memset(b, 0, 0, b->bm_words);
+	b->bm_set = 0;
+	spin_unlock_irq(&b->bm_lock);
+}
+
+static void bm_async_io_complete(struct bio *bio, int error)
+{
+	struct drbd_bitmap *b = bio->bi_private;
+	int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+
+	/* strange behavior of some lower level drivers...
+	 * fail the request by clearing the uptodate flag,
+	 * but do not return any error?!
+	 * do we want to WARN() on this? */
+	if (!error && !uptodate)
+		error = -EIO;
+
+	if (error) {
+		/* doh. what now?
+		 * for now, set all bits, and flag MD_IO_ERROR */
+		__set_bit(BM_MD_IO_ERROR, &b->bm_flags);
+	}
+	if (atomic_dec_and_test(&b->bm_async_io))
+		wake_up(&b->bm_io_wait);
+
+	bio_put(bio);
+}
+
+static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
+{
+	/* we are process context. we always get a bio */
+	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+	unsigned int len;
+	sector_t on_disk_sector =
+		mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
+	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+
+	/* this might happen with very small
+	 * flexible external meta data device */
+	len = min_t(unsigned int, PAGE_SIZE,
+		(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
+
+	bio->bi_bdev = mdev->ldev->md_bdev;
+	bio->bi_sector = on_disk_sector;
+	bio_add_page(bio, b->bm_pages[page_nr], len, 0);
+	bio->bi_private = b;
+	bio->bi_end_io = bm_async_io_complete;
+
+	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
+		bio->bi_rw |= rw;
+		bio_endio(bio, -EIO);
+	} else {
+		submit_bio(rw, bio);
+	}
+}
+
+# if defined(__LITTLE_ENDIAN)
+	/* nothing to do, on disk == in memory */
+# define bm_cpu_to_lel(x) ((void)0)
+# else
+void bm_cpu_to_lel(struct drbd_bitmap *b)
+{
+	/* need to cpu_to_lel all the pages ...
+	 * this may be optimized by using
+	 * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
+	 * the following is still not optimal, but better than nothing */
+	unsigned int i;
+	unsigned long *p_addr, *bm;
+	if (b->bm_set == 0) {
+		/* no page at all; avoid swap if all is 0 */
+		i = b->bm_number_of_pages;
+	} else if (b->bm_set == b->bm_bits) {
+		/* only the last page */
+		i = b->bm_number_of_pages - 1;
+	} else {
+		/* all pages */
+		i = 0;
+	}
+	for (; i < b->bm_number_of_pages; i++) {
+		p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
+		for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
+			*bm = cpu_to_lel(*bm);
+		kunmap_atomic(p_addr, KM_USER0);
+	}
+}
+# endif
+/* lel_to_cpu == cpu_to_lel */
+# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
+
+/*
+ * bm_rw: read/write the whole bitmap from/to its on disk location.
+ */
+static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	/* sector_t sector; */
+	int bm_words, num_pages, i;
+	unsigned long now;
+	char ppb[10];
+	int err = 0;
+
+	WARN_ON(!bm_is_locked(b));
+
+	/* no spinlock here, the drbd_bm_lock should be enough! */
+
+	bm_words  = drbd_bm_words(mdev);
+	num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	/* on disk bitmap is little endian */
+	if (rw == WRITE)
+		bm_cpu_to_lel(b);
+
+	now = jiffies;
+	atomic_set(&b->bm_async_io, num_pages);
+	__clear_bit(BM_MD_IO_ERROR, &b->bm_flags);
+
+	/* let the layers below us try to merge these bios... */
+	for (i = 0; i < num_pages; i++)
+		bm_page_io_async(mdev, b, i, rw);
+
+	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
+	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
+
+	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
+		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
+		drbd_chk_io_error(mdev, 1, TRUE);
+		err = -EIO;
+	}
+
+	now = jiffies;
+	if (rw == WRITE) {
+		/* swap back endianness */
+		bm_lel_to_cpu(b);
+		/* flush bitmap to stable storage */
+		drbd_md_flush(mdev);
+	} else /* rw == READ */ {
+		/* just read, if necessary adjust endianness */
+		b->bm_set = bm_count_bits_swap_endian(b);
+		dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
+		     jiffies - now);
+	}
+	now = b->bm_set;
+
+	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+
+	return err;
+}
+
+/**
+ * drbd_bm_read() - Read the whole bitmap from its on disk location.
+ * @mdev:	DRBD device.
+ */
+int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, READ);
+}
+
+/**
+ * drbd_bm_write() - Write the whole bitmap to its on disk location.
+ * @mdev:	DRBD device.
+ */
+int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE);
+}
+
+/**
+ * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
+ * @mdev:	DRBD device.
+ * @enr:	Extent number in the resync lru (happens to be sector offset)
+ *
+ * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
+ * by a single sector write. Therefore enr == sector offset from the
+ * start of the bitmap.
+ */
+int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
+{
+	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+				      + mdev->ldev->md.bm_offset;
+	int bm_words, num_words, offset;
+	int err = 0;
+
+	mutex_lock(&mdev->md_io_mutex);
+	bm_words  = drbd_bm_words(mdev);
+	offset    = S2W(enr);	/* word offset into bitmap */
+	num_words = min(S2W(1), bm_words - offset);
+	if (num_words < S2W(1))
+		memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
+	drbd_bm_get_lel(mdev, offset, num_words,
+			page_address(mdev->md_io_page));
+	if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
+		int i;
+		err = -EIO;
+		dev_err(DEV, "IO ERROR writing bitmap sector %lu "
+		    "(meta-disk sector %llus)\n",
+		    enr, (unsigned long long)on_disk_sector);
+		drbd_chk_io_error(mdev, 1, TRUE);
+		for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
+			drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
+	}
+	mdev->bm_writ_cnt++;
+	mutex_unlock(&mdev->md_io_mutex);
+	return err;
+}
+
+/* NOTE
+ * find_first_bit returns int, we return unsigned long.
+ * should not make much difference anyways, but ...
+ *
+ * this returns a bit number, NOT a sector!
+ */
+#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
+static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
+	const int find_zero_bit, const enum km_type km)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long i = -1UL;
+	unsigned long *p_addr;
+	unsigned long bit_offset; /* bit offset of the mapped page. */
+
+	if (bm_fo > b->bm_bits) {
+		dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
+	} else {
+		while (bm_fo < b->bm_bits) {
+			unsigned long offset;
+			bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */
+			offset = bit_offset >> LN2_BPL;    /* word offset of the page */
+			p_addr = __bm_map_paddr(b, offset, km);
+
+			if (find_zero_bit)
+				i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+			else
+				i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+
+			__bm_unmap(p_addr, km);
+			if (i < PAGE_SIZE*8) {
+				i = bit_offset + i;
+				if (i >= b->bm_bits)
+					break;
+				goto found;
+			}
+			bm_fo = bit_offset + PAGE_SIZE*8;
+		}
+		i = -1UL;
+	}
+ found:
+	return i;
+}
+
+static unsigned long bm_find_next(struct drbd_conf *mdev,
+	unsigned long bm_fo, const int find_zero_bit)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long i = -1UL;
+
+	ERR_IF(!b) return i;
+	ERR_IF(!b->bm_pages) return i;
+
+	spin_lock_irq(&b->bm_lock);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+
+	i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+
+	spin_unlock_irq(&b->bm_lock);
+	return i;
+}
+
+unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+	return bm_find_next(mdev, bm_fo, 0);
+}
+
+#if 0
+/* not yet needed for anything. */
+unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+	return bm_find_next(mdev, bm_fo, 1);
+}
+#endif
+
+/* does not spin_lock_irqsave.
+ * you must take drbd_bm_lock() first */
+unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+	/* WARN_ON(!bm_is_locked(mdev)); */
+	return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+}
+
+unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+	/* WARN_ON(!bm_is_locked(mdev)); */
+	return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector.
+ * expected to be called for only a few bits (e - s about BITS_PER_LONG).
+ * Must hold bitmap lock already. */
+int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+	unsigned long e, int val, const enum km_type km)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr = NULL;
+	unsigned long bitnr;
+	unsigned long last_page_nr = -1UL;
+	int c = 0;
+
+	if (e >= b->bm_bits) {
+		dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
+				s, e, b->bm_bits);
+		e = b->bm_bits ? b->bm_bits -1 : 0;
+	}
+	for (bitnr = s; bitnr <= e; bitnr++) {
+		unsigned long offset = bitnr>>LN2_BPL;
+		unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+		if (page_nr != last_page_nr) {
+			if (p_addr)
+				__bm_unmap(p_addr, km);
+			p_addr = __bm_map_paddr(b, offset, km);
+			last_page_nr = page_nr;
+		}
+		if (val)
+			c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
+		else
+			c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
+	}
+	if (p_addr)
+		__bm_unmap(p_addr, km);
+	b->bm_set += c;
+	return c;
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector */
+int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+	const unsigned long e, int val)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	int c = 0;
+
+	ERR_IF(!b) return 1;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+
+	c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
+
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return c;
+}
+
+/* returns number of bits changed 0 -> 1 */
+int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	return bm_change_bits_to(mdev, s, e, 1);
+}
+
+/* returns number of bits changed 1 -> 0 */
+int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	return -bm_change_bits_to(mdev, s, e, 0);
+}
+
+/* sets all bits in full words,
+ * from first_word up to, but not including, last_word */
+static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
+		int page_nr, int first_word, int last_word)
+{
+	int i;
+	int bits;
+	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0);
+	for (i = first_word; i < last_word; i++) {
+		bits = hweight_long(paddr[i]);
+		paddr[i] = ~0UL;
+		b->bm_set += BITS_PER_LONG - bits;
+	}
+	kunmap_atomic(paddr, KM_USER0);
+}
+
+/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave.
+ * You must first drbd_bm_lock().
+ * Can be called to set the whole bitmap in one go.
+ * Sets bits from s to e _inclusive_. */
+void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	/* First set_bit from the first bit (s)
+	 * up to the next long boundary (sl),
+	 * then assign full words up to the last long boundary (el),
+	 * then set_bit up to and including the last bit (e).
+	 *
+	 * Do not use memset, because we must account for changes,
+	 * so we need to loop over the words with hweight() anyways.
+	 */
+	unsigned long sl = ALIGN(s,BITS_PER_LONG);
+	unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
+	int first_page;
+	int last_page;
+	int page_nr;
+	int first_word;
+	int last_word;
+
+	if (e - s <= 3*BITS_PER_LONG) {
+		/* don't bother; el and sl may even be wrong. */
+		__bm_change_bits_to(mdev, s, e, 1, KM_USER0);
+		return;
+	}
+
+	/* difference is large enough that we can trust sl and el */
+
+	/* bits filling the current long */
+	if (sl)
+		__bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0);
+
+	first_page = sl >> (3 + PAGE_SHIFT);
+	last_page = el >> (3 + PAGE_SHIFT);
+
+	/* MLPP: modulo longs per page */
+	/* LWPP: long words per page */
+	first_word = MLPP(sl >> LN2_BPL);
+	last_word = LWPP;
+
+	/* first and full pages, unless first page == last page */
+	for (page_nr = first_page; page_nr < last_page; page_nr++) {
+		bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
+		cond_resched();
+		first_word = 0;
+	}
+
+	/* last page (respectively only page, for first page == last page) */
+	last_word = MLPP(el >> LN2_BPL);
+	bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+	/* possibly trailing bits.
+	 * example: (e & 63) == 63, el will be e+1.
+	 * if that even was the very last bit,
+	 * it would trigger an assert in __bm_change_bits_to()
+	 */
+	if (el <= e)
+		__bm_change_bits_to(mdev, el, e, 1, KM_USER0);
+}
+
+/* returns bit state
+ * wants bitnr, NOT sector.
+ * inherently racy... area needs to be locked by means of {al,rs}_lru
+ *  1 ... bit set
+ *  0 ... bit not set
+ * -1 ... first out of bounds access, stop testing for bits!
+ */
+int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr;
+	int i;
+
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+	if (bitnr < b->bm_bits) {
+		unsigned long offset = bitnr>>LN2_BPL;
+		p_addr = bm_map_paddr(b, offset);
+		i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
+		bm_unmap(p_addr);
+	} else if (bitnr == b->bm_bits) {
+		i = -1;
+	} else { /* (bitnr > b->bm_bits) */
+		dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
+		i = 0;
+	}
+
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return i;
+}
+
+/* returns number of bits set in the range [s, e] */
+int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr = NULL, page_nr = -1;
+	unsigned long bitnr;
+	int c = 0;
+	size_t w;
+
+	/* If this is called without a bitmap, that is a bug.  But just to be
+	 * robust in case we screwed up elsewhere, in that case pretend there
+	 * was one dirty bit in the requested area, so we won't try to do a
+	 * local read there (no bitmap probably implies no disk) */
+	ERR_IF(!b) return 1;
+	ERR_IF(!b->bm_pages) return 1;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+	for (bitnr = s; bitnr <= e; bitnr++) {
+		w = bitnr >> LN2_BPL;
+		if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
+			page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
+			if (p_addr)
+				bm_unmap(p_addr);
+			p_addr = bm_map_paddr(b, w);
+		}
+		ERR_IF (bitnr >= b->bm_bits) {
+			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
+		} else {
+			c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
+		}
+	}
+	if (p_addr)
+		bm_unmap(p_addr);
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return c;
+}
+
+
+/* inherently racy...
+ * return value may be already out-of-date when this function returns.
+ * but the general usage is that this is only use during a cstate when bits are
+ * only cleared, not set, and typically only care for the case when the return
+ * value is zero, or we already "locked" this "bitmap extent" by other means.
+ *
+ * enr is bm-extent number, since we chose to name one sector (512 bytes)
+ * worth of the bitmap a "bitmap extent".
+ *
+ * TODO
+ * I think since we use it like a reference count, we should use the real
+ * reference count of some bitmap extent element from some lru instead...
+ *
+ */
+int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	int count, s, e;
+	unsigned long flags;
+	unsigned long *p_addr, *bm;
+
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+
+	s = S2W(enr);
+	e = min((size_t)S2W(enr+1), b->bm_words);
+	count = 0;
+	if (s < b->bm_words) {
+		int n = e-s;
+		p_addr = bm_map_paddr(b, s);
+		bm = p_addr + MLPP(s);
+		while (n--)
+			count += hweight_long(*bm++);
+		bm_unmap(p_addr);
+	} else {
+		dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
+	}
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return count;
+}
+
+/* set all bits covered by the AL-extent al_enr */
+unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr, *bm;
+	unsigned long weight;
+	int count, s, e, i, do_now;
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irq(&b->bm_lock);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+	weight = b->bm_set;
+
+	s = al_enr * BM_WORDS_PER_AL_EXT;
+	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
+	/* assert that s and e are on the same page */
+	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
+	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
+	count = 0;
+	if (s < b->bm_words) {
+		i = do_now = e-s;
+		p_addr = bm_map_paddr(b, s);
+		bm = p_addr + MLPP(s);
+		while (i--) {
+			count += hweight_long(*bm);
+			*bm = -1UL;
+			bm++;
+		}
+		bm_unmap(p_addr);
+		b->bm_set += do_now*BITS_PER_LONG - count;
+		if (e == b->bm_words)
+			b->bm_set -= bm_clear_surplus(b);
+	} else {
+		dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
+	}
+	weight = b->bm_set - weight;
+	spin_unlock_irq(&b->bm_lock);
+	return weight;
+}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 000000000000..8da602e010bb
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2258 @@
+/*
+  drbd_int.h
+
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+  Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+  Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#ifndef _DRBD_INT_H
+#define _DRBD_INT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/crypto.h>
+#include <linux/tcp.h>
+#include <linux/mutex.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <net/tcp.h>
+#include <linux/lru_cache.h>
+
+#ifdef __CHECKER__
+# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
+# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
+# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
+# define __must_hold(x)       __attribute__((context(x,1,1), require_context(x,1,999,"call")))
+#else
+# define __protected_by(x)
+# define __protected_read_by(x)
+# define __protected_write_by(x)
+# define __must_hold(x)
+#endif
+
+#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)
+
+/* module parameter, defined in drbd_main.c */
+extern unsigned int minor_count;
+extern int disable_sendpage;
+extern int allow_oos;
+extern unsigned int cn_idx;
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+extern int enable_faults;
+extern int fault_rate;
+extern int fault_devs;
+#endif
+
+extern char usermode_helper[];
+
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt sending the sending task
+ * on disconnect.
+ */
+#define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes
+ * I choose SIGHUP for now.
+ */
+#define DRBD_SIGKILL SIGHUP
+
+/* All EEs on the free list should have ID_VACANT (== 0)
+ * freshly allocated EEs get !ID_VACANT (== 1)
+ * so if it says "cannot dereference null pointer at adress 0x00000001",
+ * it is most likely one of these :( */
+
+#define ID_IN_SYNC      (4711ULL)
+#define ID_OUT_OF_SYNC  (4712ULL)
+
+#define ID_SYNCER (-1ULL)
+#define ID_VACANT 0
+#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
+struct drbd_conf;
+
+
+/* to shorten dev_warn(DEV, "msg"); and relatives statements */
+#define DEV (disk_to_dev(mdev->vdisk))
+
+#define D_ASSERT(exp)	if (!(exp)) \
+	 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
+
+#define ERR_IF(exp) if (({				\
+	int _b = (exp) != 0;				\
+	if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n",	\
+		__func__, #exp, __FILE__, __LINE__);	\
+	 _b;						\
+	}))
+
+/* Defines to control fault insertion */
+enum {
+	DRBD_FAULT_MD_WR = 0,	/* meta data write */
+	DRBD_FAULT_MD_RD = 1,	/*           read  */
+	DRBD_FAULT_RS_WR = 2,	/* resync          */
+	DRBD_FAULT_RS_RD = 3,
+	DRBD_FAULT_DT_WR = 4,	/* data            */
+	DRBD_FAULT_DT_RD = 5,
+	DRBD_FAULT_DT_RA = 6,	/* data read ahead */
+	DRBD_FAULT_BM_ALLOC = 7,	/* bitmap allocation */
+	DRBD_FAULT_AL_EE = 8,	/* alloc ee */
+
+	DRBD_FAULT_MAX,
+};
+
+extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...);
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+extern unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
+static inline int
+drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
+	return fault_rate &&
+		(enable_faults & (1<<type)) &&
+		_drbd_insert_fault(mdev, type);
+}
+#define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t)))
+
+#else
+#define FAULT_ACTIVE(_m, _t) (0)
+#endif
+
+/* integer division, round _UP_ to the next integer */
+#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
+/* usual integer division */
+#define div_floor(A, B) ((A)/(B))
+
+/* drbd_meta-data.c (still in drbd_main.c) */
+/* 4th incarnation of the disk layout. */
+#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
+
+extern struct drbd_conf **minor_table;
+extern struct ratelimit_state drbd_ratelimit_state;
+
+/* on the wire */
+enum drbd_packets {
+	/* receiver (data socket) */
+	P_DATA		      = 0x00,
+	P_DATA_REPLY	      = 0x01, /* Response to P_DATA_REQUEST */
+	P_RS_DATA_REPLY	      = 0x02, /* Response to P_RS_DATA_REQUEST */
+	P_BARRIER	      = 0x03,
+	P_BITMAP	      = 0x04,
+	P_BECOME_SYNC_TARGET  = 0x05,
+	P_BECOME_SYNC_SOURCE  = 0x06,
+	P_UNPLUG_REMOTE	      = 0x07, /* Used at various times to hint the peer */
+	P_DATA_REQUEST	      = 0x08, /* Used to ask for a data block */
+	P_RS_DATA_REQUEST     = 0x09, /* Used to ask for a data block for resync */
+	P_SYNC_PARAM	      = 0x0a,
+	P_PROTOCOL	      = 0x0b,
+	P_UUIDS		      = 0x0c,
+	P_SIZES		      = 0x0d,
+	P_STATE		      = 0x0e,
+	P_SYNC_UUID	      = 0x0f,
+	P_AUTH_CHALLENGE      = 0x10,
+	P_AUTH_RESPONSE	      = 0x11,
+	P_STATE_CHG_REQ	      = 0x12,
+
+	/* asender (meta socket */
+	P_PING		      = 0x13,
+	P_PING_ACK	      = 0x14,
+	P_RECV_ACK	      = 0x15, /* Used in protocol B */
+	P_WRITE_ACK	      = 0x16, /* Used in protocol C */
+	P_RS_WRITE_ACK	      = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
+	P_DISCARD_ACK	      = 0x18, /* Used in proto C, two-primaries conflict detection */
+	P_NEG_ACK	      = 0x19, /* Sent if local disk is unusable */
+	P_NEG_DREPLY	      = 0x1a, /* Local disk is broken... */
+	P_NEG_RS_DREPLY	      = 0x1b, /* Local disk is broken... */
+	P_BARRIER_ACK	      = 0x1c,
+	P_STATE_CHG_REPLY     = 0x1d,
+
+	/* "new" commands, no longer fitting into the ordering scheme above */
+
+	P_OV_REQUEST	      = 0x1e, /* data socket */
+	P_OV_REPLY	      = 0x1f,
+	P_OV_RESULT	      = 0x20, /* meta socket */
+	P_CSUM_RS_REQUEST     = 0x21, /* data socket */
+	P_RS_IS_IN_SYNC	      = 0x22, /* meta socket */
+	P_SYNC_PARAM89	      = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
+	P_COMPRESSED_BITMAP   = 0x24, /* compressed or otherwise encoded bitmap transfer */
+
+	P_MAX_CMD	      = 0x25,
+	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
+	P_MAX_OPT_CMD	      = 0x101,
+
+	/* special command ids for handshake */
+
+	P_HAND_SHAKE_M	      = 0xfff1, /* First Packet on the MetaSock */
+	P_HAND_SHAKE_S	      = 0xfff2, /* First Packet on the Socket */
+
+	P_HAND_SHAKE	      = 0xfffe	/* FIXED for the next century! */
+};
+
+static inline const char *cmdname(enum drbd_packets cmd)
+{
+	/* THINK may need to become several global tables
+	 * when we want to support more than
+	 * one PRO_VERSION */
+	static const char *cmdnames[] = {
+		[P_DATA]	        = "Data",
+		[P_DATA_REPLY]	        = "DataReply",
+		[P_RS_DATA_REPLY]	= "RSDataReply",
+		[P_BARRIER]	        = "Barrier",
+		[P_BITMAP]	        = "ReportBitMap",
+		[P_BECOME_SYNC_TARGET]  = "BecomeSyncTarget",
+		[P_BECOME_SYNC_SOURCE]  = "BecomeSyncSource",
+		[P_UNPLUG_REMOTE]	= "UnplugRemote",
+		[P_DATA_REQUEST]	= "DataRequest",
+		[P_RS_DATA_REQUEST]     = "RSDataRequest",
+		[P_SYNC_PARAM]	        = "SyncParam",
+		[P_SYNC_PARAM89]	= "SyncParam89",
+		[P_PROTOCOL]            = "ReportProtocol",
+		[P_UUIDS]	        = "ReportUUIDs",
+		[P_SIZES]	        = "ReportSizes",
+		[P_STATE]	        = "ReportState",
+		[P_SYNC_UUID]           = "ReportSyncUUID",
+		[P_AUTH_CHALLENGE]      = "AuthChallenge",
+		[P_AUTH_RESPONSE]	= "AuthResponse",
+		[P_PING]		= "Ping",
+		[P_PING_ACK]	        = "PingAck",
+		[P_RECV_ACK]	        = "RecvAck",
+		[P_WRITE_ACK]	        = "WriteAck",
+		[P_RS_WRITE_ACK]	= "RSWriteAck",
+		[P_DISCARD_ACK]	        = "DiscardAck",
+		[P_NEG_ACK]	        = "NegAck",
+		[P_NEG_DREPLY]	        = "NegDReply",
+		[P_NEG_RS_DREPLY]	= "NegRSDReply",
+		[P_BARRIER_ACK]	        = "BarrierAck",
+		[P_STATE_CHG_REQ]       = "StateChgRequest",
+		[P_STATE_CHG_REPLY]     = "StateChgReply",
+		[P_OV_REQUEST]          = "OVRequest",
+		[P_OV_REPLY]            = "OVReply",
+		[P_OV_RESULT]           = "OVResult",
+		[P_MAX_CMD]	        = NULL,
+	};
+
+	if (cmd == P_HAND_SHAKE_M)
+		return "HandShakeM";
+	if (cmd == P_HAND_SHAKE_S)
+		return "HandShakeS";
+	if (cmd == P_HAND_SHAKE)
+		return "HandShake";
+	if (cmd >= P_MAX_CMD)
+		return "Unknown";
+	return cmdnames[cmd];
+}
+
+/* for sending/receiving the bitmap,
+ * possibly in some encoding scheme */
+struct bm_xfer_ctx {
+	/* "const"
+	 * stores total bits and long words
+	 * of the bitmap, so we don't need to
+	 * call the accessor functions over and again. */
+	unsigned long bm_bits;
+	unsigned long bm_words;
+	/* during xfer, current position within the bitmap */
+	unsigned long bit_offset;
+	unsigned long word_offset;
+
+	/* statistics; index: (h->command == P_BITMAP) */
+	unsigned packets[2];
+	unsigned bytes[2];
+};
+
+extern void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+		const char *direction, struct bm_xfer_ctx *c);
+
+static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
+{
+	/* word_offset counts "native long words" (32 or 64 bit),
+	 * aligned at 64 bit.
+	 * Encoded packet may end at an unaligned bit offset.
+	 * In case a fallback clear text packet is transmitted in
+	 * between, we adjust this offset back to the last 64bit
+	 * aligned "native long word", which makes coding and decoding
+	 * the plain text bitmap much more convenient.  */
+#if BITS_PER_LONG == 64
+	c->word_offset = c->bit_offset >> 6;
+#elif BITS_PER_LONG == 32
+	c->word_offset = c->bit_offset >> 5;
+	c->word_offset &= ~(1UL);
+#else
+# error "unsupported BITS_PER_LONG"
+#endif
+}
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+/* This is the layout for a packet on the wire.
+ * The byteorder is the network byte order.
+ *     (except block_id and barrier fields.
+ *	these are pointers to local structs
+ *	and have no relevance for the partner,
+ *	which just echoes them as received.)
+ *
+ * NOTE that the payload starts at a long aligned offset,
+ * regardless of 32 or 64 bit arch!
+ */
+struct p_header {
+	u32	  magic;
+	u16	  command;
+	u16	  length;	/* bytes of data after this header */
+	u8	  payload[0];
+} __packed;
+/* 8 bytes. packet FIXED for the next century! */
+
+/*
+ * short commands, packets without payload, plain p_header:
+ *   P_PING
+ *   P_PING_ACK
+ *   P_BECOME_SYNC_TARGET
+ *   P_BECOME_SYNC_SOURCE
+ *   P_UNPLUG_REMOTE
+ */
+
+/*
+ * commands with out-of-struct payload:
+ *   P_BITMAP    (no additional fields)
+ *   P_DATA, P_DATA_REPLY (see p_data)
+ *   P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
+ */
+
+/* these defines must not be changed without changing the protocol version */
+#define DP_HARDBARRIER	      1
+#define DP_RW_SYNC	      2
+#define DP_MAY_SET_IN_SYNC    4
+
+struct p_data {
+	struct p_header head;
+	u64	    sector;    /* 64 bits sector number */
+	u64	    block_id;  /* to identify the request in protocol B&C */
+	u32	    seq_num;
+	u32	    dp_flags;
+} __packed;
+
+/*
+ * commands which share a struct:
+ *  p_block_ack:
+ *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
+ *   P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ *  p_block_req:
+ *   P_DATA_REQUEST, P_RS_DATA_REQUEST
+ */
+struct p_block_ack {
+	struct p_header head;
+	u64	    sector;
+	u64	    block_id;
+	u32	    blksize;
+	u32	    seq_num;
+} __packed;
+
+
+struct p_block_req {
+	struct p_header head;
+	u64 sector;
+	u64 block_id;
+	u32 blksize;
+	u32 pad;	/* to multiple of 8 Byte */
+} __packed;
+
+/*
+ * commands with their own struct for additional fields:
+ *   P_HAND_SHAKE
+ *   P_BARRIER
+ *   P_BARRIER_ACK
+ *   P_SYNC_PARAM
+ *   ReportParams
+ */
+
+struct p_handshake {
+	struct p_header head;	/* 8 bytes */
+	u32 protocol_min;
+	u32 feature_flags;
+	u32 protocol_max;
+
+	/* should be more than enough for future enhancements
+	 * for now, feature_flags and the reserverd array shall be zero.
+	 */
+
+	u32 _pad;
+	u64 reserverd[7];
+} __packed;
+/* 80 bytes, FIXED for the next century */
+
+struct p_barrier {
+	struct p_header head;
+	u32 barrier;	/* barrier number _handle_ only */
+	u32 pad;	/* to multiple of 8 Byte */
+} __packed;
+
+struct p_barrier_ack {
+	struct p_header head;
+	u32 barrier;
+	u32 set_size;
+} __packed;
+
+struct p_rs_param {
+	struct p_header head;
+	u32 rate;
+
+	      /* Since protocol version 88 and higher. */
+	char verify_alg[0];
+} __packed;
+
+struct p_rs_param_89 {
+	struct p_header head;
+	u32 rate;
+        /* protocol version 89: */
+	char verify_alg[SHARED_SECRET_MAX];
+	char csums_alg[SHARED_SECRET_MAX];
+} __packed;
+
+struct p_protocol {
+	struct p_header head;
+	u32 protocol;
+	u32 after_sb_0p;
+	u32 after_sb_1p;
+	u32 after_sb_2p;
+	u32 want_lose;
+	u32 two_primaries;
+
+              /* Since protocol version 87 and higher. */
+	char integrity_alg[0];
+
+} __packed;
+
+struct p_uuids {
+	struct p_header head;
+	u64 uuid[UI_EXTENDED_SIZE];
+} __packed;
+
+struct p_rs_uuid {
+	struct p_header head;
+	u64	    uuid;
+} __packed;
+
+struct p_sizes {
+	struct p_header head;
+	u64	    d_size;  /* size of disk */
+	u64	    u_size;  /* user requested size */
+	u64	    c_size;  /* current exported size */
+	u32	    max_segment_size;  /* Maximal size of a BIO */
+	u32	    queue_order_type;
+} __packed;
+
+struct p_state {
+	struct p_header head;
+	u32	    state;
+} __packed;
+
+struct p_req_state {
+	struct p_header head;
+	u32	    mask;
+	u32	    val;
+} __packed;
+
+struct p_req_state_reply {
+	struct p_header head;
+	u32	    retcode;
+} __packed;
+
+struct p_drbd06_param {
+	u64	  size;
+	u32	  state;
+	u32	  blksize;
+	u32	  protocol;
+	u32	  version;
+	u32	  gen_cnt[5];
+	u32	  bit_map_gen[5];
+} __packed;
+
+struct p_discard {
+	struct p_header head;
+	u64	    block_id;
+	u32	    seq_num;
+	u32	    pad;
+} __packed;
+
+/* Valid values for the encoding field.
+ * Bump proto version when changing this. */
+enum drbd_bitmap_code {
+	/* RLE_VLI_Bytes = 0,
+	 * and other bit variants had been defined during
+	 * algorithm evaluation. */
+	RLE_VLI_Bits = 2,
+};
+
+struct p_compressed_bm {
+	struct p_header head;
+	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
+	 * (encoding & 0x80): polarity (set/unset) of first runlength
+	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
+	 * used to pad up to head.length bytes
+	 */
+	u8 encoding;
+
+	u8 code[0];
+} __packed;
+
+/* DCBP: Drbd Compressed Bitmap Packet ... */
+static inline enum drbd_bitmap_code
+DCBP_get_code(struct p_compressed_bm *p)
+{
+	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
+}
+
+static inline void
+DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
+{
+	BUG_ON(code & ~0xf);
+	p->encoding = (p->encoding & ~0xf) | code;
+}
+
+static inline int
+DCBP_get_start(struct p_compressed_bm *p)
+{
+	return (p->encoding & 0x80) != 0;
+}
+
+static inline void
+DCBP_set_start(struct p_compressed_bm *p, int set)
+{
+	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
+}
+
+static inline int
+DCBP_get_pad_bits(struct p_compressed_bm *p)
+{
+	return (p->encoding >> 4) & 0x7;
+}
+
+static inline void
+DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
+{
+	BUG_ON(n & ~0x7);
+	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
+}
+
+/* one bitmap packet, including the p_header,
+ * should fit within one _architecture independend_ page.
+ * so we need to use the fixed size 4KiB page size
+ * most architechtures have used for a long time.
+ */
+#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
+#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
+#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
+#if (PAGE_SIZE < 4096)
+/* drbd_send_bitmap / receive_bitmap would break horribly */
+#error "PAGE_SIZE too small"
+#endif
+
+union p_polymorph {
+        struct p_header          header;
+        struct p_handshake       handshake;
+        struct p_data            data;
+        struct p_block_ack       block_ack;
+        struct p_barrier         barrier;
+        struct p_barrier_ack     barrier_ack;
+        struct p_rs_param_89     rs_param_89;
+        struct p_protocol        protocol;
+        struct p_sizes           sizes;
+        struct p_uuids           uuids;
+        struct p_state           state;
+        struct p_req_state       req_state;
+        struct p_req_state_reply req_state_reply;
+        struct p_block_req       block_req;
+} __packed;
+
+/**********************************************************************/
+enum drbd_thread_state {
+	None,
+	Running,
+	Exiting,
+	Restarting
+};
+
+struct drbd_thread {
+	spinlock_t t_lock;
+	struct task_struct *task;
+	struct completion stop;
+	enum drbd_thread_state t_state;
+	int (*function) (struct drbd_thread *);
+	struct drbd_conf *mdev;
+	int reset_cpu_mask;
+};
+
+static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
+{
+	/* THINK testing the t_state seems to be uncritical in all cases
+	 * (but thread_{start,stop}), so we can read it *without* the lock.
+	 *	--lge */
+
+	smp_rmb();
+	return thi->t_state;
+}
+
+
+/*
+ * Having this as the first member of a struct provides sort of "inheritance".
+ * "derived" structs can be "drbd_queue_work()"ed.
+ * The callback should know and cast back to the descendant struct.
+ * drbd_request and drbd_epoch_entry are descendants of drbd_work.
+ */
+struct drbd_work;
+typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
+struct drbd_work {
+	struct list_head list;
+	drbd_work_cb cb;
+};
+
+struct drbd_tl_epoch;
+struct drbd_request {
+	struct drbd_work w;
+	struct drbd_conf *mdev;
+
+	/* if local IO is not allowed, will be NULL.
+	 * if local IO _is_ allowed, holds the locally submitted bio clone,
+	 * or, after local IO completion, the ERR_PTR(error).
+	 * see drbd_endio_pri(). */
+	struct bio *private_bio;
+
+	struct hlist_node colision;
+	sector_t sector;
+	unsigned int size;
+	unsigned int epoch; /* barrier_nr */
+
+	/* barrier_nr: used to check on "completion" whether this req was in
+	 * the current epoch, and we therefore have to close it,
+	 * starting a new epoch...
+	 */
+
+	/* up to here, the struct layout is identical to drbd_epoch_entry;
+	 * we might be able to use that to our advantage...  */
+
+	struct list_head tl_requests; /* ring list in the transfer log */
+	struct bio *master_bio;       /* master bio pointer */
+	unsigned long rq_state; /* see comments above _req_mod() */
+	int seq_num;
+	unsigned long start_time;
+};
+
+struct drbd_tl_epoch {
+	struct drbd_work w;
+	struct list_head requests; /* requests before */
+	struct drbd_tl_epoch *next; /* pointer to the next barrier */
+	unsigned int br_number;  /* the barriers identifier. */
+	int n_req;	/* number of requests attached before this barrier */
+};
+
+struct drbd_request;
+
+/* These Tl_epoch_entries may be in one of 6 lists:
+   active_ee .. data packet being written
+   sync_ee   .. syncer block being written
+   done_ee   .. block written, need to send P_WRITE_ACK
+   read_ee   .. [RS]P_DATA_REQUEST being read
+*/
+
+struct drbd_epoch {
+	struct list_head list;
+	unsigned int barrier_nr;
+	atomic_t epoch_size; /* increased on every request added. */
+	atomic_t active;     /* increased on every req. added, and dec on every finished. */
+	unsigned long flags;
+};
+
+/* drbd_epoch flag bits */
+enum {
+	DE_BARRIER_IN_NEXT_EPOCH_ISSUED,
+	DE_BARRIER_IN_NEXT_EPOCH_DONE,
+	DE_CONTAINS_A_BARRIER,
+	DE_HAVE_BARRIER_NUMBER,
+	DE_IS_FINISHING,
+};
+
+enum epoch_event {
+	EV_PUT,
+	EV_GOT_BARRIER_NR,
+	EV_BARRIER_DONE,
+	EV_BECAME_LAST,
+	EV_TRACE_FLUSH,       /* TRACE_ are not real events, only used for tracing */
+	EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */
+	EV_TRACE_SETTING_BI,  /* Barrier is expressed with the first write of the next epoch */
+	EV_TRACE_ALLOC,
+	EV_TRACE_FREE,
+	EV_CLEANUP = 32, /* used as flag */
+};
+
+struct drbd_epoch_entry {
+	struct drbd_work    w;
+	struct drbd_conf *mdev;
+	struct bio *private_bio;
+	struct hlist_node colision;
+	sector_t sector;
+	unsigned int size;
+	struct drbd_epoch *epoch;
+
+	/* up to here, the struct layout is identical to drbd_request;
+	 * we might be able to use that to our advantage...  */
+
+	unsigned int flags;
+	u64    block_id;
+};
+
+struct drbd_wq_barrier {
+	struct drbd_work w;
+	struct completion done;
+};
+
+struct digest_info {
+	int digest_size;
+	void *digest;
+};
+
+/* ee flag bits */
+enum {
+	__EE_CALL_AL_COMPLETE_IO,
+	__EE_CONFLICT_PENDING,
+	__EE_MAY_SET_IN_SYNC,
+	__EE_IS_BARRIER,
+};
+#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
+#define EE_CONFLICT_PENDING    (1<<__EE_CONFLICT_PENDING)
+#define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
+#define EE_IS_BARRIER          (1<<__EE_IS_BARRIER)
+
+/* global flag bits */
+enum {
+	CREATE_BARRIER,		/* next P_DATA is preceeded by a P_BARRIER */
+	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
+	SEND_PING,		/* whether asender should send a ping asap */
+
+	STOP_SYNC_TIMER,	/* tell timer to cancel itself */
+	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
+	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
+	MD_DIRTY,		/* current uuids and flags not yet on disk */
+	DISCARD_CONCURRENT,	/* Set on one node, cleared on the peer! */
+	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
+	CLUSTER_ST_CHANGE,	/* Cluster wide state change going on... */
+	CL_ST_CHG_SUCCESS,
+	CL_ST_CHG_FAIL,
+	CRASHED_PRIMARY,	/* This node was a crashed primary.
+				 * Gets cleared when the state.conn
+				 * goes into C_CONNECTED state. */
+	WRITE_BM_AFTER_RESYNC,	/* A kmalloc() during resync failed */
+	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
+	CONSIDER_RESYNC,
+
+	MD_NO_BARRIER,		/* meta data device does not support barriers,
+				   so don't even try */
+	SUSPEND_IO,		/* suspend application io */
+	BITMAP_IO,		/* suspend application io;
+				   once no more io in flight, start bitmap io */
+	BITMAP_IO_QUEUED,       /* Started bitmap IO */
+	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
+	NET_CONGESTED,		/* The data socket is congested */
+
+	CONFIG_PENDING,		/* serialization of (re)configuration requests.
+				 * if set, also prevents the device from dying */
+	DEVICE_DYING,		/* device became unconfigured,
+				 * but worker thread is still handling the cleanup.
+				 * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
+				 * while this is set. */
+	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
+				 * the peer, if it changed there as well. */
+};
+
+struct drbd_bitmap; /* opaque for drbd_conf */
+
+/* TODO sort members for performance
+ * MAYBE group them further */
+
+/* THINK maybe we actually want to use the default "event/%s" worker threads
+ * or similar in linux 2.6, which uses per cpu data and threads.
+ *
+ * To be general, this might need a spin_lock member.
+ * For now, please use the mdev->req_lock to protect list_head,
+ * see drbd_queue_work below.
+ */
+struct drbd_work_queue {
+	struct list_head q;
+	struct semaphore s; /* producers up it, worker down()s it */
+	spinlock_t q_lock;  /* to protect the list. */
+};
+
+struct drbd_socket {
+	struct drbd_work_queue work;
+	struct mutex mutex;
+	struct socket    *socket;
+	/* this way we get our
+	 * send/receive buffers off the stack */
+	union p_polymorph sbuf;
+	union p_polymorph rbuf;
+};
+
+struct drbd_md {
+	u64 md_offset;		/* sector offset to 'super' block */
+
+	u64 la_size_sect;	/* last agreed size, unit sectors */
+	u64 uuid[UI_SIZE];
+	u64 device_uuid;
+	u32 flags;
+	u32 md_size_sect;
+
+	s32 al_offset;	/* signed relative sector offset to al area */
+	s32 bm_offset;	/* signed relative sector offset to bitmap */
+
+	/* u32 al_nr_extents;	   important for restoring the AL
+	 * is stored into  sync_conf.al_extents, which in turn
+	 * gets applied to act_log->nr_elements
+	 */
+};
+
+/* for sync_conf and other types... */
+#define NL_PACKET(name, number, fields) struct name { fields };
+#define NL_INTEGER(pn,pr,member) int member;
+#define NL_INT64(pn,pr,member) __u64 member;
+#define NL_BIT(pn,pr,member)   unsigned member:1;
+#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
+#include "linux/drbd_nl.h"
+
+struct drbd_backing_dev {
+	struct block_device *backing_bdev;
+	struct block_device *md_bdev;
+	struct file *lo_file;
+	struct file *md_file;
+	struct drbd_md md;
+	struct disk_conf dc; /* The user provided config... */
+	sector_t known_size; /* last known size of that backing device */
+};
+
+struct drbd_md_io {
+	struct drbd_conf *mdev;
+	struct completion event;
+	int error;
+};
+
+struct bm_io_work {
+	struct drbd_work w;
+	char *why;
+	int (*io_fn)(struct drbd_conf *mdev);
+	void (*done)(struct drbd_conf *mdev, int rv);
+};
+
+enum write_ordering_e {
+	WO_none,
+	WO_drain_io,
+	WO_bdev_flush,
+	WO_bio_barrier
+};
+
+struct drbd_conf {
+	/* things that are stored as / read from meta data on disk */
+	unsigned long flags;
+
+	/* configured by drbdsetup */
+	struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
+	struct syncer_conf sync_conf;
+	struct drbd_backing_dev *ldev __protected_by(local);
+
+	sector_t p_size;     /* partner's disk size */
+	struct request_queue *rq_queue;
+	struct block_device *this_bdev;
+	struct gendisk	    *vdisk;
+
+	struct drbd_socket data; /* data/barrier/cstate/parameter packets */
+	struct drbd_socket meta; /* ping/ack (metadata) packets */
+	int agreed_pro_version;  /* actually used protocol version */
+	unsigned long last_received; /* in jiffies, either socket */
+	unsigned int ko_count;
+	struct drbd_work  resync_work,
+			  unplug_work,
+			  md_sync_work;
+	struct timer_list resync_timer;
+	struct timer_list md_sync_timer;
+
+	/* Used after attach while negotiating new disk state. */
+	union drbd_state new_state_tmp;
+
+	union drbd_state state;
+	wait_queue_head_t misc_wait;
+	wait_queue_head_t state_wait;  /* upon each state change. */
+	unsigned int send_cnt;
+	unsigned int recv_cnt;
+	unsigned int read_cnt;
+	unsigned int writ_cnt;
+	unsigned int al_writ_cnt;
+	unsigned int bm_writ_cnt;
+	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
+	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
+	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
+	atomic_t unacked_cnt;	 /* Need to send replys for */
+	atomic_t local_cnt;	 /* Waiting for local completion */
+	atomic_t net_cnt;	 /* Users of net_conf */
+	spinlock_t req_lock;
+	struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
+	struct drbd_tl_epoch *newest_tle;
+	struct drbd_tl_epoch *oldest_tle;
+	struct list_head out_of_sequence_requests;
+	struct hlist_head *tl_hash;
+	unsigned int tl_hash_s;
+
+	/* blocks to sync in this run [unit BM_BLOCK_SIZE] */
+	unsigned long rs_total;
+	/* number of sync IOs that failed in this run */
+	unsigned long rs_failed;
+	/* Syncer's start time [unit jiffies] */
+	unsigned long rs_start;
+	/* cumulated time in PausedSyncX state [unit jiffies] */
+	unsigned long rs_paused;
+	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
+	unsigned long rs_mark_left;
+	/* marks's time [unit jiffies] */
+	unsigned long rs_mark_time;
+	/* skipped because csum was equeal [unit BM_BLOCK_SIZE] */
+	unsigned long rs_same_csum;
+
+	/* where does the admin want us to start? (sector) */
+	sector_t ov_start_sector;
+	/* where are we now? (sector) */
+	sector_t ov_position;
+	/* Start sector of out of sync range (to merge printk reporting). */
+	sector_t ov_last_oos_start;
+	/* size of out-of-sync range in sectors. */
+	sector_t ov_last_oos_size;
+	unsigned long ov_left; /* in bits */
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *verify_tfm;
+
+	struct drbd_thread receiver;
+	struct drbd_thread worker;
+	struct drbd_thread asender;
+	struct drbd_bitmap *bitmap;
+	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
+
+	/* Used to track operations of resync... */
+	struct lru_cache *resync;
+	/* Number of locked elements in resync LRU */
+	unsigned int resync_locked;
+	/* resync extent number waiting for application requests */
+	unsigned int resync_wenr;
+
+	int open_cnt;
+	u64 *p_uuid;
+	struct drbd_epoch *current_epoch;
+	spinlock_t epoch_lock;
+	unsigned int epochs;
+	enum write_ordering_e write_ordering;
+	struct list_head active_ee; /* IO in progress */
+	struct list_head sync_ee;   /* IO in progress */
+	struct list_head done_ee;   /* send ack */
+	struct list_head read_ee;   /* IO in progress */
+	struct list_head net_ee;    /* zero-copy network send in progress */
+	struct hlist_head *ee_hash; /* is proteced by req_lock! */
+	unsigned int ee_hash_s;
+
+	/* this one is protected by ee_lock, single thread */
+	struct drbd_epoch_entry *last_write_w_barrier;
+
+	int next_barrier_nr;
+	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
+	struct list_head resync_reads;
+	atomic_t pp_in_use;
+	wait_queue_head_t ee_wait;
+	struct page *md_io_page;	/* one page buffer for md_io */
+	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
+	struct mutex md_io_mutex;	/* protects the md_io_buffer */
+	spinlock_t al_lock;
+	wait_queue_head_t al_wait;
+	struct lru_cache *act_log;	/* activity log */
+	unsigned int al_tr_number;
+	int al_tr_cycle;
+	int al_tr_pos;   /* position of the next transaction in the journal */
+	struct crypto_hash *cram_hmac_tfm;
+	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
+	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
+	void *int_dig_out;
+	void *int_dig_in;
+	void *int_dig_vv;
+	wait_queue_head_t seq_wait;
+	atomic_t packet_seq;
+	unsigned int peer_seq;
+	spinlock_t peer_seq_lock;
+	unsigned int minor;
+	unsigned long comm_bm_set; /* communicated number of set bits. */
+	cpumask_var_t cpu_mask;
+	struct bm_io_work bm_io_work;
+	u64 ed_uuid; /* UUID of the exposed data */
+	struct mutex state_mutex;
+	char congestion_reason;  /* Why we where congested... */
+};
+
+static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
+{
+	struct drbd_conf *mdev;
+
+	mdev = minor < minor_count ? minor_table[minor] : NULL;
+
+	return mdev;
+}
+
+static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
+{
+	return mdev->minor;
+}
+
+/* returns 1 if it was successfull,
+ * returns 0 if there was no data socket.
+ * so wherever you are going to use the data.socket, e.g. do
+ * if (!drbd_get_data_sock(mdev))
+ *	return 0;
+ *	CODE();
+ * drbd_put_data_sock(mdev);
+ */
+static inline int drbd_get_data_sock(struct drbd_conf *mdev)
+{
+	mutex_lock(&mdev->data.mutex);
+	/* drbd_disconnect() could have called drbd_free_sock()
+	 * while we were waiting in down()... */
+	if (unlikely(mdev->data.socket == NULL)) {
+		mutex_unlock(&mdev->data.mutex);
+		return 0;
+	}
+	return 1;
+}
+
+static inline void drbd_put_data_sock(struct drbd_conf *mdev)
+{
+	mutex_unlock(&mdev->data.mutex);
+}
+
+/*
+ * function declarations
+ *************************/
+
+/* drbd_main.c */
+
+enum chg_state_flags {
+	CS_HARD	= 1,
+	CS_VERBOSE = 2,
+	CS_WAIT_COMPLETE = 4,
+	CS_SERIALIZE    = 8,
+	CS_ORDERED      = CS_WAIT_COMPLETE + CS_SERIALIZE,
+};
+
+extern void drbd_init_set_defaults(struct drbd_conf *mdev);
+extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+			union drbd_state mask, union drbd_state val);
+extern void drbd_force_state(struct drbd_conf *, union drbd_state,
+			union drbd_state);
+extern int _drbd_request_state(struct drbd_conf *, union drbd_state,
+			union drbd_state, enum chg_state_flags);
+extern int __drbd_set_state(struct drbd_conf *, union drbd_state,
+			    enum chg_state_flags, struct completion *done);
+extern void print_st_err(struct drbd_conf *, union drbd_state,
+			union drbd_state, int);
+extern int  drbd_thread_start(struct drbd_thread *thi);
+extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
+#ifdef CONFIG_SMP
+extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
+extern void drbd_calc_cpu_mask(struct drbd_conf *mdev);
+#else
+#define drbd_thread_current_set_cpu(A) ({})
+#define drbd_calc_cpu_mask(A) ({})
+#endif
+extern void drbd_free_resources(struct drbd_conf *mdev);
+extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+		       unsigned int set_size);
+extern void tl_clear(struct drbd_conf *mdev);
+extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
+extern void drbd_free_sock(struct drbd_conf *mdev);
+extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+			void *buf, size_t size, unsigned msg_flags);
+extern int drbd_send_protocol(struct drbd_conf *mdev);
+extern int drbd_send_uuids(struct drbd_conf *mdev);
+extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
+extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
+extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply);
+extern int _drbd_send_state(struct drbd_conf *mdev);
+extern int drbd_send_state(struct drbd_conf *mdev);
+extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+			enum drbd_packets cmd, struct p_header *h,
+			size_t size, unsigned msg_flags);
+#define USE_DATA_SOCKET 1
+#define USE_META_SOCKET 0
+extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+			enum drbd_packets cmd, struct p_header *h,
+			size_t size);
+extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
+			char *data, size_t size);
+extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
+extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
+			u32 set_size);
+extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+			struct drbd_epoch_entry *e);
+extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+			struct p_block_req *rp);
+extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+			struct p_data *dp);
+extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+			    sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+			   struct drbd_epoch_entry *e);
+extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
+extern int _drbd_send_barrier(struct drbd_conf *mdev,
+			struct drbd_tl_epoch *barrier);
+extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+			      sector_t sector, int size, u64 block_id);
+extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
+				   sector_t sector,int size,
+				   void *digest, int digest_size,
+				   enum drbd_packets cmd);
+extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size);
+
+extern int drbd_send_bitmap(struct drbd_conf *mdev);
+extern int _drbd_send_bitmap(struct drbd_conf *mdev);
+extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
+extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
+
+/* drbd_meta-data.c (still in drbd_main.c) */
+extern void drbd_md_sync(struct drbd_conf *mdev);
+extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
+/* maybe define them below as inline? */
+extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
+extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
+extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
+extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+				 int (*io_fn)(struct drbd_conf *),
+				 void (*done)(struct drbd_conf *, int),
+				 char *why);
+extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
+extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
+extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+
+
+/* Meta data layout
+   We reserve a 128MB Block (4k aligned)
+   * either at the end of the backing device
+   * or on a seperate meta data device. */
+
+#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
+/* The following numbers are sectors */
+#define MD_AL_OFFSET 8	    /* 8 Sectors after start of meta area */
+#define MD_AL_MAX_SIZE 64   /* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
+/* Allows up to about 3.8TB */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
+
+/* Since the smalles IO unit is usually 512 byte */
+#define MD_SECTOR_SHIFT	 9
+#define MD_SECTOR_SIZE	 (1<<MD_SECTOR_SHIFT)
+
+/* activity log */
+#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
+#define AL_EXTENT_SHIFT 22		 /* One extent represents 4M Storage */
+#define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
+
+#if BITS_PER_LONG == 32
+#define LN2_BPL 5
+#define cpu_to_lel(A) cpu_to_le32(A)
+#define lel_to_cpu(A) le32_to_cpu(A)
+#elif BITS_PER_LONG == 64
+#define LN2_BPL 6
+#define cpu_to_lel(A) cpu_to_le64(A)
+#define lel_to_cpu(A) le64_to_cpu(A)
+#else
+#error "LN2 of BITS_PER_LONG unknown!"
+#endif
+
+/* resync bitmap */
+/* 16MB sized 'bitmap extent' to track syncer usage */
+struct bm_extent {
+	int rs_left; /* number of bits set (out of sync) in this extent. */
+	int rs_failed; /* number of failed resync requests in this extent. */
+	unsigned long flags;
+	struct lc_element lce;
+};
+
+#define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
+#define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
+
+/* drbd_bitmap.c */
+/*
+ * We need to store one bit for a block.
+ * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
+ * Bit 0 ==> local node thinks this block is binary identical on both nodes
+ * Bit 1 ==> local node thinks this block needs to be synced.
+ */
+
+#define BM_BLOCK_SHIFT  12			 /* 4k per bit */
+#define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
+/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
+ * per sector of on disk bitmap */
+#define BM_EXT_SHIFT	 (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3)  /* = 24 */
+#define BM_EXT_SIZE	 (1<<BM_EXT_SHIFT)
+
+#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
+#error "HAVE YOU FIXED drbdmeta AS WELL??"
+#endif
+
+/* thus many _storage_ sectors are described by one bit */
+#define BM_SECT_TO_BIT(x)   ((x)>>(BM_BLOCK_SHIFT-9))
+#define BM_BIT_TO_SECT(x)   ((sector_t)(x)<<(BM_BLOCK_SHIFT-9))
+#define BM_SECT_PER_BIT     BM_BIT_TO_SECT(1)
+
+/* bit to represented kilo byte conversion */
+#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10))
+
+/* in which _bitmap_ extent (resp. sector) the bit for a certain
+ * _storage_ sector is located in */
+#define BM_SECT_TO_EXT(x)   ((x)>>(BM_EXT_SHIFT-9))
+
+/* how much _storage_ sectors we have per bitmap sector */
+#define BM_EXT_TO_SECT(x)   ((sector_t)(x) << (BM_EXT_SHIFT-9))
+#define BM_SECT_PER_EXT     BM_EXT_TO_SECT(1)
+
+/* in one sector of the bitmap, we have this many activity_log extents. */
+#define AL_EXT_PER_BM_SECT  (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
+#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+
+#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
+#define BM_BLOCKS_PER_BM_EXT_MASK  ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
+
+/* the extent in "PER_EXTENT" below is an activity log extent
+ * we need that many (long words/bytes) to store the bitmap
+ *		     of one AL_EXTENT_SIZE chunk of storage.
+ * we can store the bitmap for that many AL_EXTENTS within
+ * one sector of the _on_disk_ bitmap:
+ * bit	 0	  bit 37   bit 38	     bit (512*8)-1
+ *	     ...|........|........|.. // ..|........|
+ * sect. 0	 `296	  `304			   ^(512*8*8)-1
+ *
+#define BM_WORDS_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
+#define BM_BYTES_PER_EXT    ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 )  // 128
+#define BM_EXT_PER_SECT	    ( 512 / BM_BYTES_PER_EXTENT )	 //   4
+ */
+
+#define DRBD_MAX_SECTORS_32 (0xffffffffLU)
+#define DRBD_MAX_SECTORS_BM \
+	  ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9)))
+#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_BM
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM
+#elif !defined(CONFIG_LBD) && BITS_PER_LONG == 32
+#define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
+#else
+#define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_BM
+/* 16 TB in units of sectors */
+#if BITS_PER_LONG == 32
+/* adjust by one page worth of bitmap,
+ * so we won't wrap around in drbd_bm_find_next_bit.
+ * you should use 64bit OS for that much storage, anyways. */
+#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
+#else
+#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32)
+#endif
+#endif
+
+/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
+ * With a value of 6 all IO in one 32K block make it to the same slot of the
+ * hash table. */
+#define HT_SHIFT 6
+#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
+
+/* Number of elements in the app_reads_hash */
+#define APP_R_HSIZE 15
+
+extern int  drbd_bm_init(struct drbd_conf *mdev);
+extern int  drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors);
+extern void drbd_bm_cleanup(struct drbd_conf *mdev);
+extern void drbd_bm_set_all(struct drbd_conf *mdev);
+extern void drbd_bm_clear_all(struct drbd_conf *mdev);
+extern int  drbd_bm_set_bits(
+		struct drbd_conf *mdev, unsigned long s, unsigned long e);
+extern int  drbd_bm_clear_bits(
+		struct drbd_conf *mdev, unsigned long s, unsigned long e);
+/* bm_set_bits variant for use while holding drbd_bm_lock */
+extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
+		const unsigned long s, const unsigned long e);
+extern int  drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
+extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
+extern int  drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local);
+extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
+extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
+		unsigned long al_enr);
+extern size_t	     drbd_bm_words(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
+extern sector_t      drbd_bm_capacity(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+/* bm_find_next variants for use while you hold drbd_bm_lock() */
+extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
+extern int drbd_bm_rs_done(struct drbd_conf *mdev);
+/* for receive_bitmap */
+extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
+		size_t number, unsigned long *buffer);
+/* for _drbd_send_bitmap and drbd_bm_write_sect */
+extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
+		size_t number, unsigned long *buffer);
+
+extern void drbd_bm_lock(struct drbd_conf *mdev, char *why);
+extern void drbd_bm_unlock(struct drbd_conf *mdev);
+
+extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
+/* drbd_main.c */
+
+extern struct kmem_cache *drbd_request_cache;
+extern struct kmem_cache *drbd_ee_cache;	/* epoch entries */
+extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
+extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
+extern mempool_t *drbd_request_mempool;
+extern mempool_t *drbd_ee_mempool;
+
+extern struct page *drbd_pp_pool; /* drbd's page pool */
+extern spinlock_t   drbd_pp_lock;
+extern int	    drbd_pp_vacant;
+extern wait_queue_head_t drbd_pp_wait;
+
+extern rwlock_t global_state_lock;
+
+extern struct drbd_conf *drbd_new_device(unsigned int minor);
+extern void drbd_free_mdev(struct drbd_conf *mdev);
+
+extern int proc_details;
+
+/* drbd_req */
+extern int drbd_make_request_26(struct request_queue *q, struct bio *bio);
+extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
+extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
+extern int is_valid_ar_handle(struct drbd_request *, sector_t);
+
+
+/* drbd_nl.c */
+extern void drbd_suspend_io(struct drbd_conf *mdev);
+extern void drbd_resume_io(struct drbd_conf *mdev);
+extern char *ppsize(char *buf, unsigned long long size);
+extern sector_t drbd_new_dev_size(struct drbd_conf *,
+		struct drbd_backing_dev *);
+enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local);
+extern void resync_after_online_grow(struct drbd_conf *);
+extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
+extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
+		int force);
+enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
+
+/* drbd_worker.c */
+extern int drbd_worker(struct drbd_thread *thi);
+extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
+extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
+extern void resume_next_sg(struct drbd_conf *mdev);
+extern void suspend_other_sg(struct drbd_conf *mdev);
+extern int drbd_resync_finished(struct drbd_conf *mdev);
+/* maybe rather drbd_main.c ? */
+extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
+		struct drbd_backing_dev *bdev, sector_t sector, int rw);
+extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+
+static inline void ov_oos_print(struct drbd_conf *mdev)
+{
+	if (mdev->ov_last_oos_size) {
+		dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
+		     (unsigned long long)mdev->ov_last_oos_start,
+		     (unsigned long)mdev->ov_last_oos_size);
+	}
+	mdev->ov_last_oos_size=0;
+}
+
+
+extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
+/* worker callbacks */
+extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
+extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
+extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
+extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
+
+extern void resync_timer_fn(unsigned long data);
+
+/* drbd_receiver.c */
+extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
+extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+					    u64 id,
+					    sector_t sector,
+					    unsigned int data_size,
+					    gfp_t gfp_mask) __must_hold(local);
+extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e);
+extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+		struct list_head *head);
+extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+		struct list_head *head);
+extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
+extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
+extern void drbd_flush_workqueue(struct drbd_conf *mdev);
+
+/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
+ * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
+static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
+			char __user *optval, int optlen)
+{
+	int err;
+	if (level == SOL_SOCKET)
+		err = sock_setsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->setsockopt(sock, level, optname, optval,
+					    optlen);
+	return err;
+}
+
+static inline void drbd_tcp_cork(struct socket *sock)
+{
+	int __user val = 1;
+	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+			(char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_uncork(struct socket *sock)
+{
+	int __user val = 0;
+	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+			(char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_nodelay(struct socket *sock)
+{
+	int __user val = 1;
+	(void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
+			(char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_quickack(struct socket *sock)
+{
+	int __user val = 1;
+	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
+			(char __user *)&val, sizeof(val));
+}
+
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo);
+
+/* drbd_proc.c */
+extern struct proc_dir_entry *drbd_proc;
+extern struct file_operations drbd_proc_fops;
+extern const char *drbd_conn_str(enum drbd_conns s);
+extern const char *drbd_role_str(enum drbd_role s);
+
+/* drbd_actlog.c */
+extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
+extern int drbd_rs_del_all(struct drbd_conf *mdev);
+extern void drbd_rs_failed_io(struct drbd_conf *mdev,
+		sector_t sector, int size);
+extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
+extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
+		int size, const char *file, const unsigned int line);
+#define drbd_set_in_sync(mdev, sector, size) \
+	__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
+extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+		int size, const char *file, const unsigned int line);
+#define drbd_set_out_of_sync(mdev, sector, size) \
+	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
+extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
+extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev);
+extern void drbd_al_shrink(struct drbd_conf *mdev);
+
+
+/* drbd_nl.c */
+
+void drbd_nl_cleanup(void);
+int __init drbd_nl_init(void);
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
+void drbd_bcast_sync_progress(struct drbd_conf *mdev);
+void drbd_bcast_ee(struct drbd_conf *mdev,
+		const char *reason, const int dgs,
+		const char* seen_hash, const char* calc_hash,
+		const struct drbd_epoch_entry* e);
+
+
+/**
+ * DOC: DRBD State macros
+ *
+ * These macros are used to express state changes in easily readable form.
+ *
+ * The NS macros expand to a mask and a value, that can be bit ored onto the
+ * current state as soon as the spinlock (req_lock) was taken.
+ *
+ * The _NS macros are used for state functions that get called with the
+ * spinlock. These macros expand directly to the new state value.
+ *
+ * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
+ * to express state changes that affect more than one aspect of the state.
+ *
+ * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
+ * Means that the network connection was established and that the peer
+ * is in secondary role.
+ */
+#define role_MASK R_MASK
+#define peer_MASK R_MASK
+#define disk_MASK D_MASK
+#define pdsk_MASK D_MASK
+#define conn_MASK C_MASK
+#define susp_MASK 1
+#define user_isp_MASK 1
+#define aftr_isp_MASK 1
+
+#define NS(T, S) \
+	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T = (S); val; })
+#define NS2(T1, S1, T2, S2) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val; })
+#define NS3(T1, S1, T2, S2, T3, S3) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
+	({ union drbd_state val;  val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val.T3 = (S3); val; })
+
+#define _NS(D, T, S) \
+	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
+#define _NS2(D, T1, S1, T2, S2) \
+	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns; })
+#define _NS3(D, T1, S1, T2, S2, T3, S3) \
+	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
+
+/*
+ * inline helper functions
+ *************************/
+
+static inline void drbd_state_lock(struct drbd_conf *mdev)
+{
+	wait_event(mdev->misc_wait,
+		   !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+}
+
+static inline void drbd_state_unlock(struct drbd_conf *mdev)
+{
+	clear_bit(CLUSTER_ST_CHANGE, &mdev->flags);
+	wake_up(&mdev->misc_wait);
+}
+
+static inline int _drbd_set_state(struct drbd_conf *mdev,
+				   union drbd_state ns, enum chg_state_flags flags,
+				   struct completion *done)
+{
+	int rv;
+
+	read_lock(&global_state_lock);
+	rv = __drbd_set_state(mdev, ns, flags, done);
+	read_unlock(&global_state_lock);
+
+	return rv;
+}
+
+/**
+ * drbd_request_state() - Reqest a state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is verbose
+ * quite verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
+static inline int drbd_request_state(struct drbd_conf *mdev,
+				     union drbd_state mask,
+				     union drbd_state val)
+{
+	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+}
+
+#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+{
+	switch (mdev->ldev->dc.on_io_error) {
+	case EP_PASS_ON:
+		if (!forcedetach) {
+			if (printk_ratelimit())
+				dev_err(DEV, "Local IO failed in %s."
+					     "Passing error on...\n", where);
+			break;
+		}
+		/* NOTE fall through to detach case if forcedetach set */
+	case EP_DETACH:
+	case EP_CALL_HELPER:
+		if (mdev->state.disk > D_FAILED) {
+			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
+			dev_err(DEV, "Local IO failed in %s."
+				     "Detaching...\n", where);
+		}
+		break;
+	}
+}
+
+/**
+ * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers
+ * @mdev:	 DRBD device.
+ * @error:	 Error code passed to the IO completion callback
+ * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
+ *
+ * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
+ */
+#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
+static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
+	int error, int forcedetach, const char *where)
+{
+	if (error) {
+		unsigned long flags;
+		spin_lock_irqsave(&mdev->req_lock, flags);
+		__drbd_chk_io_error_(mdev, forcedetach, where);
+		spin_unlock_irqrestore(&mdev->req_lock, flags);
+	}
+}
+
+
+/**
+ * drbd_md_first_sector() - Returns the first sector number of the meta data area
+ * @bdev:	Meta data block device.
+ *
+ * BTW, for internal meta data, this happens to be the maximum capacity
+ * we could agree upon with our peer node.
+ */
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		return bdev->md.md_offset + bdev->md.bm_offset;
+	case DRBD_MD_INDEX_FLEX_EXT:
+	default:
+		return bdev->md.md_offset;
+	}
+}
+
+/**
+ * drbd_md_last_sector() - Return the last sector number of the meta data area
+ * @bdev:	Meta data block device.
+ */
+static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		return bdev->md.md_offset + MD_AL_OFFSET - 1;
+	case DRBD_MD_INDEX_FLEX_EXT:
+	default:
+		return bdev->md.md_offset + bdev->md.md_size_sect;
+	}
+}
+
+/* Returns the number of 512 byte sectors of the device */
+static inline sector_t drbd_get_capacity(struct block_device *bdev)
+{
+	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
+	return bdev ? bdev->bd_inode->i_size >> 9 : 0;
+}
+
+/**
+ * drbd_get_max_capacity() - Returns the capacity we announce to out peer
+ * @bdev:	Meta data block device.
+ *
+ * returns the capacity we announce to out peer.  we clip ourselves at the
+ * various MAX_SECTORS, because if we don't, current implementation will
+ * oops sooner or later
+ */
+static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
+{
+	sector_t s;
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		s = drbd_get_capacity(bdev->backing_bdev)
+			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+					drbd_md_first_sector(bdev))
+			: 0;
+		break;
+	case DRBD_MD_INDEX_FLEX_EXT:
+		s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+				drbd_get_capacity(bdev->backing_bdev));
+		/* clip at maximum size the meta device can support */
+		s = min_t(sector_t, s,
+			BM_EXT_TO_SECT(bdev->md.md_size_sect
+				     - bdev->md.bm_offset));
+		break;
+	default:
+		s = min_t(sector_t, DRBD_MAX_SECTORS,
+				drbd_get_capacity(bdev->backing_bdev));
+	}
+	return s;
+}
+
+/**
+ * drbd_md_ss__() - Return the sector number of our meta data super block
+ * @mdev:	DRBD device.
+ * @bdev:	Meta data block device.
+ */
+static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
+				    struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	default: /* external, some index */
+		return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+	case DRBD_MD_INDEX_INTERNAL:
+		/* with drbd08, internal meta data is always "flexible" */
+	case DRBD_MD_INDEX_FLEX_INT:
+		/* sizeof(struct md_on_disk_07) == 4k
+		 * position: last 4k aligned block of 4k size */
+		if (!bdev->backing_bdev) {
+			if (__ratelimit(&drbd_ratelimit_state)) {
+				dev_err(DEV, "bdev->backing_bdev==NULL\n");
+				dump_stack();
+			}
+			return 0;
+		}
+		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
+			- MD_AL_OFFSET;
+	case DRBD_MD_INDEX_FLEX_EXT:
+		return 0;
+	}
+}
+
+static inline void
+_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	list_add_tail(&w->list, &q->q);
+	up(&q->s);
+}
+
+static inline void
+drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&q->q_lock, flags);
+	list_add(&w->list, &q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
+	spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&q->q_lock, flags);
+	list_add_tail(&w->list, &q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
+	spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void wake_asender(struct drbd_conf *mdev)
+{
+	if (test_bit(SIGNAL_ASENDER, &mdev->flags))
+		force_sig(DRBD_SIG, mdev->asender.task);
+}
+
+static inline void request_ping(struct drbd_conf *mdev)
+{
+	set_bit(SEND_PING, &mdev->flags);
+	wake_asender(mdev);
+}
+
+static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
+	enum drbd_packets cmd)
+{
+	struct p_header h;
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping(struct drbd_conf *mdev)
+{
+	struct p_header h;
+	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
+{
+	struct p_header h;
+	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
+}
+
+static inline void drbd_thread_stop(struct drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, FALSE, TRUE);
+}
+
+static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, FALSE, FALSE);
+}
+
+static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, TRUE, FALSE);
+}
+
+/* counts how many answer packets packets we expect from our peer,
+ * for either explicit application requests,
+ * or implicit barrier packets as necessary.
+ * increased:
+ *  w_send_barrier
+ *  _req_mod(req, queue_for_net_write or queue_for_net_read);
+ *    it is much easier and equally valid to count what we queue for the
+ *    worker, even before it actually was queued or send.
+ *    (drbd_make_request_common; recovery path on read io-error)
+ * decreased:
+ *  got_BarrierAck (respective tl_clear, tl_clear_barrier)
+ *  _req_mod(req, data_received)
+ *     [from receive_DataReply]
+ *  _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked)
+ *     [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
+ *     for some reason it is NOT decreased in got_NegAck,
+ *     but in the resulting cleanup code from report_params.
+ *     we should try to remember the reason for that...
+ *  _req_mod(req, send_failed or send_canceled)
+ *  _req_mod(req, connection_lost_while_pending)
+ *     [from tl_clear_barrier]
+ */
+static inline void inc_ap_pending(struct drbd_conf *mdev)
+{
+	atomic_inc(&mdev->ap_pending_cnt);
+}
+
+#define ERR_IF_CNT_IS_NEGATIVE(which)				\
+	if (atomic_read(&mdev->which) < 0)			\
+		dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n",	\
+		    __func__ , __LINE__ ,			\
+		    atomic_read(&mdev->which))
+
+#define dec_ap_pending(mdev)	do {				\
+	typecheck(struct drbd_conf *, mdev);			\
+	if (atomic_dec_and_test(&mdev->ap_pending_cnt))		\
+		wake_up(&mdev->misc_wait);			\
+	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0)
+
+/* counts how many resync-related answers we still expect from the peer
+ *		     increase			decrease
+ * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
+ * C_SYNC_SOURCE sends P_RS_DATA_REPLY   (and expects P_WRITE_ACK whith ID_SYNCER)
+ *					   (or P_NEG_ACK with ID_SYNCER)
+ */
+static inline void inc_rs_pending(struct drbd_conf *mdev)
+{
+	atomic_inc(&mdev->rs_pending_cnt);
+}
+
+#define dec_rs_pending(mdev)	do {				\
+	typecheck(struct drbd_conf *, mdev);			\
+	atomic_dec(&mdev->rs_pending_cnt);			\
+	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0)
+
+/* counts how many answers we still need to send to the peer.
+ * increased on
+ *  receive_Data	unless protocol A;
+ *			we need to send a P_RECV_ACK (proto B)
+ *			or P_WRITE_ACK (proto C)
+ *  receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
+ *  receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
+ *  receive_Barrier_*	we need to send a P_BARRIER_ACK
+ */
+static inline void inc_unacked(struct drbd_conf *mdev)
+{
+	atomic_inc(&mdev->unacked_cnt);
+}
+
+#define dec_unacked(mdev)	do {				\
+	typecheck(struct drbd_conf *, mdev);			\
+	atomic_dec(&mdev->unacked_cnt);				\
+	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+#define sub_unacked(mdev, n)	do {				\
+	typecheck(struct drbd_conf *, mdev);			\
+	atomic_sub(n, &mdev->unacked_cnt);			\
+	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+
+static inline void put_net_conf(struct drbd_conf *mdev)
+{
+	if (atomic_dec_and_test(&mdev->net_cnt))
+		wake_up(&mdev->misc_wait);
+}
+
+/**
+ * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
+ * @mdev:	DRBD device.
+ *
+ * You have to call put_net_conf() when finished working with mdev->net_conf.
+ */
+static inline int get_net_conf(struct drbd_conf *mdev)
+{
+	int have_net_conf;
+
+	atomic_inc(&mdev->net_cnt);
+	have_net_conf = mdev->state.conn >= C_UNCONNECTED;
+	if (!have_net_conf)
+		put_net_conf(mdev);
+	return have_net_conf;
+}
+
+/**
+ * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
+ * @M:		DRBD device.
+ *
+ * You have to call put_ldev() when finished working with mdev->ldev.
+ */
+#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
+#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
+
+static inline void put_ldev(struct drbd_conf *mdev)
+{
+	__release(local);
+	if (atomic_dec_and_test(&mdev->local_cnt))
+		wake_up(&mdev->misc_wait);
+	D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
+}
+
+#ifndef __CHECKER__
+static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+	int io_allowed;
+
+	atomic_inc(&mdev->local_cnt);
+	io_allowed = (mdev->state.disk >= mins);
+	if (!io_allowed)
+		put_ldev(mdev);
+	return io_allowed;
+}
+#else
+extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins);
+#endif
+
+/* you must have an "get_ldev" reference */
+static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
+		unsigned long *bits_left, unsigned int *per_mil_done)
+{
+	/*
+	 * this is to break it at compile time when we change that
+	 * (we may feel 4TB maximum storage per drbd is not enough)
+	 */
+	typecheck(unsigned long, mdev->rs_total);
+
+	/* note: both rs_total and rs_left are in bits, i.e. in
+	 * units of BM_BLOCK_SIZE.
+	 * for the percentage, we don't care. */
+
+	*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+	/* >> 10 to prevent overflow,
+	 * +1 to prevent division by zero */
+	if (*bits_left > mdev->rs_total) {
+		/* doh. maybe a logic bug somewhere.
+		 * may also be just a race condition
+		 * between this and a disconnect during sync.
+		 * for now, just prevent in-kernel buffer overflow.
+		 */
+		smp_rmb();
+		dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
+				drbd_conn_str(mdev->state.conn),
+				*bits_left, mdev->rs_total, mdev->rs_failed);
+		*per_mil_done = 0;
+	} else {
+		/* make sure the calculation happens in long context */
+		unsigned long tmp = 1000UL -
+				(*bits_left >> 10)*1000UL
+				/ ((mdev->rs_total >> 10) + 1UL);
+		*per_mil_done = tmp;
+	}
+}
+
+
+/* this throttles on-the-fly application requests
+ * according to max_buffers settings;
+ * maybe re-implement using semaphores? */
+static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
+{
+	int mxb = 1000000; /* arbitrary limit on open requests */
+	if (get_net_conf(mdev)) {
+		mxb = mdev->net_conf->max_buffers;
+		put_net_conf(mdev);
+	}
+	return mxb;
+}
+
+static inline int drbd_state_is_stable(union drbd_state s)
+{
+
+	/* DO NOT add a default clause, we want the compiler to warn us
+	 * for any newly introduced state we may have forgotten to add here */
+
+	switch ((enum drbd_conns)s.conn) {
+	/* new io only accepted when there is no connection, ... */
+	case C_STANDALONE:
+	case C_WF_CONNECTION:
+	/* ... or there is a well established connection. */
+	case C_CONNECTED:
+	case C_SYNC_SOURCE:
+	case C_SYNC_TARGET:
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+	case C_PAUSED_SYNC_S:
+	case C_PAUSED_SYNC_T:
+		/* maybe stable, look at the disk state */
+		break;
+
+	/* no new io accepted during tansitional states
+	 * like handshake or teardown */
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_REPORT_PARAMS:
+	case C_STARTING_SYNC_S:
+	case C_STARTING_SYNC_T:
+	case C_WF_BITMAP_S:
+	case C_WF_BITMAP_T:
+	case C_WF_SYNC_UUID:
+	case C_MASK:
+		/* not "stable" */
+		return 0;
+	}
+
+	switch ((enum drbd_disk_state)s.disk) {
+	case D_DISKLESS:
+	case D_INCONSISTENT:
+	case D_OUTDATED:
+	case D_CONSISTENT:
+	case D_UP_TO_DATE:
+		/* disk state is stable as well. */
+		break;
+
+	/* no new io accepted during tansitional states */
+	case D_ATTACHING:
+	case D_FAILED:
+	case D_NEGOTIATING:
+	case D_UNKNOWN:
+	case D_MASK:
+		/* not "stable" */
+		return 0;
+	}
+
+	return 1;
+}
+
+static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
+{
+	int mxb = drbd_get_max_buffers(mdev);
+
+	if (mdev->state.susp)
+		return 0;
+	if (test_bit(SUSPEND_IO, &mdev->flags))
+		return 0;
+
+	/* to avoid potential deadlock or bitmap corruption,
+	 * in various places, we only allow new application io
+	 * to start during "stable" states. */
+
+	/* no new io accepted when attaching or detaching the disk */
+	if (!drbd_state_is_stable(mdev->state))
+		return 0;
+
+	/* since some older kernels don't have atomic_add_unless,
+	 * and we are within the spinlock anyways, we have this workaround.  */
+	if (atomic_read(&mdev->ap_bio_cnt) > mxb)
+		return 0;
+	if (test_bit(BITMAP_IO, &mdev->flags))
+		return 0;
+	return 1;
+}
+
+/* I'd like to use wait_event_lock_irq,
+ * but I'm not sure when it got introduced,
+ * and not sure when it has 3 or 4 arguments */
+static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
+{
+	/* compare with after_state_ch,
+	 * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
+	DEFINE_WAIT(wait);
+
+	/* we wait here
+	 *    as long as the device is suspended
+	 *    until the bitmap is no longer on the fly during connection
+	 *    handshake as long as we would exeed the max_buffer limit.
+	 *
+	 * to avoid races with the reconnect code,
+	 * we need to atomic_inc within the spinlock. */
+
+	spin_lock_irq(&mdev->req_lock);
+	while (!__inc_ap_bio_cond(mdev)) {
+		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock_irq(&mdev->req_lock);
+		schedule();
+		finish_wait(&mdev->misc_wait, &wait);
+		spin_lock_irq(&mdev->req_lock);
+	}
+	atomic_add(one_or_two, &mdev->ap_bio_cnt);
+	spin_unlock_irq(&mdev->req_lock);
+}
+
+static inline void dec_ap_bio(struct drbd_conf *mdev)
+{
+	int mxb = drbd_get_max_buffers(mdev);
+	int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
+
+	D_ASSERT(ap_bio >= 0);
+	/* this currently does wake_up for every dec_ap_bio!
+	 * maybe rather introduce some type of hysteresis?
+	 * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
+	if (ap_bio < mxb)
+		wake_up(&mdev->misc_wait);
+	if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+	}
+}
+
+static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+{
+	mdev->ed_uuid = val;
+}
+
+static inline int seq_cmp(u32 a, u32 b)
+{
+	/* we assume wrap around at 32bit.
+	 * for wrap around at 24bit (old atomic_t),
+	 * we'd have to
+	 *  a <<= 8; b <<= 8;
+	 */
+	return (s32)(a) - (s32)(b);
+}
+#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
+#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
+#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
+#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
+/* CAUTION: please no side effects in arguments! */
+#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
+
+static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
+{
+	unsigned int m;
+	spin_lock(&mdev->peer_seq_lock);
+	m = seq_max(mdev->peer_seq, new_seq);
+	mdev->peer_seq = m;
+	spin_unlock(&mdev->peer_seq_lock);
+	if (m == new_seq)
+		wake_up(&mdev->seq_wait);
+}
+
+static inline void drbd_update_congested(struct drbd_conf *mdev)
+{
+	struct sock *sk = mdev->data.socket->sk;
+	if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
+		set_bit(NET_CONGESTED, &mdev->flags);
+}
+
+static inline int drbd_queue_order_type(struct drbd_conf *mdev)
+{
+	/* sorry, we currently have no working implementation
+	 * of distributed TCQ stuff */
+#ifndef QUEUE_ORDERED_NONE
+#define QUEUE_ORDERED_NONE 0
+#endif
+	return QUEUE_ORDERED_NONE;
+}
+
+static inline void drbd_blk_run_queue(struct request_queue *q)
+{
+	if (q && q->unplug_fn)
+		q->unplug_fn(q);
+}
+
+static inline void drbd_kick_lo(struct drbd_conf *mdev)
+{
+	if (get_ldev(mdev)) {
+		drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
+		put_ldev(mdev);
+	}
+}
+
+static inline void drbd_md_flush(struct drbd_conf *mdev)
+{
+	int r;
+
+	if (test_bit(MD_NO_BARRIER, &mdev->flags))
+		return;
+
+	r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+	if (r) {
+		set_bit(MD_NO_BARRIER, &mdev->flags);
+		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
+	}
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
new file mode 100644
index 000000000000..edf0b8031e69
--- /dev/null
+++ b/drivers/block/drbd/drbd_main.c
@@ -0,0 +1,3735 @@
+/*
+   drbd.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
+   from Logicworks, Inc. for making SDP replication support possible.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <asm/uaccess.h>
+#include <asm/types.h>
+#include <net/sock.h>
+#include <linux/ctype.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+
+#include <linux/drbd_limits.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
+
+#include "drbd_vli.h"
+
+struct after_state_chg_work {
+	struct drbd_work w;
+	union drbd_state os;
+	union drbd_state ns;
+	enum chg_state_flags flags;
+	struct completion *done;
+};
+
+int drbdd_init(struct drbd_thread *);
+int drbd_worker(struct drbd_thread *);
+int drbd_asender(struct drbd_thread *);
+
+int drbd_init(void);
+static int drbd_open(struct block_device *bdev, fmode_t mode);
+static int drbd_release(struct gendisk *gd, fmode_t mode);
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags);
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void md_sync_timer_fn(unsigned long data);
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+
+DEFINE_TRACE(drbd_unplug);
+DEFINE_TRACE(drbd_uuid);
+DEFINE_TRACE(drbd_ee);
+DEFINE_TRACE(drbd_packet);
+DEFINE_TRACE(drbd_md_io);
+DEFINE_TRACE(drbd_epoch);
+DEFINE_TRACE(drbd_netlink);
+DEFINE_TRACE(drbd_actlog);
+DEFINE_TRACE(drbd_bio);
+DEFINE_TRACE(_drbd_resync);
+DEFINE_TRACE(drbd_req);
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+	      "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
+MODULE_VERSION(REL_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
+
+#include <linux/moduleparam.h>
+/* allow_open_on_secondary */
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+/* thanks to these macros, if compiled into the kernel (not-module),
+ * this becomes the boot parameter drbd.minor_count */
+module_param(minor_count, uint, 0444);
+module_param(disable_sendpage, bool, 0644);
+module_param(allow_oos, bool, 0);
+module_param(cn_idx, uint, 0444);
+module_param(proc_details, int, 0644);
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+int enable_faults;
+int fault_rate;
+static int fault_count;
+int fault_devs;
+/* bitmap of enabled faults */
+module_param(enable_faults, int, 0664);
+/* fault rate % value - applies to all enabled faults */
+module_param(fault_rate, int, 0664);
+/* count of faults inserted */
+module_param(fault_count, int, 0664);
+/* bitmap of devices to insert faults on */
+module_param(fault_devs, int, 0644);
+#endif
+
+/* module parameter, defined */
+unsigned int minor_count = 32;
+int disable_sendpage;
+int allow_oos;
+unsigned int cn_idx = CN_IDX_DRBD;
+int proc_details;       /* Detail level in proc drbd*/
+
+/* Module parameter for setting the user mode helper program
+ * to run. Default is /sbin/drbdadm */
+char usermode_helper[80] = "/sbin/drbdadm";
+
+module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+
+/* in 2.6.x, our device mapping and config info contains our virtual gendisks
+ * as member "struct gendisk *vdisk;"
+ */
+struct drbd_conf **minor_table;
+
+struct kmem_cache *drbd_request_cache;
+struct kmem_cache *drbd_ee_cache;	/* epoch entries */
+struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
+struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
+mempool_t *drbd_request_mempool;
+mempool_t *drbd_ee_mempool;
+
+/* I do not use a standard mempool, because:
+   1) I want to hand out the pre-allocated objects first.
+   2) I want to be able to interrupt sleeping allocation with a signal.
+   Note: This is a single linked list, the next pointer is the private
+	 member of struct page.
+ */
+struct page *drbd_pp_pool;
+spinlock_t   drbd_pp_lock;
+int          drbd_pp_vacant;
+wait_queue_head_t drbd_pp_wait;
+
+DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
+
+static struct block_device_operations drbd_ops = {
+	.owner =   THIS_MODULE,
+	.open =    drbd_open,
+	.release = drbd_release,
+};
+
+#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
+
+#ifdef __CHECKER__
+/* When checking with sparse, and this is an inline function, sparse will
+   give tons of false positives. When this is a real functions sparse works.
+ */
+int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+	int io_allowed;
+
+	atomic_inc(&mdev->local_cnt);
+	io_allowed = (mdev->state.disk >= mins);
+	if (!io_allowed) {
+		if (atomic_dec_and_test(&mdev->local_cnt))
+			wake_up(&mdev->misc_wait);
+	}
+	return io_allowed;
+}
+
+#endif
+
+/**
+ * DOC: The transfer log
+ *
+ * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
+ * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
+ * of the list. There is always at least one &struct drbd_tl_epoch object.
+ *
+ * Each &struct drbd_tl_epoch has a circular double linked list of requests
+ * attached.
+ */
+static int tl_init(struct drbd_conf *mdev)
+{
+	struct drbd_tl_epoch *b;
+
+	/* during device minor initialization, we may well use GFP_KERNEL */
+	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
+	if (!b)
+		return 0;
+	INIT_LIST_HEAD(&b->requests);
+	INIT_LIST_HEAD(&b->w.list);
+	b->next = NULL;
+	b->br_number = 4711;
+	b->n_req = 0;
+	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+
+	mdev->oldest_tle = b;
+	mdev->newest_tle = b;
+	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
+
+	mdev->tl_hash = NULL;
+	mdev->tl_hash_s = 0;
+
+	return 1;
+}
+
+static void tl_cleanup(struct drbd_conf *mdev)
+{
+	D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
+	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+	kfree(mdev->oldest_tle);
+	mdev->oldest_tle = NULL;
+	kfree(mdev->unused_spare_tle);
+	mdev->unused_spare_tle = NULL;
+	kfree(mdev->tl_hash);
+	mdev->tl_hash = NULL;
+	mdev->tl_hash_s = 0;
+}
+
+/**
+ * _tl_add_barrier() - Adds a barrier to the transfer log
+ * @mdev:	DRBD device.
+ * @new:	Barrier to be added before the current head of the TL.
+ *
+ * The caller must hold the req_lock.
+ */
+void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
+{
+	struct drbd_tl_epoch *newest_before;
+
+	INIT_LIST_HEAD(&new->requests);
+	INIT_LIST_HEAD(&new->w.list);
+	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+	new->next = NULL;
+	new->n_req = 0;
+
+	newest_before = mdev->newest_tle;
+	/* never send a barrier number == 0, because that is special-cased
+	 * when using TCQ for our write ordering code */
+	new->br_number = (newest_before->br_number+1) ?: 1;
+	if (mdev->newest_tle != new) {
+		mdev->newest_tle->next = new;
+		mdev->newest_tle = new;
+	}
+}
+
+/**
+ * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
+ * @mdev:	DRBD device.
+ * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
+ * @set_size:	Expected number of requests before that barrier.
+ *
+ * In case the passed barrier_nr or set_size does not match the oldest
+ * &struct drbd_tl_epoch objects this function will cause a termination
+ * of the connection.
+ */
+void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+		       unsigned int set_size)
+{
+	struct drbd_tl_epoch *b, *nob; /* next old barrier */
+	struct list_head *le, *tle;
+	struct drbd_request *r;
+
+	spin_lock_irq(&mdev->req_lock);
+
+	b = mdev->oldest_tle;
+
+	/* first some paranoia code */
+	if (b == NULL) {
+		dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+			barrier_nr);
+		goto bail;
+	}
+	if (b->br_number != barrier_nr) {
+		dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
+			barrier_nr, b->br_number);
+		goto bail;
+	}
+	if (b->n_req != set_size) {
+		dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n",
+			barrier_nr, set_size, b->n_req);
+		goto bail;
+	}
+
+	/* Clean up list of requests processed during current epoch */
+	list_for_each_safe(le, tle, &b->requests) {
+		r = list_entry(le, struct drbd_request, tl_requests);
+		_req_mod(r, barrier_acked);
+	}
+	/* There could be requests on the list waiting for completion
+	   of the write to the local disk. To avoid corruptions of
+	   slab's data structures we have to remove the lists head.
+
+	   Also there could have been a barrier ack out of sequence, overtaking
+	   the write acks - which would be a bug and violating write ordering.
+	   To not deadlock in case we lose connection while such requests are
+	   still pending, we need some way to find them for the
+	   _req_mode(connection_lost_while_pending).
+
+	   These have been list_move'd to the out_of_sequence_requests list in
+	   _req_mod(, barrier_acked) above.
+	   */
+	list_del_init(&b->requests);
+
+	nob = b->next;
+	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+		_tl_add_barrier(mdev, b);
+		if (nob)
+			mdev->oldest_tle = nob;
+		/* if nob == NULL b was the only barrier, and becomes the new
+		   barrier. Therefore mdev->oldest_tle points already to b */
+	} else {
+		D_ASSERT(nob != NULL);
+		mdev->oldest_tle = nob;
+		kfree(b);
+	}
+
+	spin_unlock_irq(&mdev->req_lock);
+	dec_ap_pending(mdev);
+
+	return;
+
+bail:
+	spin_unlock_irq(&mdev->req_lock);
+	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+}
+
+
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev:	DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer gets marked as our of sync. Called from the
+ * receiver thread and the worker thread.
+ */
+void tl_clear(struct drbd_conf *mdev)
+{
+	struct drbd_tl_epoch *b, *tmp;
+	struct list_head *le, *tle;
+	struct drbd_request *r;
+	int new_initial_bnr = net_random();
+
+	spin_lock_irq(&mdev->req_lock);
+
+	b = mdev->oldest_tle;
+	while (b) {
+		list_for_each_safe(le, tle, &b->requests) {
+			r = list_entry(le, struct drbd_request, tl_requests);
+			/* It would be nice to complete outside of spinlock.
+			 * But this is easier for now. */
+			_req_mod(r, connection_lost_while_pending);
+		}
+		tmp = b->next;
+
+		/* there could still be requests on that ring list,
+		 * in case local io is still pending */
+		list_del(&b->requests);
+
+		/* dec_ap_pending corresponding to queue_barrier.
+		 * the newest barrier may not have been queued yet,
+		 * in which case w.cb is still NULL. */
+		if (b->w.cb != NULL)
+			dec_ap_pending(mdev);
+
+		if (b == mdev->newest_tle) {
+			/* recycle, but reinit! */
+			D_ASSERT(tmp == NULL);
+			INIT_LIST_HEAD(&b->requests);
+			INIT_LIST_HEAD(&b->w.list);
+			b->w.cb = NULL;
+			b->br_number = new_initial_bnr;
+			b->n_req = 0;
+
+			mdev->oldest_tle = b;
+			break;
+		}
+		kfree(b);
+		b = tmp;
+	}
+
+	/* we expect this list to be empty. */
+	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+
+	/* but just in case, clean it up anyways! */
+	list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
+		r = list_entry(le, struct drbd_request, tl_requests);
+		/* It would be nice to complete outside of spinlock.
+		 * But this is easier for now. */
+		_req_mod(r, connection_lost_while_pending);
+	}
+
+	/* ensure bit indicating barrier is required is clear */
+	clear_bit(CREATE_BARRIER, &mdev->flags);
+
+	spin_unlock_irq(&mdev->req_lock);
+}
+
+/**
+ * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * @mdev:	DRBD device.
+ * @os:		old (current) state.
+ * @ns:		new (wanted) state.
+ */
+static int cl_wide_st_chg(struct drbd_conf *mdev,
+			  union drbd_state os, union drbd_state ns)
+{
+	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
+		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
+		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
+		  (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
+		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
+		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
+}
+
+int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		      union drbd_state mask, union drbd_state val)
+{
+	unsigned long flags;
+	union drbd_state os, ns;
+	int rv;
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	os = mdev->state;
+	ns.i = (os.i & ~mask.i) | val.i;
+	rv = _drbd_set_state(mdev, ns, f, NULL);
+	ns = mdev->state;
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ */
+void drbd_force_state(struct drbd_conf *mdev,
+	union drbd_state mask, union drbd_state val)
+{
+	drbd_change_state(mdev, CS_HARD, mask, val);
+}
+
+static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
+static int is_valid_state_transition(struct drbd_conf *,
+				     union drbd_state, union drbd_state);
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+				       union drbd_state ns, int *warn_sync_abort);
+int drbd_send_state_req(struct drbd_conf *,
+			union drbd_state, union drbd_state);
+
+static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
+				    union drbd_state mask, union drbd_state val)
+{
+	union drbd_state os, ns;
+	unsigned long flags;
+	int rv;
+
+	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
+		return SS_CW_SUCCESS;
+
+	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
+		return SS_CW_FAILED_BY_PEER;
+
+	rv = 0;
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	os = mdev->state;
+	ns.i = (os.i & ~mask.i) | val.i;
+	ns = sanitize_state(mdev, os, ns, NULL);
+
+	if (!cl_wide_st_chg(mdev, os, ns))
+		rv = SS_CW_NO_NEED;
+	if (!rv) {
+		rv = is_valid_state(mdev, ns);
+		if (rv == SS_SUCCESS) {
+			rv = is_valid_state_transition(mdev, ns, os);
+			if (rv == SS_SUCCESS)
+				rv = 0; /* cont waiting, otherwise fail. */
+		}
+	}
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_req_state() - Perform an eventually cluster wide state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
+ */
+static int drbd_req_state(struct drbd_conf *mdev,
+			  union drbd_state mask, union drbd_state val,
+			  enum chg_state_flags f)
+{
+	struct completion done;
+	unsigned long flags;
+	union drbd_state os, ns;
+	int rv;
+
+	init_completion(&done);
+
+	if (f & CS_SERIALIZE)
+		mutex_lock(&mdev->state_mutex);
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	os = mdev->state;
+	ns.i = (os.i & ~mask.i) | val.i;
+	ns = sanitize_state(mdev, os, ns, NULL);
+
+	if (cl_wide_st_chg(mdev, os, ns)) {
+		rv = is_valid_state(mdev, ns);
+		if (rv == SS_SUCCESS)
+			rv = is_valid_state_transition(mdev, ns, os);
+		spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+		if (rv < SS_SUCCESS) {
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
+		drbd_state_lock(mdev);
+		if (!drbd_send_state_req(mdev, mask, val)) {
+			drbd_state_unlock(mdev);
+			rv = SS_CW_FAILED_BY_PEER;
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
+		wait_event(mdev->state_wait,
+			(rv = _req_st_cond(mdev, mask, val)));
+
+		if (rv < SS_SUCCESS) {
+			drbd_state_unlock(mdev);
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+		spin_lock_irqsave(&mdev->req_lock, flags);
+		os = mdev->state;
+		ns.i = (os.i & ~mask.i) | val.i;
+		rv = _drbd_set_state(mdev, ns, f, &done);
+		drbd_state_unlock(mdev);
+	} else {
+		rv = _drbd_set_state(mdev, ns, f, &done);
+	}
+
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
+		D_ASSERT(current != mdev->worker.task);
+		wait_for_completion(&done);
+	}
+
+abort:
+	if (f & CS_SERIALIZE)
+		mutex_unlock(&mdev->state_mutex);
+
+	return rv;
+}
+
+/**
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
+ */
+int _drbd_request_state(struct drbd_conf *mdev,	union drbd_state mask,
+			union drbd_state val,	enum chg_state_flags f)
+{
+	int rv;
+
+	wait_event(mdev->state_wait,
+		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
+
+	return rv;
+}
+
+static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
+{
+	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
+	    name,
+	    drbd_conn_str(ns.conn),
+	    drbd_role_str(ns.role),
+	    drbd_role_str(ns.peer),
+	    drbd_disk_str(ns.disk),
+	    drbd_disk_str(ns.pdsk),
+	    ns.susp ? 's' : 'r',
+	    ns.aftr_isp ? 'a' : '-',
+	    ns.peer_isp ? 'p' : '-',
+	    ns.user_isp ? 'u' : '-'
+	    );
+}
+
+void print_st_err(struct drbd_conf *mdev,
+	union drbd_state os, union drbd_state ns, int err)
+{
+	if (err == SS_IN_TRANSIENT_STATE)
+		return;
+	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
+	print_st(mdev, " state", os);
+	print_st(mdev, "wanted", ns);
+}
+
+
+#define drbd_peer_str drbd_role_str
+#define drbd_pdsk_str drbd_disk_str
+
+#define drbd_susp_str(A)     ((A) ? "1" : "0")
+#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
+#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
+#define drbd_user_isp_str(A) ((A) ? "1" : "0")
+
+#define PSC(A) \
+	({ if (ns.A != os.A) { \
+		pbp += sprintf(pbp, #A "( %s -> %s ) ", \
+			      drbd_##A##_str(os.A), \
+			      drbd_##A##_str(ns.A)); \
+	} })
+
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev:	DRBD device.
+ * @ns:		State to consider.
+ */
+static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+{
+	/* See drbd_state_sw_errors in drbd_strings.c */
+
+	enum drbd_fencing_p fp;
+	int rv = SS_SUCCESS;
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+	}
+
+	if (get_net_conf(mdev)) {
+		if (!mdev->net_conf->two_primaries &&
+		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
+			rv = SS_TWO_PRIMARIES;
+		put_net_conf(mdev);
+	}
+
+	if (rv <= 0)
+		/* already found a reason to abort */;
+	else if (ns.role == R_SECONDARY && mdev->open_cnt)
+		rv = SS_DEVICE_IN_USE;
+
+	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (fp >= FP_RESOURCE &&
+		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
+		rv = SS_PRIMARY_NOP;
+
+	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
+		rv = SS_NO_LOCAL_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
+		rv = SS_NO_REMOTE_DISK;
+
+	else if ((ns.conn == C_CONNECTED ||
+		  ns.conn == C_WF_BITMAP_S ||
+		  ns.conn == C_SYNC_SOURCE ||
+		  ns.conn == C_PAUSED_SYNC_S) &&
+		  ns.disk == D_OUTDATED)
+		rv = SS_CONNECTED_OUTDATES;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		 (mdev->sync_conf.verify_alg[0] == 0))
+		rv = SS_NO_VERIFY_ALG;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		  mdev->agreed_pro_version < 88)
+		rv = SS_NOT_SUPPORTED;
+
+	return rv;
+}
+
+/**
+ * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @os:		old state.
+ */
+static int is_valid_state_transition(struct drbd_conf *mdev,
+				     union drbd_state ns, union drbd_state os)
+{
+	int rv = SS_SUCCESS;
+
+	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
+	    os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
+		rv = SS_ALREADY_STANDALONE;
+
+	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
+		rv = SS_IS_DISKLESS;
+
+	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
+		rv = SS_NO_NET_CONFIG;
+
+	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
+		rv = SS_LOWER_THAN_OUTDATED;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
+		rv = SS_IN_TRANSIENT_STATE;
+
+	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
+		rv = SS_IN_TRANSIENT_STATE;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+	    ns.conn != os.conn && os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
+	    os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	return rv;
+}
+
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev:	DRBD device.
+ * @os:		old state.
+ * @ns:		new state.
+ * @warn_sync_abort:
+ *
+ * When we loose connection, we have to set the state of the peers disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+				       union drbd_state ns, int *warn_sync_abort)
+{
+	enum drbd_fencing_p fp;
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+	}
+
+	/* Disallow Network errors to configure a device's network part */
+	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
+	    os.conn <= C_DISCONNECTING)
+		ns.conn = os.conn;
+
+	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */
+	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
+	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING)
+		ns.conn = os.conn;
+
+	/* After C_DISCONNECTING only C_STANDALONE may follow */
+	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
+		ns.conn = os.conn;
+
+	if (ns.conn < C_CONNECTED) {
+		ns.peer_isp = 0;
+		ns.peer = R_UNKNOWN;
+		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
+			ns.pdsk = D_UNKNOWN;
+	}
+
+	/* Clear the aftr_isp when becoming unconfigured */
+	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
+		ns.aftr_isp = 0;
+
+	if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS)
+		ns.pdsk = D_UNKNOWN;
+
+	/* Abort resync if a disk fails/detaches */
+	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
+	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
+		if (warn_sync_abort)
+			*warn_sync_abort = 1;
+		ns.conn = C_CONNECTED;
+	}
+
+	if (ns.conn >= C_CONNECTED &&
+	    ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
+	     (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
+		switch (ns.conn) {
+		case C_WF_BITMAP_T:
+		case C_PAUSED_SYNC_T:
+			ns.disk = D_OUTDATED;
+			break;
+		case C_CONNECTED:
+		case C_WF_BITMAP_S:
+		case C_SYNC_SOURCE:
+		case C_PAUSED_SYNC_S:
+			ns.disk = D_UP_TO_DATE;
+			break;
+		case C_SYNC_TARGET:
+			ns.disk = D_INCONSISTENT;
+			dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
+			break;
+		}
+		if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
+			dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
+	}
+
+	if (ns.conn >= C_CONNECTED &&
+	    (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
+		switch (ns.conn) {
+		case C_CONNECTED:
+		case C_WF_BITMAP_T:
+		case C_PAUSED_SYNC_T:
+		case C_SYNC_TARGET:
+			ns.pdsk = D_UP_TO_DATE;
+			break;
+		case C_WF_BITMAP_S:
+		case C_PAUSED_SYNC_S:
+			ns.pdsk = D_OUTDATED;
+			break;
+		case C_SYNC_SOURCE:
+			ns.pdsk = D_INCONSISTENT;
+			dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
+			break;
+		}
+		if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
+			dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
+	}
+
+	/* Connection breaks down before we finished "Negotiating" */
+	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
+	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
+			ns.disk = mdev->new_state_tmp.disk;
+			ns.pdsk = mdev->new_state_tmp.pdsk;
+		} else {
+			dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+			ns.disk = D_DISKLESS;
+			ns.pdsk = D_UNKNOWN;
+		}
+		put_ldev(mdev);
+	}
+
+	if (fp == FP_STONITH &&
+	    (ns.role == R_PRIMARY &&
+	     ns.conn < C_CONNECTED &&
+	     ns.pdsk > D_OUTDATED))
+			ns.susp = 1;
+
+	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
+		if (ns.conn == C_SYNC_SOURCE)
+			ns.conn = C_PAUSED_SYNC_S;
+		if (ns.conn == C_SYNC_TARGET)
+			ns.conn = C_PAUSED_SYNC_T;
+	} else {
+		if (ns.conn == C_PAUSED_SYNC_S)
+			ns.conn = C_SYNC_SOURCE;
+		if (ns.conn == C_PAUSED_SYNC_T)
+			ns.conn = C_SYNC_TARGET;
+	}
+
+	return ns;
+}
+
+/* helper for __drbd_set_state */
+static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
+{
+	if (cs == C_VERIFY_T) {
+		/* starting online verify from an arbitrary position
+		 * does not fit well into the existing protocol.
+		 * on C_VERIFY_T, we initialize ov_left and friends
+		 * implicitly in receive_DataRequest once the
+		 * first P_OV_REQUEST is received */
+		mdev->ov_start_sector = ~(sector_t)0;
+	} else {
+		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
+		if (bit >= mdev->rs_total)
+			mdev->ov_start_sector =
+				BM_BIT_TO_SECT(mdev->rs_total - 1);
+		mdev->ov_position = mdev->ov_start_sector;
+	}
+}
+
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @flags:	Flags
+ * @done:	Optional completion, that will get completed after the after_state_ch() finished
+ *
+ * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ */
+int __drbd_set_state(struct drbd_conf *mdev,
+		    union drbd_state ns, enum chg_state_flags flags,
+		    struct completion *done)
+{
+	union drbd_state os;
+	int rv = SS_SUCCESS;
+	int warn_sync_abort = 0;
+	struct after_state_chg_work *ascw;
+
+	os = mdev->state;
+
+	ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
+
+	if (ns.i == os.i)
+		return SS_NOTHING_TO_DO;
+
+	if (!(flags & CS_HARD)) {
+		/*  pre-state-change checks ; only look at ns  */
+		/* See drbd_state_sw_errors in drbd_strings.c */
+
+		rv = is_valid_state(mdev, ns);
+		if (rv < SS_SUCCESS) {
+			/* If the old state was illegal as well, then let
+			   this happen...*/
+
+			if (is_valid_state(mdev, os) == rv) {
+				dev_err(DEV, "Considering state change from bad state. "
+				    "Error would be: '%s'\n",
+				    drbd_set_st_err_str(rv));
+				print_st(mdev, "old", os);
+				print_st(mdev, "new", ns);
+				rv = is_valid_state_transition(mdev, ns, os);
+			}
+		} else
+			rv = is_valid_state_transition(mdev, ns, os);
+	}
+
+	if (rv < SS_SUCCESS) {
+		if (flags & CS_VERBOSE)
+			print_st_err(mdev, os, ns, rv);
+		return rv;
+	}
+
+	if (warn_sync_abort)
+		dev_warn(DEV, "Resync aborted.\n");
+
+	{
+		char *pbp, pb[300];
+		pbp = pb;
+		*pbp = 0;
+		PSC(role);
+		PSC(peer);
+		PSC(conn);
+		PSC(disk);
+		PSC(pdsk);
+		PSC(susp);
+		PSC(aftr_isp);
+		PSC(peer_isp);
+		PSC(user_isp);
+		dev_info(DEV, "%s\n", pb);
+	}
+
+	/* solve the race between becoming unconfigured,
+	 * worker doing the cleanup, and
+	 * admin reconfiguring us:
+	 * on (re)configure, first set CONFIG_PENDING,
+	 * then wait for a potentially exiting worker,
+	 * start the worker, and schedule one no_op.
+	 * then proceed with configuration.
+	 */
+	if (ns.disk == D_DISKLESS &&
+	    ns.conn == C_STANDALONE &&
+	    ns.role == R_SECONDARY &&
+	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
+		set_bit(DEVICE_DYING, &mdev->flags);
+
+	mdev->state.i = ns.i;
+	wake_up(&mdev->misc_wait);
+	wake_up(&mdev->state_wait);
+
+	/*   post-state-change actions   */
+	if (os.conn >= C_SYNC_SOURCE   && ns.conn <= C_CONNECTED) {
+		set_bit(STOP_SYNC_TIMER, &mdev->flags);
+		mod_timer(&mdev->resync_timer, jiffies);
+	}
+
+	/* aborted verify run. log the last position */
+	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
+	    ns.conn < C_CONNECTED) {
+		mdev->ov_start_sector =
+			BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+		dev_info(DEV, "Online Verify reached sector %llu\n",
+			(unsigned long long)mdev->ov_start_sector);
+	}
+
+	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
+		dev_info(DEV, "Syncer continues.\n");
+		mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time;
+		if (ns.conn == C_SYNC_TARGET) {
+			if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))
+				mod_timer(&mdev->resync_timer, jiffies);
+			/* This if (!test_bit) is only needed for the case
+			   that a device that has ceased to used its timer,
+			   i.e. it is already in drbd_resync_finished() gets
+			   paused and resumed. */
+		}
+	}
+
+	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
+	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
+		dev_info(DEV, "Resync suspended\n");
+		mdev->rs_mark_time = jiffies;
+		if (ns.conn == C_PAUSED_SYNC_T)
+			set_bit(STOP_SYNC_TIMER, &mdev->flags);
+	}
+
+	if (os.conn == C_CONNECTED &&
+	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
+		mdev->ov_position = 0;
+		mdev->rs_total =
+		mdev->rs_mark_left = drbd_bm_bits(mdev);
+		if (mdev->agreed_pro_version >= 90)
+			set_ov_position(mdev, ns.conn);
+		else
+			mdev->ov_start_sector = 0;
+		mdev->ov_left = mdev->rs_total
+			      - BM_SECT_TO_BIT(mdev->ov_position);
+		mdev->rs_start     =
+		mdev->rs_mark_time = jiffies;
+		mdev->ov_last_oos_size = 0;
+		mdev->ov_last_oos_start = 0;
+
+		if (ns.conn == C_VERIFY_S) {
+			dev_info(DEV, "Starting Online Verify from sector %llu\n",
+					(unsigned long long)mdev->ov_position);
+			mod_timer(&mdev->resync_timer, jiffies);
+		}
+	}
+
+	if (get_ldev(mdev)) {
+		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
+						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
+
+		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
+			mdf |= MDF_CRASHED_PRIMARY;
+		if (mdev->state.role == R_PRIMARY ||
+		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
+			mdf |= MDF_PRIMARY_IND;
+		if (mdev->state.conn > C_WF_REPORT_PARAMS)
+			mdf |= MDF_CONNECTED_IND;
+		if (mdev->state.disk > D_INCONSISTENT)
+			mdf |= MDF_CONSISTENT;
+		if (mdev->state.disk > D_OUTDATED)
+			mdf |= MDF_WAS_UP_TO_DATE;
+		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
+			mdf |= MDF_PEER_OUT_DATED;
+		if (mdf != mdev->ldev->md.flags) {
+			mdev->ldev->md.flags = mdf;
+			drbd_md_mark_dirty(mdev);
+		}
+		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
+			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
+		put_ldev(mdev);
+	}
+
+	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
+	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
+	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
+		set_bit(CONSIDER_RESYNC, &mdev->flags);
+
+	/* Receiver should clean up itself */
+	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
+		drbd_thread_stop_nowait(&mdev->receiver);
+
+	/* Now the receiver finished cleaning up itself, it should die */
+	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
+		drbd_thread_stop_nowait(&mdev->receiver);
+
+	/* Upon network failure, we need to restart the receiver. */
+	if (os.conn > C_TEAR_DOWN &&
+	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
+		drbd_thread_restart_nowait(&mdev->receiver);
+
+	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
+	if (ascw) {
+		ascw->os = os;
+		ascw->ns = ns;
+		ascw->flags = flags;
+		ascw->w.cb = w_after_state_ch;
+		ascw->done = done;
+		drbd_queue_work(&mdev->data.work, &ascw->w);
+	} else {
+		dev_warn(DEV, "Could not kmalloc an ascw\n");
+	}
+
+	return rv;
+}
+
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct after_state_chg_work *ascw =
+		container_of(w, struct after_state_chg_work, w);
+	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
+	if (ascw->flags & CS_WAIT_COMPLETE) {
+		D_ASSERT(ascw->done != NULL);
+		complete(ascw->done);
+	}
+	kfree(ascw);
+
+	return 1;
+}
+
+static void abw_start_sync(struct drbd_conf *mdev, int rv)
+{
+	if (rv) {
+		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
+		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
+		return;
+	}
+
+	switch (mdev->state.conn) {
+	case C_STARTING_SYNC_T:
+		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+		break;
+	case C_STARTING_SYNC_S:
+		drbd_start_resync(mdev, C_SYNC_SOURCE);
+		break;
+	}
+}
+
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev:	DRBD device.
+ * @os:		old state.
+ * @ns:		new state.
+ * @flags:	Flags
+ */
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags)
+{
+	enum drbd_fencing_p fp;
+
+	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+		clear_bit(CRASHED_PRIMARY, &mdev->flags);
+		if (mdev->p_uuid)
+			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
+	}
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+	}
+
+	/* Inform userspace about the change... */
+	drbd_bcast_state(mdev, ns);
+
+	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+		drbd_khelper(mdev, "pri-on-incon-degr");
+
+	/* Here we have the actions that are performed after a
+	   state change. This function might sleep */
+
+	if (fp == FP_STONITH && ns.susp) {
+		/* case1: The outdate peer handler is successful:
+		 * case2: The connection was established again: */
+		if ((os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) ||
+		    (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) {
+			tl_clear(mdev);
+			spin_lock_irq(&mdev->req_lock);
+			_drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL);
+			spin_unlock_irq(&mdev->req_lock);
+		}
+	}
+	/* Do not change the order of the if above and the two below... */
+	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev);
+	}
+	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
+		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+
+	/* Lost contact to peer's copy of the data */
+	if ((os.pdsk >= D_INCONSISTENT &&
+	     os.pdsk != D_UNKNOWN &&
+	     os.pdsk != D_OUTDATED)
+	&&  (ns.pdsk < D_INCONSISTENT ||
+	     ns.pdsk == D_UNKNOWN ||
+	     ns.pdsk == D_OUTDATED)) {
+		kfree(mdev->p_uuid);
+		mdev->p_uuid = NULL;
+		if (get_ldev(mdev)) {
+			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				drbd_uuid_new_current(mdev);
+				drbd_send_uuids(mdev);
+			}
+			put_ldev(mdev);
+		}
+	}
+
+	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
+			drbd_uuid_new_current(mdev);
+
+		/* D_DISKLESS Peer becomes secondary */
+		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
+			drbd_al_to_on_disk_bm(mdev);
+		put_ldev(mdev);
+	}
+
+	/* Last part of the attaching process ... */
+	if (ns.conn >= C_CONNECTED &&
+	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
+		kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
+		mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
+		drbd_send_sizes(mdev, 0);  /* to start sync... */
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev);
+	}
+
+	/* We want to pause/continue resync, tell peer. */
+	if (ns.conn >= C_CONNECTED &&
+	     ((os.aftr_isp != ns.aftr_isp) ||
+	      (os.user_isp != ns.user_isp)))
+		drbd_send_state(mdev);
+
+	/* In case one of the isp bits got set, suspend other devices. */
+	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+		suspend_other_sg(mdev);
+
+	/* Make sure the peer gets informed about eventual state
+	   changes (ISP bits) while we were in WFReportParams. */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev);
+
+	/* We are in the progress to start a full sync... */
+	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+
+	/* We are invalidating our self... */
+	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
+	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+
+	if (os.disk > D_FAILED && ns.disk == D_FAILED) {
+		enum drbd_io_error_p eh;
+
+		eh = EP_PASS_ON;
+		if (get_ldev_if_state(mdev, D_FAILED)) {
+			eh = mdev->ldev->dc.on_io_error;
+			put_ldev(mdev);
+		}
+
+		drbd_rs_cancel_all(mdev);
+		/* since get_ldev() only works as long as disk>=D_INCONSISTENT,
+		   and it is D_DISKLESS here, local_cnt can only go down, it can
+		   not increase... It will reach zero */
+		wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+		mdev->rs_total = 0;
+		mdev->rs_failed = 0;
+		atomic_set(&mdev->rs_pending_cnt, 0);
+
+		spin_lock_irq(&mdev->req_lock);
+		_drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
+		spin_unlock_irq(&mdev->req_lock);
+
+		if (eh == EP_CALL_HELPER)
+			drbd_khelper(mdev, "local-io-error");
+	}
+
+	if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
+
+		if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ {
+			if (drbd_send_state(mdev))
+				dev_warn(DEV, "Notified peer that my disk is broken.\n");
+			else
+				dev_err(DEV, "Sending state in drbd_io_error() failed\n");
+		}
+
+		lc_destroy(mdev->resync);
+		mdev->resync = NULL;
+		lc_destroy(mdev->act_log);
+		mdev->act_log = NULL;
+		__no_warn(local,
+			drbd_free_bc(mdev->ldev);
+			mdev->ldev = NULL;);
+
+		if (mdev->md_io_tmpp)
+			__free_page(mdev->md_io_tmpp);
+	}
+
+	/* Disks got bigger while they were detached */
+	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
+	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+		if (ns.conn == C_CONNECTED)
+			resync_after_online_grow(mdev);
+	}
+
+	/* A resync finished or aborted, wake paused devices... */
+	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
+	    (os.peer_isp && !ns.peer_isp) ||
+	    (os.user_isp && !ns.user_isp))
+		resume_next_sg(mdev);
+
+	/* Upon network connection, we need to start the receiver */
+	if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
+		drbd_thread_start(&mdev->receiver);
+
+	/* Terminate worker thread if we are unconfigured - it will be
+	   restarted as needed... */
+	if (ns.disk == D_DISKLESS &&
+	    ns.conn == C_STANDALONE &&
+	    ns.role == R_SECONDARY) {
+		if (os.aftr_isp != ns.aftr_isp)
+			resume_next_sg(mdev);
+		/* set in __drbd_set_state, unless CONFIG_PENDING was set */
+		if (test_bit(DEVICE_DYING, &mdev->flags))
+			drbd_thread_stop_nowait(&mdev->worker);
+	}
+
+	drbd_md_sync(mdev);
+}
+
+
+static int drbd_thread_setup(void *arg)
+{
+	struct drbd_thread *thi = (struct drbd_thread *) arg;
+	struct drbd_conf *mdev = thi->mdev;
+	unsigned long flags;
+	int retval;
+
+restart:
+	retval = thi->function(thi);
+
+	spin_lock_irqsave(&thi->t_lock, flags);
+
+	/* if the receiver has been "Exiting", the last thing it did
+	 * was set the conn state to "StandAlone",
+	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
+	 * and receiver thread will be "started".
+	 * drbd_thread_start needs to set "Restarting" in that case.
+	 * t_state check and assignment needs to be within the same spinlock,
+	 * so either thread_start sees Exiting, and can remap to Restarting,
+	 * or thread_start see None, and can proceed as normal.
+	 */
+
+	if (thi->t_state == Restarting) {
+		dev_info(DEV, "Restarting %s\n", current->comm);
+		thi->t_state = Running;
+		spin_unlock_irqrestore(&thi->t_lock, flags);
+		goto restart;
+	}
+
+	thi->task = NULL;
+	thi->t_state = None;
+	smp_mb();
+	complete(&thi->stop);
+	spin_unlock_irqrestore(&thi->t_lock, flags);
+
+	dev_info(DEV, "Terminating %s\n", current->comm);
+
+	/* Release mod reference taken when thread was started */
+	module_put(THIS_MODULE);
+	return retval;
+}
+
+static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
+		      int (*func) (struct drbd_thread *))
+{
+	spin_lock_init(&thi->t_lock);
+	thi->task    = NULL;
+	thi->t_state = None;
+	thi->function = func;
+	thi->mdev = mdev;
+}
+
+int drbd_thread_start(struct drbd_thread *thi)
+{
+	struct drbd_conf *mdev = thi->mdev;
+	struct task_struct *nt;
+	unsigned long flags;
+
+	const char *me =
+		thi == &mdev->receiver ? "receiver" :
+		thi == &mdev->asender  ? "asender"  :
+		thi == &mdev->worker   ? "worker"   : "NONSENSE";
+
+	/* is used from state engine doing drbd_thread_stop_nowait,
+	 * while holding the req lock irqsave */
+	spin_lock_irqsave(&thi->t_lock, flags);
+
+	switch (thi->t_state) {
+	case None:
+		dev_info(DEV, "Starting %s thread (from %s [%d])\n",
+				me, current->comm, current->pid);
+
+		/* Get ref on module for thread - this is released when thread exits */
+		if (!try_module_get(THIS_MODULE)) {
+			dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
+			spin_unlock_irqrestore(&thi->t_lock, flags);
+			return FALSE;
+		}
+
+		init_completion(&thi->stop);
+		D_ASSERT(thi->task == NULL);
+		thi->reset_cpu_mask = 1;
+		thi->t_state = Running;
+		spin_unlock_irqrestore(&thi->t_lock, flags);
+		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
+
+		nt = kthread_create(drbd_thread_setup, (void *) thi,
+				    "drbd%d_%s", mdev_to_minor(mdev), me);
+
+		if (IS_ERR(nt)) {
+			dev_err(DEV, "Couldn't start thread\n");
+
+			module_put(THIS_MODULE);
+			return FALSE;
+		}
+		spin_lock_irqsave(&thi->t_lock, flags);
+		thi->task = nt;
+		thi->t_state = Running;
+		spin_unlock_irqrestore(&thi->t_lock, flags);
+		wake_up_process(nt);
+		break;
+	case Exiting:
+		thi->t_state = Restarting;
+		dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
+				me, current->comm, current->pid);
+		/* fall through */
+	case Running:
+	case Restarting:
+	default:
+		spin_unlock_irqrestore(&thi->t_lock, flags);
+		break;
+	}
+
+	return TRUE;
+}
+
+
+void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
+{
+	unsigned long flags;
+
+	enum drbd_thread_state ns = restart ? Restarting : Exiting;
+
+	/* may be called from state engine, holding the req lock irqsave */
+	spin_lock_irqsave(&thi->t_lock, flags);
+
+	if (thi->t_state == None) {
+		spin_unlock_irqrestore(&thi->t_lock, flags);
+		if (restart)
+			drbd_thread_start(thi);
+		return;
+	}
+
+	if (thi->t_state != ns) {
+		if (thi->task == NULL) {
+			spin_unlock_irqrestore(&thi->t_lock, flags);
+			return;
+		}
+
+		thi->t_state = ns;
+		smp_mb();
+		init_completion(&thi->stop);
+		if (thi->task != current)
+			force_sig(DRBD_SIGKILL, thi->task);
+
+	}
+
+	spin_unlock_irqrestore(&thi->t_lock, flags);
+
+	if (wait)
+		wait_for_completion(&thi->stop);
+}
+
+#ifdef CONFIG_SMP
+/**
+ * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
+ * @mdev:	DRBD device.
+ *
+ * Forces all threads of a device onto the same CPU. This is beneficial for
+ * DRBD's performance. May be overwritten by user's configuration.
+ */
+void drbd_calc_cpu_mask(struct drbd_conf *mdev)
+{
+	int ord, cpu;
+
+	/* user override. */
+	if (cpumask_weight(mdev->cpu_mask))
+		return;
+
+	ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
+	for_each_online_cpu(cpu) {
+		if (ord-- == 0) {
+			cpumask_set_cpu(cpu, mdev->cpu_mask);
+			return;
+		}
+	}
+	/* should not be reached */
+	cpumask_setall(mdev->cpu_mask);
+}
+
+/**
+ * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
+ * @mdev:	DRBD device.
+ *
+ * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
+ * prematurely.
+ */
+void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
+{
+	struct task_struct *p = current;
+	struct drbd_thread *thi =
+		p == mdev->asender.task  ? &mdev->asender  :
+		p == mdev->receiver.task ? &mdev->receiver :
+		p == mdev->worker.task   ? &mdev->worker   :
+		NULL;
+	ERR_IF(thi == NULL)
+		return;
+	if (!thi->reset_cpu_mask)
+		return;
+	thi->reset_cpu_mask = 0;
+	set_cpus_allowed_ptr(p, mdev->cpu_mask);
+}
+#endif
+
+/* the appropriate socket mutex must be held already */
+int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+			  enum drbd_packets cmd, struct p_header *h,
+			  size_t size, unsigned msg_flags)
+{
+	int sent, ok;
+
+	ERR_IF(!h) return FALSE;
+	ERR_IF(!size) return FALSE;
+
+	h->magic   = BE_DRBD_MAGIC;
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be16(size-sizeof(struct p_header));
+
+	trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__);
+	sent = drbd_send(mdev, sock, h, size, msg_flags);
+
+	ok = (sent == size);
+	if (!ok)
+		dev_err(DEV, "short sent %s size=%d sent=%d\n",
+		    cmdname(cmd), (int)size, sent);
+	return ok;
+}
+
+/* don't pass the socket. we may only look at it
+ * when we hold the appropriate socket mutex.
+ */
+int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+		  enum drbd_packets cmd, struct p_header *h, size_t size)
+{
+	int ok = 0;
+	struct socket *sock;
+
+	if (use_data_socket) {
+		mutex_lock(&mdev->data.mutex);
+		sock = mdev->data.socket;
+	} else {
+		mutex_lock(&mdev->meta.mutex);
+		sock = mdev->meta.socket;
+	}
+
+	/* drbd_disconnect() could have called drbd_free_sock()
+	 * while we were waiting in down()... */
+	if (likely(sock != NULL))
+		ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+
+	if (use_data_socket)
+		mutex_unlock(&mdev->data.mutex);
+	else
+		mutex_unlock(&mdev->meta.mutex);
+	return ok;
+}
+
+int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
+		   size_t size)
+{
+	struct p_header h;
+	int ok;
+
+	h.magic   = BE_DRBD_MAGIC;
+	h.command = cpu_to_be16(cmd);
+	h.length  = cpu_to_be16(size);
+
+	if (!drbd_get_data_sock(mdev))
+		return 0;
+
+	trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__);
+
+	ok = (sizeof(h) ==
+		drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
+	ok = ok && (size ==
+		drbd_send(mdev, mdev->data.socket, data, size, 0));
+
+	drbd_put_data_sock(mdev);
+
+	return ok;
+}
+
+int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+{
+	struct p_rs_param_89 *p;
+	struct socket *sock;
+	int size, rv;
+	const int apv = mdev->agreed_pro_version;
+
+	size = apv <= 87 ? sizeof(struct p_rs_param)
+		: apv == 88 ? sizeof(struct p_rs_param)
+			+ strlen(mdev->sync_conf.verify_alg) + 1
+		: /* 89 */    sizeof(struct p_rs_param_89);
+
+	/* used from admin command context and receiver/worker context.
+	 * to avoid kmalloc, grab the socket right here,
+	 * then use the pre-allocated sbuf there */
+	mutex_lock(&mdev->data.mutex);
+	sock = mdev->data.socket;
+
+	if (likely(sock != NULL)) {
+		enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
+
+		p = &mdev->data.sbuf.rs_param_89;
+
+		/* initialize verify_alg and csums_alg */
+		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+		p->rate = cpu_to_be32(sc->rate);
+
+		if (apv >= 88)
+			strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
+		if (apv >= 89)
+			strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
+
+		rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
+	} else
+		rv = 0; /* not ok */
+
+	mutex_unlock(&mdev->data.mutex);
+
+	return rv;
+}
+
+int drbd_send_protocol(struct drbd_conf *mdev)
+{
+	struct p_protocol *p;
+	int size, rv;
+
+	size = sizeof(struct p_protocol);
+
+	if (mdev->agreed_pro_version >= 87)
+		size += strlen(mdev->net_conf->integrity_alg) + 1;
+
+	/* we must not recurse into our own queue,
+	 * as that is blocked during handshake */
+	p = kmalloc(size, GFP_NOIO);
+	if (p == NULL)
+		return 0;
+
+	p->protocol      = cpu_to_be32(mdev->net_conf->wire_protocol);
+	p->after_sb_0p   = cpu_to_be32(mdev->net_conf->after_sb_0p);
+	p->after_sb_1p   = cpu_to_be32(mdev->net_conf->after_sb_1p);
+	p->after_sb_2p   = cpu_to_be32(mdev->net_conf->after_sb_2p);
+	p->want_lose     = cpu_to_be32(mdev->net_conf->want_lose);
+	p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
+
+	if (mdev->agreed_pro_version >= 87)
+		strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
+
+	rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
+			   (struct p_header *)p, size);
+	kfree(p);
+	return rv;
+}
+
+int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
+{
+	struct p_uuids p;
+	int i;
+
+	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+		return 1;
+
+	for (i = UI_CURRENT; i < UI_SIZE; i++)
+		p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
+
+	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
+	p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
+	uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
+	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
+	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
+	p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
+
+	put_ldev(mdev);
+
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
+			     (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_uuids(struct drbd_conf *mdev)
+{
+	return _drbd_send_uuids(mdev, 0);
+}
+
+int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
+{
+	return _drbd_send_uuids(mdev, 8);
+}
+
+
+int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+{
+	struct p_rs_uuid p;
+
+	p.uuid = cpu_to_be64(val);
+
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
+			     (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
+{
+	struct p_sizes p;
+	sector_t d_size, u_size;
+	int q_order_type;
+	int ok;
+
+	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		D_ASSERT(mdev->ldev->backing_bdev);
+		d_size = drbd_get_max_capacity(mdev->ldev);
+		u_size = mdev->ldev->dc.disk_size;
+		q_order_type = drbd_queue_order_type(mdev);
+		p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
+		put_ldev(mdev);
+	} else {
+		d_size = 0;
+		u_size = 0;
+		q_order_type = QUEUE_ORDERED_NONE;
+	}
+
+	p.d_size = cpu_to_be64(d_size);
+	p.u_size = cpu_to_be64(u_size);
+	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
+	p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+	p.queue_order_type = cpu_to_be32(q_order_type);
+
+	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
+			   (struct p_header *)&p, sizeof(p));
+	return ok;
+}
+
+/**
+ * drbd_send_state() - Sends the drbd state to the peer
+ * @mdev:	DRBD device.
+ */
+int drbd_send_state(struct drbd_conf *mdev)
+{
+	struct socket *sock;
+	struct p_state p;
+	int ok = 0;
+
+	/* Grab state lock so we wont send state if we're in the middle
+	 * of a cluster wide state change on another thread */
+	drbd_state_lock(mdev);
+
+	mutex_lock(&mdev->data.mutex);
+
+	p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
+	sock = mdev->data.socket;
+
+	if (likely(sock != NULL)) {
+		ok = _drbd_send_cmd(mdev, sock, P_STATE,
+				    (struct p_header *)&p, sizeof(p), 0);
+	}
+
+	mutex_unlock(&mdev->data.mutex);
+
+	drbd_state_unlock(mdev);
+	return ok;
+}
+
+int drbd_send_state_req(struct drbd_conf *mdev,
+	union drbd_state mask, union drbd_state val)
+{
+	struct p_req_state p;
+
+	p.mask    = cpu_to_be32(mask.i);
+	p.val     = cpu_to_be32(val.i);
+
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
+			     (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+{
+	struct p_req_state_reply p;
+
+	p.retcode    = cpu_to_be32(retcode);
+
+	return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
+			     (struct p_header *)&p, sizeof(p));
+}
+
+int fill_bitmap_rle_bits(struct drbd_conf *mdev,
+	struct p_compressed_bm *p,
+	struct bm_xfer_ctx *c)
+{
+	struct bitstream bs;
+	unsigned long plain_bits;
+	unsigned long tmp;
+	unsigned long rl;
+	unsigned len;
+	unsigned toggle;
+	int bits;
+
+	/* may we use this feature? */
+	if ((mdev->sync_conf.use_rle == 0) ||
+		(mdev->agreed_pro_version < 90))
+			return 0;
+
+	if (c->bit_offset >= c->bm_bits)
+		return 0; /* nothing to do. */
+
+	/* use at most thus many bytes */
+	bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
+	memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
+	/* plain bits covered in this code string */
+	plain_bits = 0;
+
+	/* p->encoding & 0x80 stores whether the first run length is set.
+	 * bit offset is implicit.
+	 * start with toggle == 2 to be able to tell the first iteration */
+	toggle = 2;
+
+	/* see how much plain bits we can stuff into one packet
+	 * using RLE and VLI. */
+	do {
+		tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
+				    : _drbd_bm_find_next(mdev, c->bit_offset);
+		if (tmp == -1UL)
+			tmp = c->bm_bits;
+		rl = tmp - c->bit_offset;
+
+		if (toggle == 2) { /* first iteration */
+			if (rl == 0) {
+				/* the first checked bit was set,
+				 * store start value, */
+				DCBP_set_start(p, 1);
+				/* but skip encoding of zero run length */
+				toggle = !toggle;
+				continue;
+			}
+			DCBP_set_start(p, 0);
+		}
+
+		/* paranoia: catch zero runlength.
+		 * can only happen if bitmap is modified while we scan it. */
+		if (rl == 0) {
+			dev_err(DEV, "unexpected zero runlength while encoding bitmap "
+			    "t:%u bo:%lu\n", toggle, c->bit_offset);
+			return -1;
+		}
+
+		bits = vli_encode_bits(&bs, rl);
+		if (bits == -ENOBUFS) /* buffer full */
+			break;
+		if (bits <= 0) {
+			dev_err(DEV, "error while encoding bitmap: %d\n", bits);
+			return 0;
+		}
+
+		toggle = !toggle;
+		plain_bits += rl;
+		c->bit_offset = tmp;
+	} while (c->bit_offset < c->bm_bits);
+
+	len = bs.cur.b - p->code + !!bs.cur.bit;
+
+	if (plain_bits < (len << 3)) {
+		/* incompressible with this method.
+		 * we need to rewind both word and bit position. */
+		c->bit_offset -= plain_bits;
+		bm_xfer_ctx_bit_to_word_offset(c);
+		c->bit_offset = c->word_offset * BITS_PER_LONG;
+		return 0;
+	}
+
+	/* RLE + VLI was able to compress it just fine.
+	 * update c->word_offset. */
+	bm_xfer_ctx_bit_to_word_offset(c);
+
+	/* store pad_bits */
+	DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
+
+	return len;
+}
+
+enum { OK, FAILED, DONE }
+send_bitmap_rle_or_plain(struct drbd_conf *mdev,
+	struct p_header *h, struct bm_xfer_ctx *c)
+{
+	struct p_compressed_bm *p = (void*)h;
+	unsigned long num_words;
+	int len;
+	int ok;
+
+	len = fill_bitmap_rle_bits(mdev, p, c);
+
+	if (len < 0)
+		return FAILED;
+
+	if (len) {
+		DCBP_set_code(p, RLE_VLI_Bits);
+		ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
+			sizeof(*p) + len, 0);
+
+		c->packets[0]++;
+		c->bytes[0] += sizeof(*p) + len;
+
+		if (c->bit_offset >= c->bm_bits)
+			len = 0; /* DONE */
+	} else {
+		/* was not compressible.
+		 * send a buffer full of plain text bits instead. */
+		num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+		len = num_words * sizeof(long);
+		if (len)
+			drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
+		ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
+				   h, sizeof(struct p_header) + len, 0);
+		c->word_offset += num_words;
+		c->bit_offset = c->word_offset * BITS_PER_LONG;
+
+		c->packets[1]++;
+		c->bytes[1] += sizeof(struct p_header) + len;
+
+		if (c->bit_offset > c->bm_bits)
+			c->bit_offset = c->bm_bits;
+	}
+	ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
+
+	if (ok == DONE)
+		INFO_bm_xfer_stats(mdev, "send", c);
+	return ok;
+}
+
+/* See the comment at receive_bitmap() */
+int _drbd_send_bitmap(struct drbd_conf *mdev)
+{
+	struct bm_xfer_ctx c;
+	struct p_header *p;
+	int ret;
+
+	ERR_IF(!mdev->bitmap) return FALSE;
+
+	/* maybe we should use some per thread scratch page,
+	 * and allocate that during initial device creation? */
+	p = (struct p_header *) __get_free_page(GFP_NOIO);
+	if (!p) {
+		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+		return FALSE;
+	}
+
+	if (get_ldev(mdev)) {
+		if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+			dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
+			drbd_bm_set_all(mdev);
+			if (drbd_bm_write(mdev)) {
+				/* write_bm did fail! Leave full sync flag set in Meta P_DATA
+				 * but otherwise process as per normal - need to tell other
+				 * side that a full resync is required! */
+				dev_err(DEV, "Failed to write bitmap to disk!\n");
+			} else {
+				drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+				drbd_md_sync(mdev);
+			}
+		}
+		put_ldev(mdev);
+	}
+
+	c = (struct bm_xfer_ctx) {
+		.bm_bits = drbd_bm_bits(mdev),
+		.bm_words = drbd_bm_words(mdev),
+	};
+
+	do {
+		ret = send_bitmap_rle_or_plain(mdev, p, &c);
+	} while (ret == OK);
+
+	free_page((unsigned long) p);
+	return (ret == DONE);
+}
+
+int drbd_send_bitmap(struct drbd_conf *mdev)
+{
+	int err;
+
+	if (!drbd_get_data_sock(mdev))
+		return -1;
+	err = !_drbd_send_bitmap(mdev);
+	drbd_put_data_sock(mdev);
+	return err;
+}
+
+int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
+{
+	int ok;
+	struct p_barrier_ack p;
+
+	p.barrier  = barrier_nr;
+	p.set_size = cpu_to_be32(set_size);
+
+	if (mdev->state.conn < C_CONNECTED)
+		return FALSE;
+	ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
+			(struct p_header *)&p, sizeof(p));
+	return ok;
+}
+
+/**
+ * _drbd_send_ack() - Sends an ack packet
+ * @mdev:	DRBD device.
+ * @cmd:	Packet command code.
+ * @sector:	sector, needs to be in big endian byte order
+ * @blksize:	size in byte, needs to be in big endian byte order
+ * @block_id:	Id, big endian byte order
+ */
+static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+			  u64 sector,
+			  u32 blksize,
+			  u64 block_id)
+{
+	int ok;
+	struct p_block_ack p;
+
+	p.sector   = sector;
+	p.block_id = block_id;
+	p.blksize  = blksize;
+	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
+
+	if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
+		return FALSE;
+	ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
+				(struct p_header *)&p, sizeof(p));
+	return ok;
+}
+
+int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+		     struct p_data *dp)
+{
+	const int header_size = sizeof(struct p_data)
+			      - sizeof(struct p_header);
+	int data_size  = ((struct p_header *)dp)->length - header_size;
+
+	return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
+			      dp->block_id);
+}
+
+int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+		     struct p_block_req *rp)
+{
+	return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
+}
+
+/**
+ * drbd_send_ack() - Sends an ack packet
+ * @mdev:	DRBD device.
+ * @cmd:	Packet command code.
+ * @e:		Epoch entry.
+ */
+int drbd_send_ack(struct drbd_conf *mdev,
+	enum drbd_packets cmd, struct drbd_epoch_entry *e)
+{
+	return _drbd_send_ack(mdev, cmd,
+			      cpu_to_be64(e->sector),
+			      cpu_to_be32(e->size),
+			      e->block_id);
+}
+
+/* This function misuses the block_id field to signal if the blocks
+ * are is sync or not. */
+int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+		     sector_t sector, int blksize, u64 block_id)
+{
+	return _drbd_send_ack(mdev, cmd,
+			      cpu_to_be64(sector),
+			      cpu_to_be32(blksize),
+			      cpu_to_be64(block_id));
+}
+
+int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+		       sector_t sector, int size, u64 block_id)
+{
+	int ok;
+	struct p_block_req p;
+
+	p.sector   = cpu_to_be64(sector);
+	p.block_id = block_id;
+	p.blksize  = cpu_to_be32(size);
+
+	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
+				(struct p_header *)&p, sizeof(p));
+	return ok;
+}
+
+int drbd_send_drequest_csum(struct drbd_conf *mdev,
+			    sector_t sector, int size,
+			    void *digest, int digest_size,
+			    enum drbd_packets cmd)
+{
+	int ok;
+	struct p_block_req p;
+
+	p.sector   = cpu_to_be64(sector);
+	p.block_id = BE_DRBD_MAGIC + 0xbeef;
+	p.blksize  = cpu_to_be32(size);
+
+	p.head.magic   = BE_DRBD_MAGIC;
+	p.head.command = cpu_to_be16(cmd);
+	p.head.length  = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size);
+
+	mutex_lock(&mdev->data.mutex);
+
+	ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
+	ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
+
+	mutex_unlock(&mdev->data.mutex);
+
+	return ok;
+}
+
+int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
+{
+	int ok;
+	struct p_block_req p;
+
+	p.sector   = cpu_to_be64(sector);
+	p.block_id = BE_DRBD_MAGIC + 0xbabe;
+	p.blksize  = cpu_to_be32(size);
+
+	ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
+			   (struct p_header *)&p, sizeof(p));
+	return ok;
+}
+
+/* called on sndtimeo
+ * returns FALSE if we should retry,
+ * TRUE if we think connection is dead
+ */
+static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
+{
+	int drop_it;
+	/* long elapsed = (long)(jiffies - mdev->last_received); */
+
+	drop_it =   mdev->meta.socket == sock
+		|| !mdev->asender.task
+		|| get_t_state(&mdev->asender) != Running
+		|| mdev->state.conn < C_CONNECTED;
+
+	if (drop_it)
+		return TRUE;
+
+	drop_it = !--mdev->ko_count;
+	if (!drop_it) {
+		dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
+		       current->comm, current->pid, mdev->ko_count);
+		request_ping(mdev);
+	}
+
+	return drop_it; /* && (mdev->state == R_PRIMARY) */;
+}
+
+/* The idea of sendpage seems to be to put some kind of reference
+ * to the page into the skb, and to hand it over to the NIC. In
+ * this process get_page() gets called.
+ *
+ * As soon as the page was really sent over the network put_page()
+ * gets called by some part of the network layer. [ NIC driver? ]
+ *
+ * [ get_page() / put_page() increment/decrement the count. If count
+ *   reaches 0 the page will be freed. ]
+ *
+ * This works nicely with pages from FSs.
+ * But this means that in protocol A we might signal IO completion too early!
+ *
+ * In order not to corrupt data during a resync we must make sure
+ * that we do not reuse our own buffer pages (EEs) to early, therefore
+ * we have the net_ee list.
+ *
+ * XFS seems to have problems, still, it submits pages with page_count == 0!
+ * As a workaround, we disable sendpage on pages
+ * with page_count == 0 or PageSlab.
+ */
+static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
+		   int offset, size_t size)
+{
+	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	kunmap(page);
+	if (sent == size)
+		mdev->send_cnt += size>>9;
+	return sent == size;
+}
+
+static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
+		    int offset, size_t size)
+{
+	mm_segment_t oldfs = get_fs();
+	int sent, ok;
+	int len = size;
+
+	/* e.g. XFS meta- & log-data is in slab pages, which have a
+	 * page_count of 0 and/or have PageSlab() set.
+	 * we cannot use send_page for those, as that does get_page();
+	 * put_page(); and would cause either a VM_BUG directly, or
+	 * __page_cache_release a page that would actually still be referenced
+	 * by someone, leading to some obscure delayed Oops somewhere else. */
+	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+		return _drbd_no_send_page(mdev, page, offset, size);
+
+	drbd_update_congested(mdev);
+	set_fs(KERNEL_DS);
+	do {
+		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
+							offset, len,
+							MSG_NOSIGNAL);
+		if (sent == -EAGAIN) {
+			if (we_should_drop_the_connection(mdev,
+							  mdev->data.socket))
+				break;
+			else
+				continue;
+		}
+		if (sent <= 0) {
+			dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
+			     __func__, (int)size, len, sent);
+			break;
+		}
+		len    -= sent;
+		offset += sent;
+	} while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
+	set_fs(oldfs);
+	clear_bit(NET_CONGESTED, &mdev->flags);
+
+	ok = (len == 0);
+	if (likely(ok))
+		mdev->send_cnt += size>>9;
+	return ok;
+}
+
+static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+	struct bio_vec *bvec;
+	int i;
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		if (!_drbd_no_send_page(mdev, bvec->bv_page,
+				     bvec->bv_offset, bvec->bv_len))
+			return 0;
+	}
+	return 1;
+}
+
+static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+	struct bio_vec *bvec;
+	int i;
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		if (!_drbd_send_page(mdev, bvec->bv_page,
+				     bvec->bv_offset, bvec->bv_len))
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Used to send write requests
+ * R_PRIMARY -> Peer	(P_DATA)
+ */
+int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	int ok = 1;
+	struct p_data p;
+	unsigned int dp_flags = 0;
+	void *dgb;
+	int dgs;
+
+	if (!drbd_get_data_sock(mdev))
+		return 0;
+
+	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+		crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
+	p.head.magic   = BE_DRBD_MAGIC;
+	p.head.command = cpu_to_be16(P_DATA);
+	p.head.length  =
+		cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size);
+
+	p.sector   = cpu_to_be64(req->sector);
+	p.block_id = (unsigned long)req;
+	p.seq_num  = cpu_to_be32(req->seq_num =
+				 atomic_add_return(1, &mdev->packet_seq));
+	dp_flags = 0;
+
+	/* NOTE: no need to check if barriers supported here as we would
+	 *       not pass the test in make_request_common in that case
+	 */
+	if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) {
+		dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n");
+		/* dp_flags |= DP_HARDBARRIER; */
+	}
+	if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO))
+		dp_flags |= DP_RW_SYNC;
+	/* for now handle SYNCIO and UNPLUG
+	 * as if they still were one and the same flag */
+	if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG))
+		dp_flags |= DP_RW_SYNC;
+	if (mdev->state.conn >= C_SYNC_SOURCE &&
+	    mdev->state.conn <= C_PAUSED_SYNC_T)
+		dp_flags |= DP_MAY_SET_IN_SYNC;
+
+	p.dp_flags = cpu_to_be32(dp_flags);
+	trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__);
+	set_bit(UNPLUG_REMOTE, &mdev->flags);
+	ok = (sizeof(p) ==
+		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+	if (ok && dgs) {
+		dgb = mdev->int_dig_out;
+		drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+	}
+	if (ok) {
+		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+			ok = _drbd_send_bio(mdev, req->master_bio);
+		else
+			ok = _drbd_send_zc_bio(mdev, req->master_bio);
+	}
+
+	drbd_put_data_sock(mdev);
+	return ok;
+}
+
+/* answer packet, used to send data back for read requests:
+ *  Peer       -> (diskless) R_PRIMARY   (P_DATA_REPLY)
+ *  C_SYNC_SOURCE -> C_SYNC_TARGET         (P_RS_DATA_REPLY)
+ */
+int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+		    struct drbd_epoch_entry *e)
+{
+	int ok;
+	struct p_data p;
+	void *dgb;
+	int dgs;
+
+	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+		crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
+	p.head.magic   = BE_DRBD_MAGIC;
+	p.head.command = cpu_to_be16(cmd);
+	p.head.length  =
+		cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size);
+
+	p.sector   = cpu_to_be64(e->sector);
+	p.block_id = e->block_id;
+	/* p.seq_num  = 0;    No sequence numbers here.. */
+
+	/* Only called by our kernel thread.
+	 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
+	 * in response to admin command or module unload.
+	 */
+	if (!drbd_get_data_sock(mdev))
+		return 0;
+
+	trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__);
+	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
+					sizeof(p), MSG_MORE);
+	if (ok && dgs) {
+		dgb = mdev->int_dig_out;
+		drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+	}
+	if (ok)
+		ok = _drbd_send_zc_bio(mdev, e->private_bio);
+
+	drbd_put_data_sock(mdev);
+	return ok;
+}
+
+/*
+  drbd_send distinguishes two cases:
+
+  Packets sent via the data socket "sock"
+  and packets sent via the meta data socket "msock"
+
+		    sock                      msock
+  -----------------+-------------------------+------------------------------
+  timeout           conf.timeout / 2          conf.timeout / 2
+  timeout action    send a ping via msock     Abort communication
+					      and close all sockets
+*/
+
+/*
+ * you must have down()ed the appropriate [m]sock_mutex elsewhere!
+ */
+int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+	      void *buf, size_t size, unsigned msg_flags)
+{
+	struct kvec iov;
+	struct msghdr msg;
+	int rv, sent = 0;
+
+	if (!sock)
+		return -1000;
+
+	/* THINK  if (signal_pending) return ... ? */
+
+	iov.iov_base = buf;
+	iov.iov_len  = size;
+
+	msg.msg_name       = NULL;
+	msg.msg_namelen    = 0;
+	msg.msg_control    = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags      = msg_flags | MSG_NOSIGNAL;
+
+	if (sock == mdev->data.socket) {
+		mdev->ko_count = mdev->net_conf->ko_count;
+		drbd_update_congested(mdev);
+	}
+	do {
+		/* STRANGE
+		 * tcp_sendmsg does _not_ use its size parameter at all ?
+		 *
+		 * -EAGAIN on timeout, -EINTR on signal.
+		 */
+/* THINK
+ * do we need to block DRBD_SIG if sock == &meta.socket ??
+ * otherwise wake_asender() might interrupt some send_*Ack !
+ */
+		rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
+		if (rv == -EAGAIN) {
+			if (we_should_drop_the_connection(mdev, sock))
+				break;
+			else
+				continue;
+		}
+		D_ASSERT(rv != 0);
+		if (rv == -EINTR) {
+			flush_signals(current);
+			rv = 0;
+		}
+		if (rv < 0)
+			break;
+		sent += rv;
+		iov.iov_base += rv;
+		iov.iov_len  -= rv;
+	} while (sent < size);
+
+	if (sock == mdev->data.socket)
+		clear_bit(NET_CONGESTED, &mdev->flags);
+
+	if (rv <= 0) {
+		if (rv != -EAGAIN) {
+			dev_err(DEV, "%s_sendmsg returned %d\n",
+			    sock == mdev->meta.socket ? "msock" : "sock",
+			    rv);
+			drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+		} else
+			drbd_force_state(mdev, NS(conn, C_TIMEOUT));
+	}
+
+	return sent;
+}
+
+static int drbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct drbd_conf *mdev = bdev->bd_disk->private_data;
+	unsigned long flags;
+	int rv = 0;
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	/* to have a stable mdev->state.role
+	 * and no race with updating open_cnt */
+
+	if (mdev->state.role != R_PRIMARY) {
+		if (mode & FMODE_WRITE)
+			rv = -EROFS;
+		else if (!allow_oos)
+			rv = -EMEDIUMTYPE;
+	}
+
+	if (!rv)
+		mdev->open_cnt++;
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	return rv;
+}
+
+static int drbd_release(struct gendisk *gd, fmode_t mode)
+{
+	struct drbd_conf *mdev = gd->private_data;
+	mdev->open_cnt--;
+	return 0;
+}
+
+static void drbd_unplug_fn(struct request_queue *q)
+{
+	struct drbd_conf *mdev = q->queuedata;
+
+	trace_drbd_unplug(mdev, "got unplugged");
+
+	/* unplug FIRST */
+	spin_lock_irq(q->queue_lock);
+	blk_remove_plug(q);
+	spin_unlock_irq(q->queue_lock);
+
+	/* only if connected */
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
+		D_ASSERT(mdev->state.role == R_PRIMARY);
+		if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
+			/* add to the data.work queue,
+			 * unless already queued.
+			 * XXX this might be a good addition to drbd_queue_work
+			 * anyways, to detect "double queuing" ... */
+			if (list_empty(&mdev->unplug_work.list))
+				drbd_queue_work(&mdev->data.work,
+						&mdev->unplug_work);
+		}
+	}
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (mdev->state.disk >= D_INCONSISTENT)
+		drbd_kick_lo(mdev);
+}
+
+static void drbd_set_defaults(struct drbd_conf *mdev)
+{
+	mdev->sync_conf.after      = DRBD_AFTER_DEF;
+	mdev->sync_conf.rate       = DRBD_RATE_DEF;
+	mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF;
+	mdev->state = (union drbd_state) {
+		{ .role = R_SECONDARY,
+		  .peer = R_UNKNOWN,
+		  .conn = C_STANDALONE,
+		  .disk = D_DISKLESS,
+		  .pdsk = D_UNKNOWN,
+		  .susp = 0
+		} };
+}
+
+void drbd_init_set_defaults(struct drbd_conf *mdev)
+{
+	/* the memset(,0,) did most of this.
+	 * note: only assignments, no allocation in here */
+
+	drbd_set_defaults(mdev);
+
+	/* for now, we do NOT yet support it,
+	 * even though we start some framework
+	 * to eventually support barriers */
+	set_bit(NO_BARRIER_SUPP, &mdev->flags);
+
+	atomic_set(&mdev->ap_bio_cnt, 0);
+	atomic_set(&mdev->ap_pending_cnt, 0);
+	atomic_set(&mdev->rs_pending_cnt, 0);
+	atomic_set(&mdev->unacked_cnt, 0);
+	atomic_set(&mdev->local_cnt, 0);
+	atomic_set(&mdev->net_cnt, 0);
+	atomic_set(&mdev->packet_seq, 0);
+	atomic_set(&mdev->pp_in_use, 0);
+
+	mutex_init(&mdev->md_io_mutex);
+	mutex_init(&mdev->data.mutex);
+	mutex_init(&mdev->meta.mutex);
+	sema_init(&mdev->data.work.s, 0);
+	sema_init(&mdev->meta.work.s, 0);
+	mutex_init(&mdev->state_mutex);
+
+	spin_lock_init(&mdev->data.work.q_lock);
+	spin_lock_init(&mdev->meta.work.q_lock);
+
+	spin_lock_init(&mdev->al_lock);
+	spin_lock_init(&mdev->req_lock);
+	spin_lock_init(&mdev->peer_seq_lock);
+	spin_lock_init(&mdev->epoch_lock);
+
+	INIT_LIST_HEAD(&mdev->active_ee);
+	INIT_LIST_HEAD(&mdev->sync_ee);
+	INIT_LIST_HEAD(&mdev->done_ee);
+	INIT_LIST_HEAD(&mdev->read_ee);
+	INIT_LIST_HEAD(&mdev->net_ee);
+	INIT_LIST_HEAD(&mdev->resync_reads);
+	INIT_LIST_HEAD(&mdev->data.work.q);
+	INIT_LIST_HEAD(&mdev->meta.work.q);
+	INIT_LIST_HEAD(&mdev->resync_work.list);
+	INIT_LIST_HEAD(&mdev->unplug_work.list);
+	INIT_LIST_HEAD(&mdev->md_sync_work.list);
+	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+	mdev->resync_work.cb  = w_resync_inactive;
+	mdev->unplug_work.cb  = w_send_write_hint;
+	mdev->md_sync_work.cb = w_md_sync;
+	mdev->bm_io_work.w.cb = w_bitmap_io;
+	init_timer(&mdev->resync_timer);
+	init_timer(&mdev->md_sync_timer);
+	mdev->resync_timer.function = resync_timer_fn;
+	mdev->resync_timer.data = (unsigned long) mdev;
+	mdev->md_sync_timer.function = md_sync_timer_fn;
+	mdev->md_sync_timer.data = (unsigned long) mdev;
+
+	init_waitqueue_head(&mdev->misc_wait);
+	init_waitqueue_head(&mdev->state_wait);
+	init_waitqueue_head(&mdev->ee_wait);
+	init_waitqueue_head(&mdev->al_wait);
+	init_waitqueue_head(&mdev->seq_wait);
+
+	drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
+	drbd_thread_init(mdev, &mdev->worker, drbd_worker);
+	drbd_thread_init(mdev, &mdev->asender, drbd_asender);
+
+	mdev->agreed_pro_version = PRO_VERSION_MAX;
+	mdev->write_ordering = WO_bio_barrier;
+	mdev->resync_wenr = LC_FREE;
+}
+
+void drbd_mdev_cleanup(struct drbd_conf *mdev)
+{
+	if (mdev->receiver.t_state != None)
+		dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
+				mdev->receiver.t_state);
+
+	/* no need to lock it, I'm the only thread alive */
+	if (atomic_read(&mdev->current_epoch->epoch_size) !=  0)
+		dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
+	mdev->al_writ_cnt  =
+	mdev->bm_writ_cnt  =
+	mdev->read_cnt     =
+	mdev->recv_cnt     =
+	mdev->send_cnt     =
+	mdev->writ_cnt     =
+	mdev->p_size       =
+	mdev->rs_start     =
+	mdev->rs_total     =
+	mdev->rs_failed    =
+	mdev->rs_mark_left =
+	mdev->rs_mark_time = 0;
+	D_ASSERT(mdev->net_conf == NULL);
+
+	drbd_set_my_capacity(mdev, 0);
+	if (mdev->bitmap) {
+		/* maybe never allocated. */
+		drbd_bm_resize(mdev, 0);
+		drbd_bm_cleanup(mdev);
+	}
+
+	drbd_free_resources(mdev);
+
+	/*
+	 * currently we drbd_init_ee only on module load, so
+	 * we may do drbd_release_ee only on module unload!
+	 */
+	D_ASSERT(list_empty(&mdev->active_ee));
+	D_ASSERT(list_empty(&mdev->sync_ee));
+	D_ASSERT(list_empty(&mdev->done_ee));
+	D_ASSERT(list_empty(&mdev->read_ee));
+	D_ASSERT(list_empty(&mdev->net_ee));
+	D_ASSERT(list_empty(&mdev->resync_reads));
+	D_ASSERT(list_empty(&mdev->data.work.q));
+	D_ASSERT(list_empty(&mdev->meta.work.q));
+	D_ASSERT(list_empty(&mdev->resync_work.list));
+	D_ASSERT(list_empty(&mdev->unplug_work.list));
+
+}
+
+
+static void drbd_destroy_mempools(void)
+{
+	struct page *page;
+
+	while (drbd_pp_pool) {
+		page = drbd_pp_pool;
+		drbd_pp_pool = (struct page *)page_private(page);
+		__free_page(page);
+		drbd_pp_vacant--;
+	}
+
+	/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
+
+	if (drbd_ee_mempool)
+		mempool_destroy(drbd_ee_mempool);
+	if (drbd_request_mempool)
+		mempool_destroy(drbd_request_mempool);
+	if (drbd_ee_cache)
+		kmem_cache_destroy(drbd_ee_cache);
+	if (drbd_request_cache)
+		kmem_cache_destroy(drbd_request_cache);
+	if (drbd_bm_ext_cache)
+		kmem_cache_destroy(drbd_bm_ext_cache);
+	if (drbd_al_ext_cache)
+		kmem_cache_destroy(drbd_al_ext_cache);
+
+	drbd_ee_mempool      = NULL;
+	drbd_request_mempool = NULL;
+	drbd_ee_cache        = NULL;
+	drbd_request_cache   = NULL;
+	drbd_bm_ext_cache    = NULL;
+	drbd_al_ext_cache    = NULL;
+
+	return;
+}
+
+static int drbd_create_mempools(void)
+{
+	struct page *page;
+	const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+	int i;
+
+	/* prepare our caches and mempools */
+	drbd_request_mempool = NULL;
+	drbd_ee_cache        = NULL;
+	drbd_request_cache   = NULL;
+	drbd_bm_ext_cache    = NULL;
+	drbd_al_ext_cache    = NULL;
+	drbd_pp_pool         = NULL;
+
+	/* caches */
+	drbd_request_cache = kmem_cache_create(
+		"drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
+	if (drbd_request_cache == NULL)
+		goto Enomem;
+
+	drbd_ee_cache = kmem_cache_create(
+		"drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
+	if (drbd_ee_cache == NULL)
+		goto Enomem;
+
+	drbd_bm_ext_cache = kmem_cache_create(
+		"drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
+	if (drbd_bm_ext_cache == NULL)
+		goto Enomem;
+
+	drbd_al_ext_cache = kmem_cache_create(
+		"drbd_al", sizeof(struct lc_element), 0, 0, NULL);
+	if (drbd_al_ext_cache == NULL)
+		goto Enomem;
+
+	/* mempools */
+	drbd_request_mempool = mempool_create(number,
+		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
+	if (drbd_request_mempool == NULL)
+		goto Enomem;
+
+	drbd_ee_mempool = mempool_create(number,
+		mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
+	if (drbd_request_mempool == NULL)
+		goto Enomem;
+
+	/* drbd's page pool */
+	spin_lock_init(&drbd_pp_lock);
+
+	for (i = 0; i < number; i++) {
+		page = alloc_page(GFP_HIGHUSER);
+		if (!page)
+			goto Enomem;
+		set_page_private(page, (unsigned long)drbd_pp_pool);
+		drbd_pp_pool = page;
+	}
+	drbd_pp_vacant = number;
+
+	return 0;
+
+Enomem:
+	drbd_destroy_mempools(); /* in case we allocated some */
+	return -ENOMEM;
+}
+
+static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
+	void *unused)
+{
+	/* just so we have it.  you never know what interesting things we
+	 * might want to do here some day...
+	 */
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block drbd_notifier = {
+	.notifier_call = drbd_notify_sys,
+};
+
+static void drbd_release_ee_lists(struct drbd_conf *mdev)
+{
+	int rr;
+
+	rr = drbd_release_ee(mdev, &mdev->active_ee);
+	if (rr)
+		dev_err(DEV, "%d EEs in active list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->sync_ee);
+	if (rr)
+		dev_err(DEV, "%d EEs in sync list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->read_ee);
+	if (rr)
+		dev_err(DEV, "%d EEs in read list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->done_ee);
+	if (rr)
+		dev_err(DEV, "%d EEs in done list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->net_ee);
+	if (rr)
+		dev_err(DEV, "%d EEs in net list found!\n", rr);
+}
+
+/* caution. no locking.
+ * currently only used from module cleanup code. */
+static void drbd_delete_device(unsigned int minor)
+{
+	struct drbd_conf *mdev = minor_to_mdev(minor);
+
+	if (!mdev)
+		return;
+
+	/* paranoia asserts */
+	if (mdev->open_cnt != 0)
+		dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
+				__FILE__ , __LINE__);
+
+	ERR_IF (!list_empty(&mdev->data.work.q)) {
+		struct list_head *lp;
+		list_for_each(lp, &mdev->data.work.q) {
+			dev_err(DEV, "lp = %p\n", lp);
+		}
+	};
+	/* end paranoia asserts */
+
+	del_gendisk(mdev->vdisk);
+
+	/* cleanup stuff that may have been allocated during
+	 * device (re-)configuration or state changes */
+
+	if (mdev->this_bdev)
+		bdput(mdev->this_bdev);
+
+	drbd_free_resources(mdev);
+
+	drbd_release_ee_lists(mdev);
+
+	/* should be free'd on disconnect? */
+	kfree(mdev->ee_hash);
+	/*
+	mdev->ee_hash_s = 0;
+	mdev->ee_hash = NULL;
+	*/
+
+	lc_destroy(mdev->act_log);
+	lc_destroy(mdev->resync);
+
+	kfree(mdev->p_uuid);
+	/* mdev->p_uuid = NULL; */
+
+	kfree(mdev->int_dig_out);
+	kfree(mdev->int_dig_in);
+	kfree(mdev->int_dig_vv);
+
+	/* cleanup the rest that has been
+	 * allocated from drbd_new_device
+	 * and actually free the mdev itself */
+	drbd_free_mdev(mdev);
+}
+
+static void drbd_cleanup(void)
+{
+	unsigned int i;
+
+	unregister_reboot_notifier(&drbd_notifier);
+
+	drbd_nl_cleanup();
+
+	if (minor_table) {
+		if (drbd_proc)
+			remove_proc_entry("drbd", NULL);
+		i = minor_count;
+		while (i--)
+			drbd_delete_device(i);
+		drbd_destroy_mempools();
+	}
+
+	kfree(minor_table);
+
+	unregister_blkdev(DRBD_MAJOR, "drbd");
+
+	printk(KERN_INFO "drbd: module cleanup done.\n");
+}
+
+/**
+ * drbd_congested() - Callback for pdflush
+ * @congested_data:	User data
+ * @bdi_bits:		Bits pdflush is currently interested in
+ *
+ * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ */
+static int drbd_congested(void *congested_data, int bdi_bits)
+{
+	struct drbd_conf *mdev = congested_data;
+	struct request_queue *q;
+	char reason = '-';
+	int r = 0;
+
+	if (!__inc_ap_bio_cond(mdev)) {
+		/* DRBD has frozen IO */
+		r = bdi_bits;
+		reason = 'd';
+		goto out;
+	}
+
+	if (get_ldev(mdev)) {
+		q = bdev_get_queue(mdev->ldev->backing_bdev);
+		r = bdi_congested(&q->backing_dev_info, bdi_bits);
+		put_ldev(mdev);
+		if (r)
+			reason = 'b';
+	}
+
+	if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
+		r |= (1 << BDI_async_congested);
+		reason = reason == 'b' ? 'a' : 'n';
+	}
+
+out:
+	mdev->congestion_reason = reason;
+	return r;
+}
+
+struct drbd_conf *drbd_new_device(unsigned int minor)
+{
+	struct drbd_conf *mdev;
+	struct gendisk *disk;
+	struct request_queue *q;
+
+	/* GFP_KERNEL, we are outside of all write-out paths */
+	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
+	if (!mdev)
+		return NULL;
+	if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
+		goto out_no_cpumask;
+
+	mdev->minor = minor;
+
+	drbd_init_set_defaults(mdev);
+
+	q = blk_alloc_queue(GFP_KERNEL);
+	if (!q)
+		goto out_no_q;
+	mdev->rq_queue = q;
+	q->queuedata   = mdev;
+	blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+
+	disk = alloc_disk(1);
+	if (!disk)
+		goto out_no_disk;
+	mdev->vdisk = disk;
+
+	set_disk_ro(disk, TRUE);
+
+	disk->queue = q;
+	disk->major = DRBD_MAJOR;
+	disk->first_minor = minor;
+	disk->fops = &drbd_ops;
+	sprintf(disk->disk_name, "drbd%d", minor);
+	disk->private_data = mdev;
+
+	mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
+	/* we have no partitions. we contain only ourselves. */
+	mdev->this_bdev->bd_contains = mdev->this_bdev;
+
+	q->backing_dev_info.congested_fn = drbd_congested;
+	q->backing_dev_info.congested_data = mdev;
+
+	blk_queue_make_request(q, drbd_make_request_26);
+	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+	blk_queue_merge_bvec(q, drbd_merge_bvec);
+	q->queue_lock = &mdev->req_lock; /* needed since we use */
+		/* plugging on a queue, that actually has no requests! */
+	q->unplug_fn = drbd_unplug_fn;
+
+	mdev->md_io_page = alloc_page(GFP_KERNEL);
+	if (!mdev->md_io_page)
+		goto out_no_io_page;
+
+	if (drbd_bm_init(mdev))
+		goto out_no_bitmap;
+	/* no need to lock access, we are still initializing this minor device. */
+	if (!tl_init(mdev))
+		goto out_no_tl;
+
+	mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
+	if (!mdev->app_reads_hash)
+		goto out_no_app_reads;
+
+	mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
+	if (!mdev->current_epoch)
+		goto out_no_epoch;
+
+	INIT_LIST_HEAD(&mdev->current_epoch->list);
+	mdev->epochs = 1;
+
+	return mdev;
+
+/* out_whatever_else:
+	kfree(mdev->current_epoch); */
+out_no_epoch:
+	kfree(mdev->app_reads_hash);
+out_no_app_reads:
+	tl_cleanup(mdev);
+out_no_tl:
+	drbd_bm_cleanup(mdev);
+out_no_bitmap:
+	__free_page(mdev->md_io_page);
+out_no_io_page:
+	put_disk(disk);
+out_no_disk:
+	blk_cleanup_queue(q);
+out_no_q:
+	free_cpumask_var(mdev->cpu_mask);
+out_no_cpumask:
+	kfree(mdev);
+	return NULL;
+}
+
+/* counterpart of drbd_new_device.
+ * last part of drbd_delete_device. */
+void drbd_free_mdev(struct drbd_conf *mdev)
+{
+	kfree(mdev->current_epoch);
+	kfree(mdev->app_reads_hash);
+	tl_cleanup(mdev);
+	if (mdev->bitmap) /* should no longer be there. */
+		drbd_bm_cleanup(mdev);
+	__free_page(mdev->md_io_page);
+	put_disk(mdev->vdisk);
+	blk_cleanup_queue(mdev->rq_queue);
+	free_cpumask_var(mdev->cpu_mask);
+	kfree(mdev);
+}
+
+
+int __init drbd_init(void)
+{
+	int err;
+
+	if (sizeof(struct p_handshake) != 80) {
+		printk(KERN_ERR
+		       "drbd: never change the size or layout "
+		       "of the HandShake packet.\n");
+		return -EINVAL;
+	}
+
+	if (1 > minor_count || minor_count > 255) {
+		printk(KERN_ERR
+			"drbd: invalid minor_count (%d)\n", minor_count);
+#ifdef MODULE
+		return -EINVAL;
+#else
+		minor_count = 8;
+#endif
+	}
+
+	err = drbd_nl_init();
+	if (err)
+		return err;
+
+	err = register_blkdev(DRBD_MAJOR, "drbd");
+	if (err) {
+		printk(KERN_ERR
+		       "drbd: unable to register block device major %d\n",
+		       DRBD_MAJOR);
+		return err;
+	}
+
+	register_reboot_notifier(&drbd_notifier);
+
+	/*
+	 * allocate all necessary structs
+	 */
+	err = -ENOMEM;
+
+	init_waitqueue_head(&drbd_pp_wait);
+
+	drbd_proc = NULL; /* play safe for drbd_cleanup */
+	minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
+				GFP_KERNEL);
+	if (!minor_table)
+		goto Enomem;
+
+	err = drbd_create_mempools();
+	if (err)
+		goto Enomem;
+
+	drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops);
+	if (!drbd_proc)	{
+		printk(KERN_ERR "drbd: unable to register proc file\n");
+		goto Enomem;
+	}
+
+	rwlock_init(&global_state_lock);
+
+	printk(KERN_INFO "drbd: initialized. "
+	       "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
+	       API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
+	printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
+	printk(KERN_INFO "drbd: registered as block device major %d\n",
+		DRBD_MAJOR);
+	printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
+
+	return 0; /* Success! */
+
+Enomem:
+	drbd_cleanup();
+	if (err == -ENOMEM)
+		/* currently always the case */
+		printk(KERN_ERR "drbd: ran out of memory\n");
+	else
+		printk(KERN_ERR "drbd: initialization failure\n");
+	return err;
+}
+
+void drbd_free_bc(struct drbd_backing_dev *ldev)
+{
+	if (ldev == NULL)
+		return;
+
+	bd_release(ldev->backing_bdev);
+	bd_release(ldev->md_bdev);
+
+	fput(ldev->lo_file);
+	fput(ldev->md_file);
+
+	kfree(ldev);
+}
+
+void drbd_free_sock(struct drbd_conf *mdev)
+{
+	if (mdev->data.socket) {
+		kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
+		sock_release(mdev->data.socket);
+		mdev->data.socket = NULL;
+	}
+	if (mdev->meta.socket) {
+		kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
+		sock_release(mdev->meta.socket);
+		mdev->meta.socket = NULL;
+	}
+}
+
+
+void drbd_free_resources(struct drbd_conf *mdev)
+{
+	crypto_free_hash(mdev->csums_tfm);
+	mdev->csums_tfm = NULL;
+	crypto_free_hash(mdev->verify_tfm);
+	mdev->verify_tfm = NULL;
+	crypto_free_hash(mdev->cram_hmac_tfm);
+	mdev->cram_hmac_tfm = NULL;
+	crypto_free_hash(mdev->integrity_w_tfm);
+	mdev->integrity_w_tfm = NULL;
+	crypto_free_hash(mdev->integrity_r_tfm);
+	mdev->integrity_r_tfm = NULL;
+
+	drbd_free_sock(mdev);
+
+	__no_warn(local,
+		  drbd_free_bc(mdev->ldev);
+		  mdev->ldev = NULL;);
+}
+
+/* meta data management */
+
+struct meta_data_on_disk {
+	u64 la_size;           /* last agreed size. */
+	u64 uuid[UI_SIZE];   /* UUIDs. */
+	u64 device_uuid;
+	u64 reserved_u64_1;
+	u32 flags;             /* MDF */
+	u32 magic;
+	u32 md_size_sect;
+	u32 al_offset;         /* offset to this block */
+	u32 al_nr_extents;     /* important for restoring the AL */
+	      /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+	u32 bm_offset;         /* offset to the bitmap, from here */
+	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
+	u32 reserved_u32[4];
+
+} __packed;
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+	sector_t sector;
+	int i;
+
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+	del_timer(&mdev->md_sync_timer);
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	trace_drbd_md_io(mdev, WRITE, mdev->ldev);
+
+	mutex_lock(&mdev->md_io_mutex);
+	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	memset(buffer, 0, 512);
+
+	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
+	for (i = UI_CURRENT; i < UI_SIZE; i++)
+		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
+	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
+	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+
+	buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
+	buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
+	buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
+	buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
+	buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
+
+	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+
+	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
+	sector = mdev->ldev->md.md_offset;
+
+	if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+		clear_bit(MD_DIRTY, &mdev->flags);
+	} else {
+		/* this was a try anyways ... */
+		dev_err(DEV, "meta data update failed!\n");
+
+		drbd_chk_io_error(mdev, 1, TRUE);
+	}
+
+	/* Update mdev->ldev->md.la_size_sect,
+	 * since we updated it on metadata. */
+	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+
+	mutex_unlock(&mdev->md_io_mutex);
+	put_ldev(mdev);
+}
+
+/**
+ * drbd_md_read() - Reads in the meta data super block
+ * @mdev:	DRBD device.
+ * @bdev:	Device from which the meta data should be read in.
+ *
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ */
+int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	struct meta_data_on_disk *buffer;
+	int i, rv = NO_ERROR;
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING))
+		return ERR_IO_MD_DISK;
+
+	trace_drbd_md_io(mdev, READ, bdev);
+
+	mutex_lock(&mdev->md_io_mutex);
+	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+
+	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
+		/* NOTE: cant do normal error processing here as this is
+		   called BEFORE disk is attached */
+		dev_err(DEV, "Error while reading metadata.\n");
+		rv = ERR_IO_MD_DISK;
+		goto err;
+	}
+
+	if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
+		dev_err(DEV, "Error while reading metadata, magic not found.\n");
+		rv = ERR_MD_INVALID;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
+		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
+		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+		rv = ERR_MD_INVALID;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
+		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
+		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
+		rv = ERR_MD_INVALID;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
+		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
+		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
+		rv = ERR_MD_INVALID;
+		goto err;
+	}
+
+	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
+		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
+		rv = ERR_MD_INVALID;
+		goto err;
+	}
+
+	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+	for (i = UI_CURRENT; i < UI_SIZE; i++)
+		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+	bdev->md.flags = be32_to_cpu(buffer->flags);
+	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+	if (mdev->sync_conf.al_extents < 7)
+		mdev->sync_conf.al_extents = 127;
+
+ err:
+	mutex_unlock(&mdev->md_io_mutex);
+	put_ldev(mdev);
+
+	return rv;
+}
+
+/**
+ * drbd_md_mark_dirty() - Mark meta data super block as dirty
+ * @mdev:	DRBD device.
+ *
+ * Call this function if you change anything that should be written to
+ * the meta-data super block. This function sets MD_DIRTY, and starts a
+ * timer that ensures that within five seconds you have to call drbd_md_sync().
+ */
+void drbd_md_mark_dirty(struct drbd_conf *mdev)
+{
+	set_bit(MD_DIRTY, &mdev->flags);
+	mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
+}
+
+
+static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
+{
+	int i;
+
+	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
+
+		trace_drbd_uuid(mdev, i+1);
+	}
+}
+
+void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+	if (idx == UI_CURRENT) {
+		if (mdev->state.role == R_PRIMARY)
+			val |= 1;
+		else
+			val &= ~((u64)1);
+
+		drbd_set_ed_uuid(mdev, val);
+	}
+
+	mdev->ldev->md.uuid[idx] = val;
+	trace_drbd_uuid(mdev, idx);
+	drbd_md_mark_dirty(mdev);
+}
+
+
+void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+	if (mdev->ldev->md.uuid[idx]) {
+		drbd_uuid_move_history(mdev);
+		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
+		trace_drbd_uuid(mdev, UI_HISTORY_START);
+	}
+	_drbd_uuid_set(mdev, idx, val);
+}
+
+/**
+ * drbd_uuid_new_current() - Creates a new current UUID
+ * @mdev:	DRBD device.
+ *
+ * Creates a new current UUID, and rotates the old current UUID into
+ * the bitmap slot. Causes an incremental resync upon next connect.
+ */
+void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
+{
+	u64 val;
+
+	dev_info(DEV, "Creating new current UUID\n");
+	D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
+	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
+	trace_drbd_uuid(mdev, UI_BITMAP);
+
+	get_random_bytes(&val, sizeof(u64));
+	_drbd_uuid_set(mdev, UI_CURRENT, val);
+}
+
+void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
+{
+	if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
+		return;
+
+	if (val == 0) {
+		drbd_uuid_move_history(mdev);
+		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+		mdev->ldev->md.uuid[UI_BITMAP] = 0;
+		trace_drbd_uuid(mdev, UI_HISTORY_START);
+		trace_drbd_uuid(mdev, UI_BITMAP);
+	} else {
+		if (mdev->ldev->md.uuid[UI_BITMAP])
+			dev_warn(DEV, "bm UUID already set");
+
+		mdev->ldev->md.uuid[UI_BITMAP] = val;
+		mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
+
+		trace_drbd_uuid(mdev, UI_BITMAP);
+	}
+	drbd_md_mark_dirty(mdev);
+}
+
+/**
+ * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev:	DRBD device.
+ *
+ * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_set_n_write(struct drbd_conf *mdev)
+{
+	int rv = -EIO;
+
+	if (get_ldev_if_state(mdev, D_ATTACHING)) {
+		drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+		drbd_md_sync(mdev);
+		drbd_bm_set_all(mdev);
+
+		rv = drbd_bm_write(mdev);
+
+		if (!rv) {
+			drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+			drbd_md_sync(mdev);
+		}
+
+		put_ldev(mdev);
+	}
+
+	return rv;
+}
+
+/**
+ * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev:	DRBD device.
+ *
+ * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
+{
+	int rv = -EIO;
+
+	if (get_ldev_if_state(mdev, D_ATTACHING)) {
+		drbd_bm_clear_all(mdev);
+		rv = drbd_bm_write(mdev);
+		put_ldev(mdev);
+	}
+
+	return rv;
+}
+
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
+	int rv;
+
+	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+	drbd_bm_lock(mdev, work->why);
+	rv = work->io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	clear_bit(BITMAP_IO, &mdev->flags);
+	wake_up(&mdev->misc_wait);
+
+	if (work->done)
+		work->done(mdev, rv);
+
+	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
+	work->why = NULL;
+
+	return 1;
+}
+
+/**
+ * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
+ * @mdev:	DRBD device.
+ * @io_fn:	IO callback to be called when bitmap IO is possible
+ * @done:	callback to be called after the bitmap IO was performed
+ * @why:	Descriptive text of the reason for doing the IO
+ *
+ * While IO on the bitmap happens we freeze application IO thus we ensure
+ * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
+ * called from worker context. It MUST NOT be used while a previous such
+ * work is still pending!
+ */
+void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+			  int (*io_fn)(struct drbd_conf *),
+			  void (*done)(struct drbd_conf *, int),
+			  char *why)
+{
+	D_ASSERT(current == mdev->worker.task);
+
+	D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
+	D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
+	D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
+	if (mdev->bm_io_work.why)
+		dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
+			why, mdev->bm_io_work.why);
+
+	mdev->bm_io_work.io_fn = io_fn;
+	mdev->bm_io_work.done = done;
+	mdev->bm_io_work.why = why;
+
+	set_bit(BITMAP_IO, &mdev->flags);
+	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
+		if (list_empty(&mdev->bm_io_work.w.list)) {
+			set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+		} else
+			dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
+	}
+}
+
+/**
+ * drbd_bitmap_io() -  Does an IO operation on the whole bitmap
+ * @mdev:	DRBD device.
+ * @io_fn:	IO callback to be called when bitmap IO is possible
+ * @why:	Descriptive text of the reason for doing the IO
+ *
+ * freezes application IO while that the actual IO operations runs. This
+ * functions MAY NOT be called from worker context.
+ */
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+{
+	int rv;
+
+	D_ASSERT(current != mdev->worker.task);
+
+	drbd_suspend_io(mdev);
+
+	drbd_bm_lock(mdev, why);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
+void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+	if ((mdev->ldev->md.flags & flag) != flag) {
+		drbd_md_mark_dirty(mdev);
+		mdev->ldev->md.flags |= flag;
+	}
+}
+
+void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+	if ((mdev->ldev->md.flags & flag) != 0) {
+		drbd_md_mark_dirty(mdev);
+		mdev->ldev->md.flags &= ~flag;
+	}
+}
+int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
+{
+	return (bdev->md.flags & flag) != 0;
+}
+
+static void md_sync_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+	drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
+}
+
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+	drbd_md_sync(mdev);
+
+	return 1;
+}
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+/* Fault insertion support including random number generator shamelessly
+ * stolen from kernel/rcutorture.c */
+struct fault_random_state {
+	unsigned long state;
+	unsigned long count;
+};
+
+#define FAULT_RANDOM_MULT 39916801  /* prime */
+#define FAULT_RANDOM_ADD	479001701 /* prime */
+#define FAULT_RANDOM_REFRESH 10000
+
+/*
+ * Crude but fast random-number generator.  Uses a linear congruential
+ * generator, with occasional help from get_random_bytes().
+ */
+static unsigned long
+_drbd_fault_random(struct fault_random_state *rsp)
+{
+	long refresh;
+
+	if (--rsp->count < 0) {
+		get_random_bytes(&refresh, sizeof(refresh));
+		rsp->state += refresh;
+		rsp->count = FAULT_RANDOM_REFRESH;
+	}
+	rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
+	return swahw32(rsp->state);
+}
+
+static char *
+_drbd_fault_str(unsigned int type) {
+	static char *_faults[] = {
+		[DRBD_FAULT_MD_WR] = "Meta-data write",
+		[DRBD_FAULT_MD_RD] = "Meta-data read",
+		[DRBD_FAULT_RS_WR] = "Resync write",
+		[DRBD_FAULT_RS_RD] = "Resync read",
+		[DRBD_FAULT_DT_WR] = "Data write",
+		[DRBD_FAULT_DT_RD] = "Data read",
+		[DRBD_FAULT_DT_RA] = "Data read ahead",
+		[DRBD_FAULT_BM_ALLOC] = "BM allocation",
+		[DRBD_FAULT_AL_EE] = "EE allocation"
+	};
+
+	return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
+}
+
+unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
+{
+	static struct fault_random_state rrs = {0, 0};
+
+	unsigned int ret = (
+		(fault_devs == 0 ||
+			((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
+		(((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+
+	if (ret) {
+		fault_count++;
+
+		if (printk_ratelimit())
+			dev_warn(DEV, "***Simulating %s failure\n",
+				_drbd_fault_str(type));
+	}
+
+	return ret;
+}
+#endif
+
+const char *drbd_buildtag(void)
+{
+	/* DRBD built from external sources has here a reference to the
+	   git hash of the source code. */
+
+	static char buildtag[38] = "\0uilt-in";
+
+	if (buildtag[0] == 0) {
+#ifdef CONFIG_MODULES
+		if (THIS_MODULE != NULL)
+			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
+		else
+#endif
+			buildtag[0] = 'b';
+	}
+
+	return buildtag;
+}
+
+module_init(drbd_init)
+module_exit(drbd_cleanup)
+
+/* For drbd_tracing: */
+EXPORT_SYMBOL(drbd_conn_str);
+EXPORT_SYMBOL(drbd_role_str);
+EXPORT_SYMBOL(drbd_disk_str);
+EXPORT_SYMBOL(drbd_set_st_err_str);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
new file mode 100644
index 000000000000..1927acefe230
--- /dev/null
+++ b/drivers/block/drbd/drbd_nl.c
@@ -0,0 +1,2365 @@
+/*
+   drbd_nl.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/connector.h>
+#include <linux/blkpg.h>
+#include <linux/cpumask.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include "drbd_wrappers.h"
+#include <asm/unaligned.h>
+#include <linux/drbd_tag_magic.h>
+#include <linux/drbd_limits.h>
+
+static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
+static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
+static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
+
+/* see get_sb_bdev and bd_claim */
+static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
+
+/* Generate the tag_list to struct functions */
+#define NL_PACKET(name, number, fields) \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+	unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+	unsigned short *tags, struct name *arg) \
+{ \
+	int tag; \
+	int dlen; \
+	\
+	while ((tag = get_unaligned(tags++)) != TT_END) {	\
+		dlen = get_unaligned(tags++);			\
+		switch (tag_number(tag)) { \
+		fields \
+		default: \
+			if (tag & T_MANDATORY) { \
+				dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
+				return 0; \
+			} \
+		} \
+		tags = (unsigned short *)((char *)tags + dlen); \
+	} \
+	return 1; \
+}
+#define NL_INTEGER(pn, pr, member) \
+	case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
+		arg->member = get_unaligned((int *)(tags));	\
+		break;
+#define NL_INT64(pn, pr, member) \
+	case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
+		arg->member = get_unaligned((u64 *)(tags));	\
+		break;
+#define NL_BIT(pn, pr, member) \
+	case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
+		arg->member = *(char *)(tags) ? 1 : 0; \
+		break;
+#define NL_STRING(pn, pr, member, len) \
+	case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
+		if (dlen > len) { \
+			dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
+				#member, dlen, (unsigned int)len); \
+			return 0; \
+		} \
+		 arg->member ## _len = dlen; \
+		 memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
+		 break;
+#include "linux/drbd_nl.h"
+
+/* Generate the struct to tag_list functions */
+#define NL_PACKET(name, number, fields) \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+	struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+	struct name *arg, unsigned short *tags) \
+{ \
+	fields \
+	return tags; \
+}
+
+#define NL_INTEGER(pn, pr, member) \
+	put_unaligned(pn | pr | TT_INTEGER, tags++);	\
+	put_unaligned(sizeof(int), tags++);		\
+	put_unaligned(arg->member, (int *)tags);	\
+	tags = (unsigned short *)((char *)tags+sizeof(int));
+#define NL_INT64(pn, pr, member) \
+	put_unaligned(pn | pr | TT_INT64, tags++);	\
+	put_unaligned(sizeof(u64), tags++);		\
+	put_unaligned(arg->member, (u64 *)tags);	\
+	tags = (unsigned short *)((char *)tags+sizeof(u64));
+#define NL_BIT(pn, pr, member) \
+	put_unaligned(pn | pr | TT_BIT, tags++);	\
+	put_unaligned(sizeof(char), tags++);		\
+	*(char *)tags = arg->member; \
+	tags = (unsigned short *)((char *)tags+sizeof(char));
+#define NL_STRING(pn, pr, member, len) \
+	put_unaligned(pn | pr | TT_STRING, tags++);	\
+	put_unaligned(arg->member ## _len, tags++);	\
+	memcpy(tags, arg->member, arg->member ## _len); \
+	tags = (unsigned short *)((char *)tags + arg->member ## _len);
+#include "linux/drbd_nl.h"
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
+void drbd_nl_send_reply(struct cn_msg *, int);
+
+int drbd_khelper(struct drbd_conf *mdev, char *cmd)
+{
+	char *envp[] = { "HOME=/",
+			"TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			NULL, /* Will be set to address family */
+			NULL, /* Will be set to address */
+			NULL };
+
+	char mb[12], af[20], ad[60], *afs;
+	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	int ret;
+
+	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
+
+	if (get_net_conf(mdev)) {
+		switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
+		case AF_INET6:
+			afs = "ipv6";
+			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
+				 &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
+			break;
+		case AF_INET:
+			afs = "ipv4";
+			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+			break;
+		default:
+			afs = "ssocks";
+			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+		}
+		snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
+		envp[3]=af;
+		envp[4]=ad;
+		put_net_conf(mdev);
+	}
+
+	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+
+	drbd_bcast_ev_helper(mdev, cmd);
+	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	if (ret)
+		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+				usermode_helper, cmd, mb,
+				(ret >> 8) & 0xff, ret);
+	else
+		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+				usermode_helper, cmd, mb,
+				(ret >> 8) & 0xff, ret);
+
+	if (ret < 0) /* Ignore any ERRNOs we got. */
+		ret = 0;
+
+	return ret;
+}
+
+enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+{
+	char *ex_to_string;
+	int r;
+	enum drbd_disk_state nps;
+	enum drbd_fencing_p fp;
+
+	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+
+	if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+		fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+	} else {
+		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
+		return mdev->state.pdsk;
+	}
+
+	if (fp == FP_STONITH)
+		_drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
+
+	r = drbd_khelper(mdev, "fence-peer");
+
+	switch ((r>>8) & 0xff) {
+	case 3: /* peer is inconsistent */
+		ex_to_string = "peer is inconsistent or worse";
+		nps = D_INCONSISTENT;
+		break;
+	case 4: /* peer got outdated, or was already outdated */
+		ex_to_string = "peer was fenced";
+		nps = D_OUTDATED;
+		break;
+	case 5: /* peer was down */
+		if (mdev->state.disk == D_UP_TO_DATE) {
+			/* we will(have) create(d) a new UUID anyways... */
+			ex_to_string = "peer is unreachable, assumed to be dead";
+			nps = D_OUTDATED;
+		} else {
+			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
+			nps = mdev->state.pdsk;
+		}
+		break;
+	case 6: /* Peer is primary, voluntarily outdate myself.
+		 * This is useful when an unconnected R_SECONDARY is asked to
+		 * become R_PRIMARY, but finds the other peer being active. */
+		ex_to_string = "peer is active";
+		dev_warn(DEV, "Peer is primary, outdating myself.\n");
+		nps = D_UNKNOWN;
+		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+		break;
+	case 7:
+		if (fp != FP_STONITH)
+			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+		ex_to_string = "peer was stonithed";
+		nps = D_OUTDATED;
+		break;
+	default:
+		/* The script is broken ... */
+		nps = D_UNKNOWN;
+		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+		return nps;
+	}
+
+	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
+			(r>>8) & 0xff, ex_to_string);
+	return nps;
+}
+
+
+int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
+{
+	const int max_tries = 4;
+	int r = 0;
+	int try = 0;
+	int forced = 0;
+	union drbd_state mask, val;
+	enum drbd_disk_state nps;
+
+	if (new_role == R_PRIMARY)
+		request_ping(mdev); /* Detect a dead peer ASAP */
+
+	mutex_lock(&mdev->state_mutex);
+
+	mask.i = 0; mask.role = R_MASK;
+	val.i  = 0; val.role  = new_role;
+
+	while (try++ < max_tries) {
+		r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
+
+		/* in case we first succeeded to outdate,
+		 * but now suddenly could establish a connection */
+		if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
+			val.pdsk = 0;
+			mask.pdsk = 0;
+			continue;
+		}
+
+		if (r == SS_NO_UP_TO_DATE_DISK && force &&
+		    (mdev->state.disk == D_INCONSISTENT ||
+		     mdev->state.disk == D_OUTDATED)) {
+			mask.disk = D_MASK;
+			val.disk  = D_UP_TO_DATE;
+			forced = 1;
+			continue;
+		}
+
+		if (r == SS_NO_UP_TO_DATE_DISK &&
+		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
+			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+			nps = drbd_try_outdate_peer(mdev);
+
+			if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+				val.disk = D_UP_TO_DATE;
+				mask.disk = D_MASK;
+			}
+
+			val.pdsk = nps;
+			mask.pdsk = D_MASK;
+
+			continue;
+		}
+
+		if (r == SS_NOTHING_TO_DO)
+			goto fail;
+		if (r == SS_PRIMARY_NOP && mask.pdsk == 0) {
+			nps = drbd_try_outdate_peer(mdev);
+
+			if (force && nps > D_OUTDATED) {
+				dev_warn(DEV, "Forced into split brain situation!\n");
+				nps = D_OUTDATED;
+			}
+
+			mask.pdsk = D_MASK;
+			val.pdsk  = nps;
+
+			continue;
+		}
+		if (r == SS_TWO_PRIMARIES) {
+			/* Maybe the peer is detected as dead very soon...
+			   retry at most once more in this case. */
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
+			if (try < max_tries)
+				try = max_tries - 1;
+			continue;
+		}
+		if (r < SS_SUCCESS) {
+			r = _drbd_request_state(mdev, mask, val,
+						CS_VERBOSE + CS_WAIT_COMPLETE);
+			if (r < SS_SUCCESS)
+				goto fail;
+		}
+		break;
+	}
+
+	if (r < SS_SUCCESS)
+		goto fail;
+
+	if (forced)
+		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
+
+	/* Wait until nothing is on the fly :) */
+	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
+
+	if (new_role == R_SECONDARY) {
+		set_disk_ro(mdev->vdisk, TRUE);
+		if (get_ldev(mdev)) {
+			mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+			put_ldev(mdev);
+		}
+	} else {
+		if (get_net_conf(mdev)) {
+			mdev->net_conf->want_lose = 0;
+			put_net_conf(mdev);
+		}
+		set_disk_ro(mdev->vdisk, FALSE);
+		if (get_ldev(mdev)) {
+			if (((mdev->state.conn < C_CONNECTED ||
+			       mdev->state.pdsk <= D_FAILED)
+			      && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
+				drbd_uuid_new_current(mdev);
+
+			mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
+			put_ldev(mdev);
+		}
+	}
+
+	if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
+		drbd_al_to_on_disk_bm(mdev);
+		put_ldev(mdev);
+	}
+
+	if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
+		/* if this was forced, we should consider sync */
+		if (forced)
+			drbd_send_uuids(mdev);
+		drbd_send_state(mdev);
+	}
+
+	drbd_md_sync(mdev);
+
+	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ fail:
+	mutex_unlock(&mdev->state_mutex);
+	return r;
+}
+
+
+static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			   struct drbd_nl_cfg_reply *reply)
+{
+	struct primary primary_args;
+
+	memset(&primary_args, 0, sizeof(struct primary));
+	if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	reply->ret_code =
+		drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer);
+
+	return 0;
+}
+
+static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+
+	return 0;
+}
+
+/* initializes the md.*_offset members, so we are able to find
+ * the on disk meta data */
+static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
+				       struct drbd_backing_dev *bdev)
+{
+	sector_t md_size_sect = 0;
+	switch (bdev->dc.meta_dev_idx) {
+	default:
+		/* v07 style fixed size indexed meta data */
+		bdev->md.md_size_sect = MD_RESERVED_SECT;
+		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+		bdev->md.al_offset = MD_AL_OFFSET;
+		bdev->md.bm_offset = MD_BM_OFFSET;
+		break;
+	case DRBD_MD_INDEX_FLEX_EXT:
+		/* just occupy the full device; unit: sectors */
+		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
+		bdev->md.md_offset = 0;
+		bdev->md.al_offset = MD_AL_OFFSET;
+		bdev->md.bm_offset = MD_BM_OFFSET;
+		break;
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+		/* al size is still fixed */
+		bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		/* we need (slightly less than) ~ this much bitmap sectors: */
+		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
+		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
+		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
+		md_size_sect = ALIGN(md_size_sect, 8);
+
+		/* plus the "drbd meta data super block",
+		 * and the activity log; */
+		md_size_sect += MD_BM_OFFSET;
+
+		bdev->md.md_size_sect = md_size_sect;
+		/* bitmap offset is adjusted by 'super' block size */
+		bdev->md.bm_offset   = -md_size_sect + MD_AL_OFFSET;
+		break;
+	}
+}
+
+char *ppsize(char *buf, unsigned long long size)
+{
+	/* Needs 9 bytes at max. */
+	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
+	int base = 0;
+	while (size >= 10000) {
+		/* shift + round */
+		size = (size >> 10) + !!(size & (1<<9));
+		base++;
+	}
+	sprintf(buf, "%lu %cB", (long)size, units[base]);
+
+	return buf;
+}
+
+/* there is still a theoretical deadlock when called from receiver
+ * on an D_INCONSISTENT R_PRIMARY:
+ *  remote READ does inc_ap_bio, receiver would need to receive answer
+ *  packet from remote to dec_ap_bio again.
+ *  receiver receive_sizes(), comes here,
+ *  waits for ap_bio_cnt == 0. -> deadlock.
+ * but this cannot happen, actually, because:
+ *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
+ *  (not connected, or bad/no disk on peer):
+ *  see drbd_fail_request_early, ap_bio_cnt is zero.
+ *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
+ *  peer may not initiate a resize.
+ */
+void drbd_suspend_io(struct drbd_conf *mdev)
+{
+	set_bit(SUSPEND_IO, &mdev->flags);
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+}
+
+void drbd_resume_io(struct drbd_conf *mdev)
+{
+	clear_bit(SUSPEND_IO, &mdev->flags);
+	wake_up(&mdev->misc_wait);
+}
+
+/**
+ * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
+ * @mdev:	DRBD device.
+ *
+ * Returns 0 on success, negative return values indicate errors.
+ * You should call drbd_md_sync() after calling this function.
+ */
+enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
+{
+	sector_t prev_first_sect, prev_size; /* previous meta location */
+	sector_t la_size;
+	sector_t size;
+	char ppb[10];
+
+	int md_moved, la_size_changed;
+	enum determine_dev_size rv = unchanged;
+
+	/* race:
+	 * application request passes inc_ap_bio,
+	 * but then cannot get an AL-reference.
+	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+	 *
+	 * to avoid that:
+	 * Suspend IO right here.
+	 * still lock the act_log to not trigger ASSERTs there.
+	 */
+	drbd_suspend_io(mdev);
+
+	/* no wait necessary anymore, actually we could assert that */
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+	prev_first_sect = drbd_md_first_sector(mdev->ldev);
+	prev_size = mdev->ldev->md.md_size_sect;
+	la_size = mdev->ldev->md.la_size_sect;
+
+	/* TODO: should only be some assert here, not (re)init... */
+	drbd_md_set_sector_offsets(mdev, mdev->ldev);
+
+	size = drbd_new_dev_size(mdev, mdev->ldev);
+
+	if (drbd_get_capacity(mdev->this_bdev) != size ||
+	    drbd_bm_capacity(mdev) != size) {
+		int err;
+		err = drbd_bm_resize(mdev, size);
+		if (unlikely(err)) {
+			/* currently there is only one error: ENOMEM! */
+			size = drbd_bm_capacity(mdev)>>1;
+			if (size == 0) {
+				dev_err(DEV, "OUT OF MEMORY! "
+				    "Could not allocate bitmap!\n");
+			} else {
+				dev_err(DEV, "BM resizing failed. "
+				    "Leaving size unchanged at size = %lu KB\n",
+				    (unsigned long)size);
+			}
+			rv = dev_size_error;
+		}
+		/* racy, see comments above. */
+		drbd_set_my_capacity(mdev, size);
+		mdev->ldev->md.la_size_sect = size;
+		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
+		     (unsigned long long)size>>1);
+	}
+	if (rv == dev_size_error)
+		goto out;
+
+	la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
+
+	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
+		|| prev_size	   != mdev->ldev->md.md_size_sect;
+
+	if (la_size_changed || md_moved) {
+		drbd_al_shrink(mdev); /* All extents inactive. */
+		dev_info(DEV, "Writing the whole bitmap, %s\n",
+			 la_size_changed && md_moved ? "size changed and md moved" :
+			 la_size_changed ? "size changed" : "md moved");
+		rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
+		drbd_md_mark_dirty(mdev);
+	}
+
+	if (size > la_size)
+		rv = grew;
+	if (size < la_size)
+		rv = shrunk;
+out:
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
+sector_t
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	sector_t p_size = mdev->p_size;   /* partner's disk size. */
+	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
+	sector_t m_size; /* my size */
+	sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
+	sector_t size = 0;
+
+	m_size = drbd_get_max_capacity(bdev);
+
+	if (p_size && m_size) {
+		size = min_t(sector_t, p_size, m_size);
+	} else {
+		if (la_size) {
+			size = la_size;
+			if (m_size && m_size < size)
+				size = m_size;
+			if (p_size && p_size < size)
+				size = p_size;
+		} else {
+			if (m_size)
+				size = m_size;
+			if (p_size)
+				size = p_size;
+		}
+	}
+
+	if (size == 0)
+		dev_err(DEV, "Both nodes diskless!\n");
+
+	if (u_size) {
+		if (u_size > size)
+			dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
+			    (unsigned long)u_size>>1, (unsigned long)size>>1);
+		else
+			size = u_size;
+	}
+
+	return size;
+}
+
+/**
+ * drbd_check_al_size() - Ensures that the AL is of the right size
+ * @mdev:	DRBD device.
+ *
+ * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
+ * failed, and 0 on success. You should call drbd_md_sync() after you called
+ * this function.
+ */
+static int drbd_check_al_size(struct drbd_conf *mdev)
+{
+	struct lru_cache *n, *t;
+	struct lc_element *e;
+	unsigned int in_use;
+	int i;
+
+	ERR_IF(mdev->sync_conf.al_extents < 7)
+		mdev->sync_conf.al_extents = 127;
+
+	if (mdev->act_log &&
+	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+		return 0;
+
+	in_use = 0;
+	t = mdev->act_log;
+	n = lc_create("act_log", drbd_al_ext_cache,
+		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+
+	if (n == NULL) {
+		dev_err(DEV, "Cannot allocate act_log lru!\n");
+		return -ENOMEM;
+	}
+	spin_lock_irq(&mdev->al_lock);
+	if (t) {
+		for (i = 0; i < t->nr_elements; i++) {
+			e = lc_element_by_index(t, i);
+			if (e->refcnt)
+				dev_err(DEV, "refcnt(%d)==%d\n",
+				    e->lc_number, e->refcnt);
+			in_use += e->refcnt;
+		}
+	}
+	if (!in_use)
+		mdev->act_log = n;
+	spin_unlock_irq(&mdev->al_lock);
+	if (in_use) {
+		dev_err(DEV, "Activity log still in use!\n");
+		lc_destroy(n);
+		return -EBUSY;
+	} else {
+		if (t)
+			lc_destroy(t);
+	}
+	drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */
+	return 0;
+}
+
+void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
+{
+	struct request_queue * const q = mdev->rq_queue;
+	struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+	int max_segments = mdev->ldev->dc.max_bio_bvecs;
+
+	if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv)
+		max_seg_s = PAGE_SIZE;
+
+	max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
+
+	blk_queue_max_sectors(q, max_seg_s >> 9);
+	blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS);
+	blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS);
+	blk_queue_max_segment_size(q, max_seg_s);
+	blk_queue_logical_block_size(q, 512);
+	blk_queue_segment_boundary(q, PAGE_SIZE-1);
+	blk_stack_limits(&q->limits, &b->limits, 0);
+
+	if (b->merge_bvec_fn)
+		dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
+		     b->merge_bvec_fn);
+	dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
+
+	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+		dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+		     q->backing_dev_info.ra_pages,
+		     b->backing_dev_info.ra_pages);
+		q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+	}
+}
+
+/* serialize deconfig (worker exiting, doing cleanup)
+ * and reconfig (drbdsetup disk, drbdsetup net)
+ *
+ * wait for a potentially exiting worker, then restart it,
+ * or start a new one.
+ */
+static void drbd_reconfig_start(struct drbd_conf *mdev)
+{
+	wait_event(mdev->state_wait, test_and_set_bit(CONFIG_PENDING, &mdev->flags));
+	wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
+	drbd_thread_start(&mdev->worker);
+}
+
+/* if still unconfigured, stops worker again.
+ * if configured now, clears CONFIG_PENDING.
+ * wakes potential waiters */
+static void drbd_reconfig_done(struct drbd_conf *mdev)
+{
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->state.disk == D_DISKLESS &&
+	    mdev->state.conn == C_STANDALONE &&
+	    mdev->state.role == R_SECONDARY) {
+		set_bit(DEVICE_DYING, &mdev->flags);
+		drbd_thread_stop_nowait(&mdev->worker);
+	} else
+		clear_bit(CONFIG_PENDING, &mdev->flags);
+	spin_unlock_irq(&mdev->req_lock);
+	wake_up(&mdev->state_wait);
+}
+
+/* does always return 0;
+ * interesting return code is in reply->ret_code */
+static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	enum drbd_ret_codes retcode;
+	enum determine_dev_size dd;
+	sector_t max_possible_sectors;
+	sector_t min_md_device_sectors;
+	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
+	struct inode *inode, *inode2;
+	struct lru_cache *resync_lru = NULL;
+	union drbd_state ns, os;
+	int rv;
+	int cp_discovered = 0;
+	int logical_block_size;
+
+	drbd_reconfig_start(mdev);
+
+	/* if you want to reconfigure, please tear down first */
+	if (mdev->state.disk > D_DISKLESS) {
+		retcode = ERR_DISK_CONFIGURED;
+		goto fail;
+	}
+
+	/* allocation not in the IO path, cqueue thread context */
+	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
+	if (!nbc) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	nbc->dc.disk_size     = DRBD_DISK_SIZE_SECT_DEF;
+	nbc->dc.on_io_error   = DRBD_ON_IO_ERROR_DEF;
+	nbc->dc.fencing       = DRBD_FENCING_DEF;
+	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
+
+	if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
+		retcode = ERR_MANDATORY_TAG;
+		goto fail;
+	}
+
+	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+		retcode = ERR_MD_IDX_INVALID;
+		goto fail;
+	}
+
+	nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
+	if (IS_ERR(nbc->lo_file)) {
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
+		    PTR_ERR(nbc->lo_file));
+		nbc->lo_file = NULL;
+		retcode = ERR_OPEN_DISK;
+		goto fail;
+	}
+
+	inode = nbc->lo_file->f_dentry->d_inode;
+
+	if (!S_ISBLK(inode->i_mode)) {
+		retcode = ERR_DISK_NOT_BDEV;
+		goto fail;
+	}
+
+	nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
+	if (IS_ERR(nbc->md_file)) {
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
+		    PTR_ERR(nbc->md_file));
+		nbc->md_file = NULL;
+		retcode = ERR_OPEN_MD_DISK;
+		goto fail;
+	}
+
+	inode2 = nbc->md_file->f_dentry->d_inode;
+
+	if (!S_ISBLK(inode2->i_mode)) {
+		retcode = ERR_MD_NOT_BDEV;
+		goto fail;
+	}
+
+	nbc->backing_bdev = inode->i_bdev;
+	if (bd_claim(nbc->backing_bdev, mdev)) {
+		printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
+		       nbc->backing_bdev, mdev,
+		       nbc->backing_bdev->bd_holder,
+		       nbc->backing_bdev->bd_contains->bd_holder,
+		       nbc->backing_bdev->bd_holders);
+		retcode = ERR_BDCLAIM_DISK;
+		goto fail;
+	}
+
+	resync_lru = lc_create("resync", drbd_bm_ext_cache,
+			61, sizeof(struct bm_extent),
+			offsetof(struct bm_extent, lce));
+	if (!resync_lru) {
+		retcode = ERR_NOMEM;
+		goto release_bdev_fail;
+	}
+
+	/* meta_dev_idx >= 0: external fixed size,
+	 * possibly multiple drbd sharing one meta device.
+	 * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
+	 * not yet used by some other drbd minor!
+	 * (if you use drbd.conf + drbdadm,
+	 * that should check it for you already; but if you don't, or someone
+	 * fooled it, we need to double check here) */
+	nbc->md_bdev = inode2->i_bdev;
+	if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
+				: (void *) drbd_m_holder)) {
+		retcode = ERR_BDCLAIM_MD_DISK;
+		goto release_bdev_fail;
+	}
+
+	if ((nbc->backing_bdev == nbc->md_bdev) !=
+	    (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+	     nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+		retcode = ERR_MD_IDX_INVALID;
+		goto release_bdev2_fail;
+	}
+
+	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
+	drbd_md_set_sector_offsets(mdev, nbc);
+
+	if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
+		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
+			(unsigned long long) drbd_get_max_capacity(nbc),
+			(unsigned long long) nbc->dc.disk_size);
+		retcode = ERR_DISK_TO_SMALL;
+		goto release_bdev2_fail;
+	}
+
+	if (nbc->dc.meta_dev_idx < 0) {
+		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
+		/* at least one MB, otherwise it does not make sense */
+		min_md_device_sectors = (2<<10);
+	} else {
+		max_possible_sectors = DRBD_MAX_SECTORS;
+		min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
+	}
+
+	if (drbd_get_capacity(nbc->md_bdev) > max_possible_sectors)
+		dev_warn(DEV, "truncating very big lower level device "
+		     "to currently maximum possible %llu sectors\n",
+		     (unsigned long long) max_possible_sectors);
+
+	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
+		retcode = ERR_MD_DISK_TO_SMALL;
+		dev_warn(DEV, "refusing attach: md-device too small, "
+		     "at least %llu sectors needed for this meta-disk type\n",
+		     (unsigned long long) min_md_device_sectors);
+		goto release_bdev2_fail;
+	}
+
+	/* Make sure the new disk is big enough
+	 * (we may currently be R_PRIMARY with no local disk...) */
+	if (drbd_get_max_capacity(nbc) <
+	    drbd_get_capacity(mdev->this_bdev)) {
+		retcode = ERR_DISK_TO_SMALL;
+		goto release_bdev2_fail;
+	}
+
+	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
+
+	drbd_suspend_io(mdev);
+	/* also wait for the last barrier ack. */
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
+	/* and for any other previously queued work */
+	drbd_flush_workqueue(mdev);
+
+	retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+	drbd_resume_io(mdev);
+	if (retcode < SS_SUCCESS)
+		goto release_bdev2_fail;
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING))
+		goto force_diskless;
+
+	drbd_md_set_sector_offsets(mdev, nbc);
+
+	if (!mdev->bitmap) {
+		if (drbd_bm_init(mdev)) {
+			retcode = ERR_NOMEM;
+			goto force_diskless_dec;
+		}
+	}
+
+	retcode = drbd_md_read(mdev, nbc);
+	if (retcode != NO_ERROR)
+		goto force_diskless_dec;
+
+	if (mdev->state.conn < C_CONNECTED &&
+	    mdev->state.role == R_PRIMARY &&
+	    (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
+		dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
+		    (unsigned long long)mdev->ed_uuid);
+		retcode = ERR_DATA_NOT_CURRENT;
+		goto force_diskless_dec;
+	}
+
+	/* Since we are diskless, fix the activity log first... */
+	if (drbd_check_al_size(mdev)) {
+		retcode = ERR_NOMEM;
+		goto force_diskless_dec;
+	}
+
+	/* Prevent shrinking of consistent devices ! */
+	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
+	   drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) {
+		dev_warn(DEV, "refusing to truncate a consistent device\n");
+		retcode = ERR_DISK_TO_SMALL;
+		goto force_diskless_dec;
+	}
+
+	if (!drbd_al_read_log(mdev, nbc)) {
+		retcode = ERR_IO_MD_DISK;
+		goto force_diskless_dec;
+	}
+
+	/* allocate a second IO page if logical_block_size != 512 */
+	logical_block_size = bdev_logical_block_size(nbc->md_bdev);
+	if (logical_block_size == 0)
+		logical_block_size = MD_SECTOR_SIZE;
+
+	if (logical_block_size != MD_SECTOR_SIZE) {
+		if (!mdev->md_io_tmpp) {
+			struct page *page = alloc_page(GFP_NOIO);
+			if (!page)
+				goto force_diskless_dec;
+
+			dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
+			     logical_block_size, MD_SECTOR_SIZE);
+			dev_warn(DEV, "Workaround engaged (has performance impact).\n");
+
+			mdev->md_io_tmpp = page;
+		}
+	}
+
+	/* Reset the "barriers don't work" bits here, then force meta data to
+	 * be written, to ensure we determine if barriers are supported. */
+	if (nbc->dc.no_md_flush)
+		set_bit(MD_NO_BARRIER, &mdev->flags);
+	else
+		clear_bit(MD_NO_BARRIER, &mdev->flags);
+
+	/* Point of no return reached.
+	 * Devices and memory are no longer released by error cleanup below.
+	 * now mdev takes over responsibility, and the state engine should
+	 * clean it up somewhere.  */
+	D_ASSERT(mdev->ldev == NULL);
+	mdev->ldev = nbc;
+	mdev->resync = resync_lru;
+	nbc = NULL;
+	resync_lru = NULL;
+
+	mdev->write_ordering = WO_bio_barrier;
+	drbd_bump_write_ordering(mdev, WO_bio_barrier);
+
+	if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
+		set_bit(CRASHED_PRIMARY, &mdev->flags);
+	else
+		clear_bit(CRASHED_PRIMARY, &mdev->flags);
+
+	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
+		set_bit(CRASHED_PRIMARY, &mdev->flags);
+		cp_discovered = 1;
+	}
+
+	mdev->send_cnt = 0;
+	mdev->recv_cnt = 0;
+	mdev->read_cnt = 0;
+	mdev->writ_cnt = 0;
+
+	drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
+
+	/* If I am currently not R_PRIMARY,
+	 * but meta data primary indicator is set,
+	 * I just now recover from a hard crash,
+	 * and have been R_PRIMARY before that crash.
+	 *
+	 * Now, if I had no connection before that crash
+	 * (have been degraded R_PRIMARY), chances are that
+	 * I won't find my peer now either.
+	 *
+	 * In that case, and _only_ in that case,
+	 * we use the degr-wfc-timeout instead of the default,
+	 * so we can automatically recover from a crash of a
+	 * degraded but active "cluster" after a certain timeout.
+	 */
+	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+	if (mdev->state.role != R_PRIMARY &&
+	     drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
+		set_bit(USE_DEGR_WFC_T, &mdev->flags);
+
+	dd = drbd_determin_dev_size(mdev);
+	if (dd == dev_size_error) {
+		retcode = ERR_NOMEM_BITMAP;
+		goto force_diskless_dec;
+	} else if (dd == grew)
+		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+
+	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+		dev_info(DEV, "Assuming that all blocks are out of sync "
+		     "(aka FullSync)\n");
+		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
+			retcode = ERR_IO_MD_DISK;
+			goto force_diskless_dec;
+		}
+	} else {
+		if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
+			retcode = ERR_IO_MD_DISK;
+			goto force_diskless_dec;
+		}
+	}
+
+	if (cp_discovered) {
+		drbd_al_apply_to_bm(mdev);
+		drbd_al_to_on_disk_bm(mdev);
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	os = mdev->state;
+	ns.i = os.i;
+	/* If MDF_CONSISTENT is not set go into inconsistent state,
+	   otherwise investigate MDF_WasUpToDate...
+	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
+	   otherwise into D_CONSISTENT state.
+	*/
+	if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
+		if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
+			ns.disk = D_CONSISTENT;
+		else
+			ns.disk = D_OUTDATED;
+	} else {
+		ns.disk = D_INCONSISTENT;
+	}
+
+	if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
+		ns.pdsk = D_OUTDATED;
+
+	if ( ns.disk == D_CONSISTENT &&
+	    (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
+		ns.disk = D_UP_TO_DATE;
+
+	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
+	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
+	   this point, because drbd_request_state() modifies these
+	   flags. */
+
+	/* In case we are C_CONNECTED postpone any decision on the new disk
+	   state after the negotiation phase. */
+	if (mdev->state.conn == C_CONNECTED) {
+		mdev->new_state_tmp.i = ns.i;
+		ns.i = os.i;
+		ns.disk = D_NEGOTIATING;
+	}
+
+	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+	ns = mdev->state;
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (rv < SS_SUCCESS)
+		goto force_diskless_dec;
+
+	if (mdev->state.role == R_PRIMARY)
+		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
+	else
+		mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+
+	drbd_md_mark_dirty(mdev);
+	drbd_md_sync(mdev);
+
+	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+	put_ldev(mdev);
+	reply->ret_code = retcode;
+	drbd_reconfig_done(mdev);
+	return 0;
+
+ force_diskless_dec:
+	put_ldev(mdev);
+ force_diskless:
+	drbd_force_state(mdev, NS(disk, D_DISKLESS));
+	drbd_md_sync(mdev);
+ release_bdev2_fail:
+	if (nbc)
+		bd_release(nbc->md_bdev);
+ release_bdev_fail:
+	if (nbc)
+		bd_release(nbc->backing_bdev);
+ fail:
+	if (nbc) {
+		if (nbc->lo_file)
+			fput(nbc->lo_file);
+		if (nbc->md_file)
+			fput(nbc->md_file);
+		kfree(nbc);
+	}
+	lc_destroy(resync_lru);
+
+	reply->ret_code = retcode;
+	drbd_reconfig_done(mdev);
+	return 0;
+}
+
+static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			  struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+	return 0;
+}
+
+static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			    struct drbd_nl_cfg_reply *reply)
+{
+	int i, ns;
+	enum drbd_ret_codes retcode;
+	struct net_conf *new_conf = NULL;
+	struct crypto_hash *tfm = NULL;
+	struct crypto_hash *integrity_w_tfm = NULL;
+	struct crypto_hash *integrity_r_tfm = NULL;
+	struct hlist_head *new_tl_hash = NULL;
+	struct hlist_head *new_ee_hash = NULL;
+	struct drbd_conf *odev;
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+	void *int_dig_out = NULL;
+	void *int_dig_in = NULL;
+	void *int_dig_vv = NULL;
+	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+
+	drbd_reconfig_start(mdev);
+
+	if (mdev->state.conn > C_STANDALONE) {
+		retcode = ERR_NET_CONFIGURED;
+		goto fail;
+	}
+
+	/* allocation not in the IO path, cqueue thread context */
+	new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
+	if (!new_conf) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	memset(new_conf, 0, sizeof(struct net_conf));
+	new_conf->timeout	   = DRBD_TIMEOUT_DEF;
+	new_conf->try_connect_int  = DRBD_CONNECT_INT_DEF;
+	new_conf->ping_int	   = DRBD_PING_INT_DEF;
+	new_conf->max_epoch_size   = DRBD_MAX_EPOCH_SIZE_DEF;
+	new_conf->max_buffers	   = DRBD_MAX_BUFFERS_DEF;
+	new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
+	new_conf->sndbuf_size	   = DRBD_SNDBUF_SIZE_DEF;
+	new_conf->rcvbuf_size	   = DRBD_RCVBUF_SIZE_DEF;
+	new_conf->ko_count	   = DRBD_KO_COUNT_DEF;
+	new_conf->after_sb_0p	   = DRBD_AFTER_SB_0P_DEF;
+	new_conf->after_sb_1p	   = DRBD_AFTER_SB_1P_DEF;
+	new_conf->after_sb_2p	   = DRBD_AFTER_SB_2P_DEF;
+	new_conf->want_lose	   = 0;
+	new_conf->two_primaries    = 0;
+	new_conf->wire_protocol    = DRBD_PROT_C;
+	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
+	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
+
+	if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
+		retcode = ERR_MANDATORY_TAG;
+		goto fail;
+	}
+
+	if (new_conf->two_primaries
+	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
+		retcode = ERR_NOT_PROTO_C;
+		goto fail;
+	};
+
+	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
+		retcode = ERR_DISCARD;
+		goto fail;
+	}
+
+	retcode = NO_ERROR;
+
+	new_my_addr = (struct sockaddr *)&new_conf->my_addr;
+	new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
+	for (i = 0; i < minor_count; i++) {
+		odev = minor_to_mdev(i);
+		if (!odev || odev == mdev)
+			continue;
+		if (get_net_conf(odev)) {
+			taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
+			if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
+			    !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
+				retcode = ERR_LOCAL_ADDR;
+
+			taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
+			if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
+			    !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
+				retcode = ERR_PEER_ADDR;
+
+			put_net_conf(odev);
+			if (retcode != NO_ERROR)
+				goto fail;
+		}
+	}
+
+	if (new_conf->cram_hmac_alg[0] != 0) {
+		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+			new_conf->cram_hmac_alg);
+		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(tfm)) {
+			tfm = NULL;
+			retcode = ERR_AUTH_ALG;
+			goto fail;
+		}
+
+		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm))
+						!= CRYPTO_ALG_TYPE_HASH) {
+			retcode = ERR_AUTH_ALG_ND;
+			goto fail;
+		}
+	}
+
+	if (new_conf->integrity_alg[0]) {
+		integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(integrity_w_tfm)) {
+			integrity_w_tfm = NULL;
+			retcode=ERR_INTEGRITY_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
+			retcode=ERR_INTEGRITY_ALG_ND;
+			goto fail;
+		}
+
+		integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(integrity_r_tfm)) {
+			integrity_r_tfm = NULL;
+			retcode=ERR_INTEGRITY_ALG;
+			goto fail;
+		}
+	}
+
+	ns = new_conf->max_epoch_size/8;
+	if (mdev->tl_hash_s != ns) {
+		new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+		if (!new_tl_hash) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	ns = new_conf->max_buffers/8;
+	if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
+		new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+		if (!new_ee_hash) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
+
+	if (integrity_w_tfm) {
+		i = crypto_hash_digestsize(integrity_w_tfm);
+		int_dig_out = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_out) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+		int_dig_in = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_in) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+		int_dig_vv = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_vv) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	if (!mdev->bitmap) {
+		if(drbd_bm_init(mdev)) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->net_conf != NULL) {
+		retcode = ERR_NET_CONFIGURED;
+		spin_unlock_irq(&mdev->req_lock);
+		goto fail;
+	}
+	mdev->net_conf = new_conf;
+
+	mdev->send_cnt = 0;
+	mdev->recv_cnt = 0;
+
+	if (new_tl_hash) {
+		kfree(mdev->tl_hash);
+		mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
+		mdev->tl_hash = new_tl_hash;
+	}
+
+	if (new_ee_hash) {
+		kfree(mdev->ee_hash);
+		mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
+		mdev->ee_hash = new_ee_hash;
+	}
+
+	crypto_free_hash(mdev->cram_hmac_tfm);
+	mdev->cram_hmac_tfm = tfm;
+
+	crypto_free_hash(mdev->integrity_w_tfm);
+	mdev->integrity_w_tfm = integrity_w_tfm;
+
+	crypto_free_hash(mdev->integrity_r_tfm);
+	mdev->integrity_r_tfm = integrity_r_tfm;
+
+	kfree(mdev->int_dig_out);
+	kfree(mdev->int_dig_in);
+	kfree(mdev->int_dig_vv);
+	mdev->int_dig_out=int_dig_out;
+	mdev->int_dig_in=int_dig_in;
+	mdev->int_dig_vv=int_dig_vv;
+	spin_unlock_irq(&mdev->req_lock);
+
+	retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
+
+	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+	reply->ret_code = retcode;
+	drbd_reconfig_done(mdev);
+	return 0;
+
+fail:
+	kfree(int_dig_out);
+	kfree(int_dig_in);
+	kfree(int_dig_vv);
+	crypto_free_hash(tfm);
+	crypto_free_hash(integrity_w_tfm);
+	crypto_free_hash(integrity_r_tfm);
+	kfree(new_tl_hash);
+	kfree(new_ee_hash);
+	kfree(new_conf);
+
+	reply->ret_code = retcode;
+	drbd_reconfig_done(mdev);
+	return 0;
+}
+
+static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode;
+
+	retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
+
+	if (retcode == SS_NOTHING_TO_DO)
+		goto done;
+	else if (retcode == SS_ALREADY_STANDALONE)
+		goto done;
+	else if (retcode == SS_PRIMARY_NOP) {
+		/* Our statche checking code wants to see the peer outdated. */
+		retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+						      pdsk, D_OUTDATED));
+	} else if (retcode == SS_CW_FAILED_BY_PEER) {
+		/* The peer probably wants to see us outdated. */
+		retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+							disk, D_OUTDATED),
+					      CS_ORDERED);
+		if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			retcode = SS_SUCCESS;
+		}
+	}
+
+	if (retcode < SS_SUCCESS)
+		goto fail;
+
+	if (wait_event_interruptible(mdev->state_wait,
+				     mdev->state.conn != C_DISCONNECTING)) {
+		/* Do not test for mdev->state.conn == C_STANDALONE, since
+		   someone else might connect us in the mean time! */
+		retcode = ERR_INTR;
+		goto fail;
+	}
+
+ done:
+	retcode = NO_ERROR;
+ fail:
+	drbd_md_sync(mdev);
+	reply->ret_code = retcode;
+	return 0;
+}
+
+void resync_after_online_grow(struct drbd_conf *mdev)
+{
+	int iass; /* I am sync source */
+
+	dev_info(DEV, "Resync of new storage after online grow\n");
+	if (mdev->state.role != mdev->state.peer)
+		iass = (mdev->state.role == R_PRIMARY);
+	else
+		iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+	if (iass)
+		drbd_start_resync(mdev, C_SYNC_SOURCE);
+	else
+		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
+}
+
+static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			  struct drbd_nl_cfg_reply *reply)
+{
+	struct resize rs;
+	int retcode = NO_ERROR;
+	int ldsc = 0; /* local disk size changed */
+	enum determine_dev_size dd;
+
+	memset(&rs, 0, sizeof(struct resize));
+	if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
+		retcode = ERR_MANDATORY_TAG;
+		goto fail;
+	}
+
+	if (mdev->state.conn > C_CONNECTED) {
+		retcode = ERR_RESIZE_RESYNC;
+		goto fail;
+	}
+
+	if (mdev->state.role == R_SECONDARY &&
+	    mdev->state.peer == R_SECONDARY) {
+		retcode = ERR_NO_PRIMARY;
+		goto fail;
+	}
+
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
+	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+		ldsc = 1;
+	}
+
+	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
+	dd = drbd_determin_dev_size(mdev);
+	drbd_md_sync(mdev);
+	put_ldev(mdev);
+	if (dd == dev_size_error) {
+		retcode = ERR_NOMEM_BITMAP;
+		goto fail;
+	}
+
+	if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) {
+		if (dd == grew)
+			set_bit(RESIZE_PENDING, &mdev->flags);
+
+		drbd_send_uuids(mdev);
+		drbd_send_sizes(mdev, 1);
+	}
+
+ fail:
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			       struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NO_ERROR;
+	int err;
+	int ovr; /* online verify running */
+	int rsr; /* re-sync running */
+	struct crypto_hash *verify_tfm = NULL;
+	struct crypto_hash *csums_tfm = NULL;
+	struct syncer_conf sc;
+	cpumask_var_t new_cpu_mask;
+
+	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+		memset(&sc, 0, sizeof(struct syncer_conf));
+		sc.rate       = DRBD_RATE_DEF;
+		sc.after      = DRBD_AFTER_DEF;
+		sc.al_extents = DRBD_AL_EXTENTS_DEF;
+	} else
+		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+
+	if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
+		retcode = ERR_MANDATORY_TAG;
+		goto fail;
+	}
+
+	/* re-sync running */
+	rsr = (	mdev->state.conn == C_SYNC_SOURCE ||
+		mdev->state.conn == C_SYNC_TARGET ||
+		mdev->state.conn == C_PAUSED_SYNC_S ||
+		mdev->state.conn == C_PAUSED_SYNC_T );
+
+	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
+		retcode = ERR_CSUMS_RESYNC_RUNNING;
+		goto fail;
+	}
+
+	if (!rsr && sc.csums_alg[0]) {
+		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(csums_tfm)) {
+			csums_tfm = NULL;
+			retcode = ERR_CSUMS_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
+			retcode = ERR_CSUMS_ALG_ND;
+			goto fail;
+		}
+	}
+
+	/* online verify running */
+	ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
+
+	if (ovr) {
+		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
+			retcode = ERR_VERIFY_RUNNING;
+			goto fail;
+		}
+	}
+
+	if (!ovr && sc.verify_alg[0]) {
+		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(verify_tfm)) {
+			verify_tfm = NULL;
+			retcode = ERR_VERIFY_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
+			retcode = ERR_VERIFY_ALG_ND;
+			goto fail;
+		}
+	}
+
+	/* silently ignore cpu mask on UP kernel */
+	if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
+		err = __bitmap_parse(sc.cpu_mask, 32, 0,
+				cpumask_bits(new_cpu_mask), nr_cpu_ids);
+		if (err) {
+			dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+			retcode = ERR_CPU_MASK_PARSE;
+			goto fail;
+		}
+	}
+
+	ERR_IF (sc.rate < 1) sc.rate = 1;
+	ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
+#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
+	if (sc.al_extents > AL_MAX) {
+		dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
+		sc.al_extents = AL_MAX;
+	}
+#undef AL_MAX
+
+	/* most sanity checks done, try to assign the new sync-after
+	 * dependency.  need to hold the global lock in there,
+	 * to avoid a race in the dependency loop check. */
+	retcode = drbd_alter_sa(mdev, sc.after);
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	/* ok, assign the rest of it as well.
+	 * lock against receive_SyncParam() */
+	spin_lock(&mdev->peer_seq_lock);
+	mdev->sync_conf = sc;
+
+	if (!rsr) {
+		crypto_free_hash(mdev->csums_tfm);
+		mdev->csums_tfm = csums_tfm;
+		csums_tfm = NULL;
+	}
+
+	if (!ovr) {
+		crypto_free_hash(mdev->verify_tfm);
+		mdev->verify_tfm = verify_tfm;
+		verify_tfm = NULL;
+	}
+	spin_unlock(&mdev->peer_seq_lock);
+
+	if (get_ldev(mdev)) {
+		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+		drbd_al_shrink(mdev);
+		err = drbd_check_al_size(mdev);
+		lc_unlock(mdev->act_log);
+		wake_up(&mdev->al_wait);
+
+		put_ldev(mdev);
+		drbd_md_sync(mdev);
+
+		if (err) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	if (mdev->state.conn >= C_CONNECTED)
+		drbd_send_sync_param(mdev, &sc);
+
+	if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
+		cpumask_copy(mdev->cpu_mask, new_cpu_mask);
+		drbd_calc_cpu_mask(mdev);
+		mdev->receiver.reset_cpu_mask = 1;
+		mdev->asender.reset_cpu_mask = 1;
+		mdev->worker.reset_cpu_mask = 1;
+	}
+
+	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+fail:
+	free_cpumask_var(new_cpu_mask);
+	crypto_free_hash(csums_tfm);
+	crypto_free_hash(verify_tfm);
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode;
+
+	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
+
+	if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
+		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+
+	while (retcode == SS_NEED_CONNECTION) {
+		spin_lock_irq(&mdev->req_lock);
+		if (mdev->state.conn < C_CONNECTED)
+			retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
+		spin_unlock_irq(&mdev->req_lock);
+
+		if (retcode != SS_NEED_CONNECTION)
+			break;
+
+		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+	}
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+				   struct drbd_nl_cfg_reply *reply)
+{
+
+	reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+
+	return 0;
+}
+
+static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NO_ERROR;
+
+	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+		retcode = ERR_PAUSE_IS_SET;
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			       struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NO_ERROR;
+
+	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO)
+		retcode = ERR_PAUSE_IS_CLEAR;
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
+
+	return 0;
+}
+
+static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
+	return 0;
+}
+
+static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			   struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
+	return 0;
+}
+
+static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			   struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+
+	tl = reply->tag_list;
+
+	if (get_ldev(mdev)) {
+		tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
+		put_ldev(mdev);
+	}
+
+	if (get_net_conf(mdev)) {
+		tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
+		put_net_conf(mdev);
+	}
+	tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
+
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl = reply->tag_list;
+	union drbd_state s = mdev->state;
+	unsigned long rs_left;
+	unsigned int res;
+
+	tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
+
+	/* no local ref, no bitmap, no syncer progress. */
+	if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
+		if (get_ldev(mdev)) {
+			drbd_get_syncer_progress(mdev, &rs_left, &res);
+			tl = tl_add_int(tl, T_sync_progress, &res);
+			put_ldev(mdev);
+		}
+	}
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+
+	tl = reply->tag_list;
+
+	if (get_ldev(mdev)) {
+		tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
+		tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
+		put_ldev(mdev);
+	}
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+/**
+ * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
+ * @mdev:	DRBD device.
+ * @nlp:	Netlink/connector packet from drbdsetup
+ * @reply:	Reply packet for drbdsetup
+ */
+static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+				    struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+	char rv;
+
+	tl = reply->tag_list;
+
+	rv = mdev->state.pdsk == D_OUTDATED        ? UT_PEER_OUTDATED :
+	  test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+
+	tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+				    struct drbd_nl_cfg_reply *reply)
+{
+	/* default to resume from last known position, if possible */
+	struct start_ov args =
+		{ .start_sector = mdev->ov_start_sector };
+
+	if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
+	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+	return 0;
+}
+
+
+static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NO_ERROR;
+	int skip_initial_sync = 0;
+	int err;
+
+	struct new_c_uuid args;
+
+	memset(&args, 0, sizeof(struct new_c_uuid));
+	if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */
+
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	/* this is "skip initial sync", assume to be clean */
+	if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
+	    mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
+		dev_info(DEV, "Preparing to skip initial sync\n");
+		skip_initial_sync = 1;
+	} else if (mdev->state.conn != C_STANDALONE) {
+		retcode = ERR_CONNECTED;
+		goto out_dec;
+	}
+
+	drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
+	drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
+
+	if (args.clear_bm) {
+		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
+		if (err) {
+			dev_err(DEV, "Writing bitmap failed with %d\n",err);
+			retcode = ERR_IO_MD_DISK;
+		}
+		if (skip_initial_sync) {
+			drbd_send_uuids_skip_initial_sync(mdev);
+			_drbd_uuid_set(mdev, UI_BITMAP, 0);
+			spin_lock_irq(&mdev->req_lock);
+			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+					CS_VERBOSE, NULL);
+			spin_unlock_irq(&mdev->req_lock);
+		}
+	}
+
+	drbd_md_sync(mdev);
+out_dec:
+	put_ldev(mdev);
+out:
+	mutex_unlock(&mdev->state_mutex);
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
+{
+	struct drbd_conf *mdev;
+
+	if (nlp->drbd_minor >= minor_count)
+		return NULL;
+
+	mdev = minor_to_mdev(nlp->drbd_minor);
+
+	if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
+		struct gendisk *disk = NULL;
+		mdev = drbd_new_device(nlp->drbd_minor);
+
+		spin_lock_irq(&drbd_pp_lock);
+		if (minor_table[nlp->drbd_minor] == NULL) {
+			minor_table[nlp->drbd_minor] = mdev;
+			disk = mdev->vdisk;
+			mdev = NULL;
+		} /* else: we lost the race */
+		spin_unlock_irq(&drbd_pp_lock);
+
+		if (disk) /* we won the race above */
+			/* in case we ever add a drbd_delete_device(),
+			 * don't forget the del_gendisk! */
+			add_disk(disk);
+		else /* we lost the race above */
+			drbd_free_mdev(mdev);
+
+		mdev = minor_to_mdev(nlp->drbd_minor);
+	}
+
+	return mdev;
+}
+
+struct cn_handler_struct {
+	int (*function)(struct drbd_conf *,
+			 struct drbd_nl_cfg_req *,
+			 struct drbd_nl_cfg_reply *);
+	int reply_body_size;
+};
+
+static struct cn_handler_struct cnd_table[] = {
+	[ P_primary ]		= { &drbd_nl_primary,		0 },
+	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
+	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
+	[ P_detach ]		= { &drbd_nl_detach,		0 },
+	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
+	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
+	[ P_resize ]		= { &drbd_nl_resize,		0 },
+	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
+	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
+	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
+	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
+	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
+	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
+	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
+	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
+	[ P_get_config ]	= { &drbd_nl_get_config,
+				    sizeof(struct syncer_conf_tag_len_struct) +
+				    sizeof(struct disk_conf_tag_len_struct) +
+				    sizeof(struct net_conf_tag_len_struct) },
+	[ P_get_state ]		= { &drbd_nl_get_state,
+				    sizeof(struct get_state_tag_len_struct) +
+				    sizeof(struct sync_progress_tag_len_struct)	},
+	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
+				    sizeof(struct get_uuids_tag_len_struct) },
+	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
+				    sizeof(struct get_timeout_flag_tag_len_struct)},
+	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
+	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
+};
+
+static void drbd_connector_callback(struct cn_msg *req)
+{
+	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
+	struct cn_handler_struct *cm;
+	struct cn_msg *cn_reply;
+	struct drbd_nl_cfg_reply *reply;
+	struct drbd_conf *mdev;
+	int retcode, rr;
+	int reply_size = sizeof(struct cn_msg)
+		+ sizeof(struct drbd_nl_cfg_reply)
+		+ sizeof(short int);
+
+	if (!try_module_get(THIS_MODULE)) {
+		printk(KERN_ERR "drbd: try_module_get() failed!\n");
+		return;
+	}
+
+	mdev = ensure_mdev(nlp);
+	if (!mdev) {
+		retcode = ERR_MINOR_INVALID;
+		goto fail;
+	}
+
+	trace_drbd_netlink(req, 1);
+
+	if (nlp->packet_type >= P_nl_after_last_packet) {
+		retcode = ERR_PACKET_NR;
+		goto fail;
+	}
+
+	cm = cnd_table + nlp->packet_type;
+
+	/* This may happen if packet number is 0: */
+	if (cm->function == NULL) {
+		retcode = ERR_PACKET_NR;
+		goto fail;
+	}
+
+	reply_size += cm->reply_body_size;
+
+	/* allocation not in the IO path, cqueue thread context */
+	cn_reply = kmalloc(reply_size, GFP_KERNEL);
+	if (!cn_reply) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
+
+	reply->packet_type =
+		cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
+	reply->minor = nlp->drbd_minor;
+	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
+	/* reply->tag_list; might be modified by cm->function. */
+
+	rr = cm->function(mdev, nlp, reply);
+
+	cn_reply->id = req->id;
+	cn_reply->seq = req->seq;
+	cn_reply->ack = req->ack  + 1;
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
+	cn_reply->flags = 0;
+
+	trace_drbd_netlink(cn_reply, 0);
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+	if (rr && rr != -ESRCH)
+		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+
+	kfree(cn_reply);
+	module_put(THIS_MODULE);
+	return;
+ fail:
+	drbd_nl_send_reply(req, retcode);
+	module_put(THIS_MODULE);
+}
+
+static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
+
+static unsigned short *
+__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
+	unsigned short len, int nul_terminated)
+{
+	unsigned short l = tag_descriptions[tag_number(tag)].max_len;
+	len = (len < l) ? len :  l;
+	put_unaligned(tag, tl++);
+	put_unaligned(len, tl++);
+	memcpy(tl, data, len);
+	tl = (unsigned short*)((char*)tl + len);
+	if (nul_terminated)
+		*((char*)tl - 1) = 0;
+	return tl;
+}
+
+static unsigned short *
+tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
+{
+	return __tl_add_blob(tl, tag, data, len, 0);
+}
+
+static unsigned short *
+tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
+{
+	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
+}
+
+static unsigned short *
+tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
+{
+	put_unaligned(tag, tl++);
+	switch(tag_type(tag)) {
+	case TT_INTEGER:
+		put_unaligned(sizeof(int), tl++);
+		put_unaligned(*(int *)val, (int *)tl);
+		tl = (unsigned short*)((char*)tl+sizeof(int));
+		break;
+	case TT_INT64:
+		put_unaligned(sizeof(u64), tl++);
+		put_unaligned(*(u64 *)val, (u64 *)tl);
+		tl = (unsigned short*)((char*)tl+sizeof(u64));
+		break;
+	default:
+		/* someone did something stupid. */
+		;
+	}
+	return tl;
+}
+
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct get_state_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+
+	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+	tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
+
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_get_state;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NO_ERROR;
+
+	trace_drbd_netlink(cn_reply, 0);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct call_helper_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+
+	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+	tl = tl_add_str(tl, T_helper, helper_name);
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_call_helper;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NO_ERROR;
+
+	trace_drbd_netlink(cn_reply, 0);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ee(struct drbd_conf *mdev,
+		const char *reason, const int dgs,
+		const char* seen_hash, const char* calc_hash,
+		const struct drbd_epoch_entry* e)
+{
+	struct cn_msg *cn_reply;
+	struct drbd_nl_cfg_reply *reply;
+	struct bio_vec *bvec;
+	unsigned short *tl;
+	int i;
+
+	if (!e)
+		return;
+	if (!reason || !reason[0])
+		return;
+
+	/* apparently we have to memcpy twice, first to prepare the data for the
+	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
+	 * netlink skb. */
+	/* receiver thread context, which is not in the writeout path (of this node),
+	 * but may be in the writeout path of the _other_ node.
+	 * GFP_NOIO to avoid potential "distributed deadlock". */
+	cn_reply = kmalloc(
+		sizeof(struct cn_msg)+
+		sizeof(struct drbd_nl_cfg_reply)+
+		sizeof(struct dump_ee_tag_len_struct)+
+		sizeof(short int),
+		GFP_NOIO);
+
+	if (!cn_reply) {
+		dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
+				(unsigned long long)e->sector, e->size);
+		return;
+	}
+
+	reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
+	tl = reply->tag_list;
+
+	tl = tl_add_str(tl, T_dump_ee_reason, reason);
+	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
+	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
+	tl = tl_add_int(tl, T_ee_sector, &e->sector);
+	tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
+
+	put_unaligned(T_ee_data, tl++);
+	put_unaligned(e->size, tl++);
+
+	__bio_for_each_segment(bvec, e->private_bio, i, 0) {
+		void *d = kmap(bvec->bv_page);
+		memcpy(tl, d + bvec->bv_offset, bvec->bv_len);
+		kunmap(bvec->bv_page);
+		tl=(unsigned short*)((char*)tl + bvec->bv_len);
+	}
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1,&drbd_nl_seq);
+	cn_reply->ack = 0; // not used here.
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char*)tl - (char*)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_dump_ee;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NO_ERROR;
+
+	trace_drbd_netlink(cn_reply, 0);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	kfree(cn_reply);
+}
+
+void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct sync_progress_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+	unsigned long rs_left;
+	unsigned int res;
+
+	/* no local ref, no bitmap, no syncer progress, no broadcast. */
+	if (!get_ldev(mdev))
+		return;
+	drbd_get_syncer_progress(mdev, &rs_left, &res);
+	put_ldev(mdev);
+
+	tl = tl_add_int(tl, T_sync_progress, &res);
+	put_unaligned(TT_END, tl++); /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_sync_progress;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NO_ERROR;
+
+	trace_drbd_netlink(cn_reply, 0);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+int __init drbd_nl_init(void)
+{
+	static struct cb_id cn_id_drbd;
+	int err, try=10;
+
+	cn_id_drbd.val = CN_VAL_DRBD;
+	do {
+		cn_id_drbd.idx = cn_idx;
+		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
+		if (!err)
+			break;
+		cn_idx = (cn_idx + CN_IDX_STEP);
+	} while (try--);
+
+	if (err) {
+		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
+		return err;
+	}
+
+	return 0;
+}
+
+void drbd_nl_cleanup(void)
+{
+	static struct cb_id cn_id_drbd;
+
+	cn_id_drbd.idx = cn_idx;
+	cn_id_drbd.val = CN_VAL_DRBD;
+
+	cn_del_callback(&cn_id_drbd);
+}
+
+void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
+{
+	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	int rr;
+
+	cn_reply->id = req->id;
+
+	cn_reply->seq = req->seq;
+	cn_reply->ack = req->ack  + 1;
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
+	cn_reply->flags = 0;
+
+	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
+	reply->ret_code = ret_code;
+
+	trace_drbd_netlink(cn_reply, 0);
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	if (rr && rr != -ESRCH)
+		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+}
+
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
new file mode 100644
index 000000000000..98fcb7450c76
--- /dev/null
+++ b/drivers/block/drbd/drbd_proc.c
@@ -0,0 +1,266 @@
+/*
+   drbd_proc.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+
+static int drbd_proc_open(struct inode *inode, struct file *file);
+
+
+struct proc_dir_entry *drbd_proc;
+struct file_operations drbd_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= drbd_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+/*lge
+ * progress bars shamelessly adapted from driver/md/md.c
+ * output looks like
+ *	[=====>..............] 33.5% (23456/123456)
+ *	finish: 2:20:20 speed: 6,345 (6,456) K/sec
+ */
+static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
+{
+	unsigned long db, dt, dbdt, rt, rs_left;
+	unsigned int res;
+	int i, x, y;
+
+	drbd_get_syncer_progress(mdev, &rs_left, &res);
+
+	x = res/50;
+	y = 20-x;
+	seq_printf(seq, "\t[");
+	for (i = 1; i < x; i++)
+		seq_printf(seq, "=");
+	seq_printf(seq, ">");
+	for (i = 0; i < y; i++)
+		seq_printf(seq, ".");
+	seq_printf(seq, "] ");
+
+	seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
+	/* if more than 1 GB display in MB */
+	if (mdev->rs_total > 0x100000L)
+		seq_printf(seq, "(%lu/%lu)M\n\t",
+			    (unsigned long) Bit2KB(rs_left >> 10),
+			    (unsigned long) Bit2KB(mdev->rs_total >> 10));
+	else
+		seq_printf(seq, "(%lu/%lu)K\n\t",
+			    (unsigned long) Bit2KB(rs_left),
+			    (unsigned long) Bit2KB(mdev->rs_total));
+
+	/* see drivers/md/md.c
+	 * We do not want to overflow, so the order of operands and
+	 * the * 100 / 100 trick are important. We do a +1 to be
+	 * safe against division by zero. We only estimate anyway.
+	 *
+	 * dt: time from mark until now
+	 * db: blocks written from mark until now
+	 * rt: remaining time
+	 */
+	dt = (jiffies - mdev->rs_mark_time) / HZ;
+
+	if (dt > 20) {
+		/* if we made no update to rs_mark_time for too long,
+		 * we are stalled. show that. */
+		seq_printf(seq, "stalled\n");
+		return;
+	}
+
+	if (!dt)
+		dt++;
+	db = mdev->rs_mark_left - rs_left;
+	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
+
+	seq_printf(seq, "finish: %lu:%02lu:%02lu",
+		rt / 3600, (rt % 3600) / 60, rt % 60);
+
+	/* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
+	dbdt = Bit2KB(db/dt);
+	if (dbdt > 1000)
+		seq_printf(seq, " speed: %ld,%03ld",
+			dbdt/1000, dbdt % 1000);
+	else
+		seq_printf(seq, " speed: %ld", dbdt);
+
+	/* mean speed since syncer started
+	 * we do account for PausedSync periods */
+	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+	if (dt <= 0)
+		dt = 1;
+	db = mdev->rs_total - rs_left;
+	dbdt = Bit2KB(db/dt);
+	if (dbdt > 1000)
+		seq_printf(seq, " (%ld,%03ld)",
+			dbdt/1000, dbdt % 1000);
+	else
+		seq_printf(seq, " (%ld)", dbdt);
+
+	seq_printf(seq, " K/sec\n");
+}
+
+static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
+{
+	struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+	seq_printf(seq, "%5d %s %s\n", bme->rs_left,
+		   bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
+		   bme->flags & BME_LOCKED ? "LOCKED" : "------"
+		   );
+}
+
+static int drbd_seq_show(struct seq_file *seq, void *v)
+{
+	int i, hole = 0;
+	const char *sn;
+	struct drbd_conf *mdev;
+
+	static char write_ordering_chars[] = {
+		[WO_none] = 'n',
+		[WO_drain_io] = 'd',
+		[WO_bdev_flush] = 'f',
+		[WO_bio_barrier] = 'b',
+	};
+
+	seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
+		   API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
+
+	/*
+	  cs .. connection state
+	  ro .. node role (local/remote)
+	  ds .. disk state (local/remote)
+	     protocol
+	     various flags
+	  ns .. network send
+	  nr .. network receive
+	  dw .. disk write
+	  dr .. disk read
+	  al .. activity log write count
+	  bm .. bitmap update write count
+	  pe .. pending (waiting for ack or data reply)
+	  ua .. unack'd (still need to send ack or data reply)
+	  ap .. application requests accepted, but not yet completed
+	  ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
+	  wo .. write ordering mode currently in use
+	 oos .. known out-of-sync kB
+	*/
+
+	for (i = 0; i < minor_count; i++) {
+		mdev = minor_to_mdev(i);
+		if (!mdev) {
+			hole = 1;
+			continue;
+		}
+		if (hole) {
+			hole = 0;
+			seq_printf(seq, "\n");
+		}
+
+		sn = drbd_conn_str(mdev->state.conn);
+
+		if (mdev->state.conn == C_STANDALONE &&
+		    mdev->state.disk == D_DISKLESS &&
+		    mdev->state.role == R_SECONDARY) {
+			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
+		} else {
+			seq_printf(seq,
+			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
+			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
+			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
+			   i, sn,
+			   drbd_role_str(mdev->state.role),
+			   drbd_role_str(mdev->state.peer),
+			   drbd_disk_str(mdev->state.disk),
+			   drbd_disk_str(mdev->state.pdsk),
+			   (mdev->net_conf == NULL ? ' ' :
+			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
+			   mdev->state.susp ? 's' : 'r',
+			   mdev->state.aftr_isp ? 'a' : '-',
+			   mdev->state.peer_isp ? 'p' : '-',
+			   mdev->state.user_isp ? 'u' : '-',
+			   mdev->congestion_reason ?: '-',
+			   mdev->send_cnt/2,
+			   mdev->recv_cnt/2,
+			   mdev->writ_cnt/2,
+			   mdev->read_cnt/2,
+			   mdev->al_writ_cnt,
+			   mdev->bm_writ_cnt,
+			   atomic_read(&mdev->local_cnt),
+			   atomic_read(&mdev->ap_pending_cnt) +
+			   atomic_read(&mdev->rs_pending_cnt),
+			   atomic_read(&mdev->unacked_cnt),
+			   atomic_read(&mdev->ap_bio_cnt),
+			   mdev->epochs,
+			   write_ordering_chars[mdev->write_ordering]
+			);
+			seq_printf(seq, " oos:%lu\n",
+				   Bit2KB(drbd_bm_total_weight(mdev)));
+		}
+		if (mdev->state.conn == C_SYNC_SOURCE ||
+		    mdev->state.conn == C_SYNC_TARGET)
+			drbd_syncer_progress(mdev, seq);
+
+		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+			seq_printf(seq, "\t%3d%%      %lu/%lu\n",
+				   (int)((mdev->rs_total-mdev->ov_left) /
+					 (mdev->rs_total/100+1)),
+				   mdev->rs_total - mdev->ov_left,
+				   mdev->rs_total);
+
+		if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
+			lc_seq_printf_stats(seq, mdev->resync);
+			lc_seq_printf_stats(seq, mdev->act_log);
+			put_ldev(mdev);
+		}
+
+		if (proc_details >= 2) {
+			if (mdev->resync) {
+				lc_seq_dump_details(seq, mdev->resync, "rs_left",
+					resync_dump_detail);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int drbd_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, drbd_seq_show, PDE(inode)->data);
+}
+
+/* PROC FS stuff end */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
new file mode 100644
index 000000000000..63686c4d85cf
--- /dev/null
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -0,0 +1,4456 @@
+/*
+   drbd_receiver.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <net/sock.h>
+
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/pkt_sched.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+#include <linux/random.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include "drbd_req.h"
+
+#include "drbd_vli.h"
+
+struct flush_work {
+	struct drbd_work w;
+	struct drbd_epoch *epoch;
+};
+
+enum finish_epoch {
+	FE_STILL_LIVE,
+	FE_DESTROYED,
+	FE_RECYCLED,
+};
+
+static int drbd_do_handshake(struct drbd_conf *mdev);
+static int drbd_do_auth(struct drbd_conf *mdev);
+
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
+static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
+
+static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+{
+	struct drbd_epoch *prev;
+	spin_lock(&mdev->epoch_lock);
+	prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
+	if (prev == epoch || prev == mdev->current_epoch)
+		prev = NULL;
+	spin_unlock(&mdev->epoch_lock);
+	return prev;
+}
+
+#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
+
+static struct page *drbd_pp_first_page_or_try_alloc(struct drbd_conf *mdev)
+{
+	struct page *page = NULL;
+
+	/* Yes, testing drbd_pp_vacant outside the lock is racy.
+	 * So what. It saves a spin_lock. */
+	if (drbd_pp_vacant > 0) {
+		spin_lock(&drbd_pp_lock);
+		page = drbd_pp_pool;
+		if (page) {
+			drbd_pp_pool = (struct page *)page_private(page);
+			set_page_private(page, 0); /* just to be polite */
+			drbd_pp_vacant--;
+		}
+		spin_unlock(&drbd_pp_lock);
+	}
+	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
+	 * "criss-cross" setup, that might cause write-out on some other DRBD,
+	 * which in turn might block on the other node at this very place.  */
+	if (!page)
+		page = alloc_page(GFP_TRY);
+	if (page)
+		atomic_inc(&mdev->pp_in_use);
+	return page;
+}
+
+/* kick lower level device, if we have more than (arbitrary number)
+ * reference counts on it, which typically are locally submitted io
+ * requests.  don't use unacked_cnt, so we speed up proto A and B, too. */
+static void maybe_kick_lo(struct drbd_conf *mdev)
+{
+	if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
+		drbd_kick_lo(mdev);
+}
+
+static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
+{
+	struct drbd_epoch_entry *e;
+	struct list_head *le, *tle;
+
+	/* The EEs are always appended to the end of the list. Since
+	   they are sent in order over the wire, they have to finish
+	   in order. As soon as we see the first not finished we can
+	   stop to examine the list... */
+
+	list_for_each_safe(le, tle, &mdev->net_ee) {
+		e = list_entry(le, struct drbd_epoch_entry, w.list);
+		if (drbd_bio_has_active_page(e->private_bio))
+			break;
+		list_move(le, to_be_freed);
+	}
+}
+
+static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
+{
+	LIST_HEAD(reclaimed);
+	struct drbd_epoch_entry *e, *t;
+
+	maybe_kick_lo(mdev);
+	spin_lock_irq(&mdev->req_lock);
+	reclaim_net_ee(mdev, &reclaimed);
+	spin_unlock_irq(&mdev->req_lock);
+
+	list_for_each_entry_safe(e, t, &reclaimed, w.list)
+		drbd_free_ee(mdev, e);
+}
+
+/**
+ * drbd_pp_alloc() - Returns a page, fails only if a signal comes in
+ * @mdev:	DRBD device.
+ * @retry:	whether or not to retry allocation forever (or until signalled)
+ *
+ * Tries to allocate a page, first from our own page pool, then from the
+ * kernel, unless this allocation would exceed the max_buffers setting.
+ * If @retry is non-zero, retry until DRBD frees a page somewhere else.
+ */
+static struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry)
+{
+	struct page *page = NULL;
+	DEFINE_WAIT(wait);
+
+	if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
+		page = drbd_pp_first_page_or_try_alloc(mdev);
+		if (page)
+			return page;
+	}
+
+	for (;;) {
+		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
+
+		drbd_kick_lo_and_reclaim_net(mdev);
+
+		if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
+			page = drbd_pp_first_page_or_try_alloc(mdev);
+			if (page)
+				break;
+		}
+
+		if (!retry)
+			break;
+
+		if (signal_pending(current)) {
+			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
+			break;
+		}
+
+		schedule();
+	}
+	finish_wait(&drbd_pp_wait, &wait);
+
+	return page;
+}
+
+/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
+ * Is also used from inside an other spin_lock_irq(&mdev->req_lock) */
+static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
+{
+	int free_it;
+
+	spin_lock(&drbd_pp_lock);
+	if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) {
+		free_it = 1;
+	} else {
+		set_page_private(page, (unsigned long)drbd_pp_pool);
+		drbd_pp_pool = page;
+		drbd_pp_vacant++;
+		free_it = 0;
+	}
+	spin_unlock(&drbd_pp_lock);
+
+	atomic_dec(&mdev->pp_in_use);
+
+	if (free_it)
+		__free_page(page);
+
+	wake_up(&drbd_pp_wait);
+}
+
+static void drbd_pp_free_bio_pages(struct drbd_conf *mdev, struct bio *bio)
+{
+	struct page *p_to_be_freed = NULL;
+	struct page *page;
+	struct bio_vec *bvec;
+	int i;
+
+	spin_lock(&drbd_pp_lock);
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) {
+			set_page_private(bvec->bv_page, (unsigned long)p_to_be_freed);
+			p_to_be_freed = bvec->bv_page;
+		} else {
+			set_page_private(bvec->bv_page, (unsigned long)drbd_pp_pool);
+			drbd_pp_pool = bvec->bv_page;
+			drbd_pp_vacant++;
+		}
+	}
+	spin_unlock(&drbd_pp_lock);
+	atomic_sub(bio->bi_vcnt, &mdev->pp_in_use);
+
+	while (p_to_be_freed) {
+		page = p_to_be_freed;
+		p_to_be_freed = (struct page *)page_private(page);
+		set_page_private(page, 0); /* just to be polite */
+		put_page(page);
+	}
+
+	wake_up(&drbd_pp_wait);
+}
+
+/*
+You need to hold the req_lock:
+ _drbd_wait_ee_list_empty()
+
+You must not have the req_lock:
+ drbd_free_ee()
+ drbd_alloc_ee()
+ drbd_init_ee()
+ drbd_release_ee()
+ drbd_ee_fix_bhs()
+ drbd_process_done_ee()
+ drbd_clear_done_ee()
+ drbd_wait_ee_list_empty()
+*/
+
+struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+				     u64 id,
+				     sector_t sector,
+				     unsigned int data_size,
+				     gfp_t gfp_mask) __must_hold(local)
+{
+	struct request_queue *q;
+	struct drbd_epoch_entry *e;
+	struct page *page;
+	struct bio *bio;
+	unsigned int ds;
+
+	if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE))
+		return NULL;
+
+	e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+	if (!e) {
+		if (!(gfp_mask & __GFP_NOWARN))
+			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
+		return NULL;
+	}
+
+	bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE));
+	if (!bio) {
+		if (!(gfp_mask & __GFP_NOWARN))
+			dev_err(DEV, "alloc_ee: Allocation of a bio failed\n");
+		goto fail1;
+	}
+
+	bio->bi_bdev = mdev->ldev->backing_bdev;
+	bio->bi_sector = sector;
+
+	ds = data_size;
+	while (ds) {
+		page = drbd_pp_alloc(mdev, (gfp_mask & __GFP_WAIT));
+		if (!page) {
+			if (!(gfp_mask & __GFP_NOWARN))
+				dev_err(DEV, "alloc_ee: Allocation of a page failed\n");
+			goto fail2;
+		}
+		if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) {
+			drbd_pp_free(mdev, page);
+			dev_err(DEV, "alloc_ee: bio_add_page(s=%llu,"
+			    "data_size=%u,ds=%u) failed\n",
+			    (unsigned long long)sector, data_size, ds);
+
+			q = bdev_get_queue(bio->bi_bdev);
+			if (q->merge_bvec_fn) {
+				struct bvec_merge_data bvm = {
+					.bi_bdev = bio->bi_bdev,
+					.bi_sector = bio->bi_sector,
+					.bi_size = bio->bi_size,
+					.bi_rw = bio->bi_rw,
+				};
+				int l = q->merge_bvec_fn(q, &bvm,
+						&bio->bi_io_vec[bio->bi_vcnt]);
+				dev_err(DEV, "merge_bvec_fn() = %d\n", l);
+			}
+
+			/* dump more of the bio. */
+			dev_err(DEV, "bio->bi_max_vecs = %d\n", bio->bi_max_vecs);
+			dev_err(DEV, "bio->bi_vcnt = %d\n", bio->bi_vcnt);
+			dev_err(DEV, "bio->bi_size = %d\n", bio->bi_size);
+			dev_err(DEV, "bio->bi_phys_segments = %d\n", bio->bi_phys_segments);
+
+			goto fail2;
+			break;
+		}
+		ds -= min_t(int, ds, PAGE_SIZE);
+	}
+
+	D_ASSERT(data_size == bio->bi_size);
+
+	bio->bi_private = e;
+	e->mdev = mdev;
+	e->sector = sector;
+	e->size = bio->bi_size;
+
+	e->private_bio = bio;
+	e->block_id = id;
+	INIT_HLIST_NODE(&e->colision);
+	e->epoch = NULL;
+	e->flags = 0;
+
+	trace_drbd_ee(mdev, e, "allocated");
+
+	return e;
+
+ fail2:
+	drbd_pp_free_bio_pages(mdev, bio);
+	bio_put(bio);
+ fail1:
+	mempool_free(e, drbd_ee_mempool);
+
+	return NULL;
+}
+
+void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+	struct bio *bio = e->private_bio;
+	trace_drbd_ee(mdev, e, "freed");
+	drbd_pp_free_bio_pages(mdev, bio);
+	bio_put(bio);
+	D_ASSERT(hlist_unhashed(&e->colision));
+	mempool_free(e, drbd_ee_mempool);
+}
+
+int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
+{
+	LIST_HEAD(work_list);
+	struct drbd_epoch_entry *e, *t;
+	int count = 0;
+
+	spin_lock_irq(&mdev->req_lock);
+	list_splice_init(list, &work_list);
+	spin_unlock_irq(&mdev->req_lock);
+
+	list_for_each_entry_safe(e, t, &work_list, w.list) {
+		drbd_free_ee(mdev, e);
+		count++;
+	}
+	return count;
+}
+
+
+/*
+ * This function is called from _asender only_
+ * but see also comments in _req_mod(,barrier_acked)
+ * and receive_Barrier.
+ *
+ * Move entries from net_ee to done_ee, if ready.
+ * Grab done_ee, call all callbacks, free the entries.
+ * The callbacks typically send out ACKs.
+ */
+static int drbd_process_done_ee(struct drbd_conf *mdev)
+{
+	LIST_HEAD(work_list);
+	LIST_HEAD(reclaimed);
+	struct drbd_epoch_entry *e, *t;
+	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
+
+	spin_lock_irq(&mdev->req_lock);
+	reclaim_net_ee(mdev, &reclaimed);
+	list_splice_init(&mdev->done_ee, &work_list);
+	spin_unlock_irq(&mdev->req_lock);
+
+	list_for_each_entry_safe(e, t, &reclaimed, w.list)
+		drbd_free_ee(mdev, e);
+
+	/* possible callbacks here:
+	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
+	 * all ignore the last argument.
+	 */
+	list_for_each_entry_safe(e, t, &work_list, w.list) {
+		trace_drbd_ee(mdev, e, "process_done_ee");
+		/* list_del not necessary, next/prev members not touched */
+		ok = e->w.cb(mdev, &e->w, !ok) && ok;
+		drbd_free_ee(mdev, e);
+	}
+	wake_up(&mdev->ee_wait);
+
+	return ok;
+}
+
+void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+	DEFINE_WAIT(wait);
+
+	/* avoids spin_lock/unlock
+	 * and calling prepare_to_wait in the fast path */
+	while (!list_empty(head)) {
+		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock_irq(&mdev->req_lock);
+		drbd_kick_lo(mdev);
+		schedule();
+		finish_wait(&mdev->ee_wait, &wait);
+		spin_lock_irq(&mdev->req_lock);
+	}
+}
+
+void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+	spin_lock_irq(&mdev->req_lock);
+	_drbd_wait_ee_list_empty(mdev, head);
+	spin_unlock_irq(&mdev->req_lock);
+}
+
+/* see also kernel_accept; which is only present since 2.6.18.
+ * also we want to log which part of it failed, exactly */
+static int drbd_accept(struct drbd_conf *mdev, const char **what,
+		struct socket *sock, struct socket **newsock)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	*what = "listen";
+	err = sock->ops->listen(sock, 5);
+	if (err < 0)
+		goto out;
+
+	*what = "sock_create_lite";
+	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+			       newsock);
+	if (err < 0)
+		goto out;
+
+	*what = "accept";
+	err = sock->ops->accept(sock, *newsock, 0);
+	if (err < 0) {
+		sock_release(*newsock);
+		*newsock = NULL;
+		goto out;
+	}
+	(*newsock)->ops  = sock->ops;
+
+out:
+	return err;
+}
+
+static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
+		    void *buf, size_t size, int flags)
+{
+	mm_segment_t oldfs;
+	struct kvec iov = {
+		.iov_base = buf,
+		.iov_len = size,
+	};
+	struct msghdr msg = {
+		.msg_iovlen = 1,
+		.msg_iov = (struct iovec *)&iov,
+		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
+	};
+	int rv;
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
+	set_fs(oldfs);
+
+	return rv;
+}
+
+static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
+{
+	mm_segment_t oldfs;
+	struct kvec iov = {
+		.iov_base = buf,
+		.iov_len = size,
+	};
+	struct msghdr msg = {
+		.msg_iovlen = 1,
+		.msg_iov = (struct iovec *)&iov,
+		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
+	};
+	int rv;
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+
+	for (;;) {
+		rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags);
+		if (rv == size)
+			break;
+
+		/* Note:
+		 * ECONNRESET	other side closed the connection
+		 * ERESTARTSYS	(on  sock) we got a signal
+		 */
+
+		if (rv < 0) {
+			if (rv == -ECONNRESET)
+				dev_info(DEV, "sock was reset by peer\n");
+			else if (rv != -ERESTARTSYS)
+				dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+			break;
+		} else if (rv == 0) {
+			dev_info(DEV, "sock was shut down by peer\n");
+			break;
+		} else	{
+			/* signal came in, or peer/link went down,
+			 * after we read a partial message
+			 */
+			/* D_ASSERT(signal_pending(current)); */
+			break;
+		}
+	};
+
+	set_fs(oldfs);
+
+	if (rv != size)
+		drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+
+	return rv;
+}
+
+static struct socket *drbd_try_connect(struct drbd_conf *mdev)
+{
+	const char *what;
+	struct socket *sock;
+	struct sockaddr_in6 src_in6;
+	int err;
+	int disconnect_on_error = 1;
+
+	if (!get_net_conf(mdev))
+		return NULL;
+
+	what = "sock_create_kern";
+	err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+		SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (err < 0) {
+		sock = NULL;
+		goto out;
+	}
+
+	sock->sk->sk_rcvtimeo =
+	sock->sk->sk_sndtimeo =  mdev->net_conf->try_connect_int*HZ;
+
+       /* explicitly bind to the configured IP as source IP
+	*  for the outgoing connections.
+	*  This is needed for multihomed hosts and to be
+	*  able to use lo: interfaces for drbd.
+	* Make sure to use 0 as port number, so linux selects
+	*  a free one dynamically.
+	*/
+	memcpy(&src_in6, mdev->net_conf->my_addr,
+	       min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6)));
+	if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6)
+		src_in6.sin6_port = 0;
+	else
+		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
+
+	what = "bind before connect";
+	err = sock->ops->bind(sock,
+			      (struct sockaddr *) &src_in6,
+			      mdev->net_conf->my_addr_len);
+	if (err < 0)
+		goto out;
+
+	/* connect may fail, peer not yet available.
+	 * stay C_WF_CONNECTION, don't go Disconnecting! */
+	disconnect_on_error = 0;
+	what = "connect";
+	err = sock->ops->connect(sock,
+				 (struct sockaddr *)mdev->net_conf->peer_addr,
+				 mdev->net_conf->peer_addr_len, 0);
+
+out:
+	if (err < 0) {
+		if (sock) {
+			sock_release(sock);
+			sock = NULL;
+		}
+		switch (-err) {
+			/* timeout, busy, signal pending */
+		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
+		case EINTR: case ERESTARTSYS:
+			/* peer not (yet) available, network problem */
+		case ECONNREFUSED: case ENETUNREACH:
+		case EHOSTDOWN:    case EHOSTUNREACH:
+			disconnect_on_error = 0;
+			break;
+		default:
+			dev_err(DEV, "%s failed, err = %d\n", what, err);
+		}
+		if (disconnect_on_error)
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+	}
+	put_net_conf(mdev);
+	return sock;
+}
+
+static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
+{
+	int timeo, err;
+	struct socket *s_estab = NULL, *s_listen;
+	const char *what;
+
+	if (!get_net_conf(mdev))
+		return NULL;
+
+	what = "sock_create_kern";
+	err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+		SOCK_STREAM, IPPROTO_TCP, &s_listen);
+	if (err) {
+		s_listen = NULL;
+		goto out;
+	}
+
+	timeo = mdev->net_conf->try_connect_int * HZ;
+	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+
+	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
+	s_listen->sk->sk_rcvtimeo = timeo;
+	s_listen->sk->sk_sndtimeo = timeo;
+
+	what = "bind before listen";
+	err = s_listen->ops->bind(s_listen,
+			      (struct sockaddr *) mdev->net_conf->my_addr,
+			      mdev->net_conf->my_addr_len);
+	if (err < 0)
+		goto out;
+
+	err = drbd_accept(mdev, &what, s_listen, &s_estab);
+
+out:
+	if (s_listen)
+		sock_release(s_listen);
+	if (err < 0) {
+		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
+			dev_err(DEV, "%s failed, err = %d\n", what, err);
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+		}
+	}
+	put_net_conf(mdev);
+
+	return s_estab;
+}
+
+static int drbd_send_fp(struct drbd_conf *mdev,
+	struct socket *sock, enum drbd_packets cmd)
+{
+	struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+
+	return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
+}
+
+static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
+{
+	struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+	int rr;
+
+	rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
+
+	if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC)
+		return be16_to_cpu(h->command);
+
+	return 0xffff;
+}
+
+/**
+ * drbd_socket_okay() - Free the socket if its connection is not okay
+ * @mdev:	DRBD device.
+ * @sock:	pointer to the pointer to the socket.
+ */
+static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
+{
+	int rr;
+	char tb[4];
+
+	if (!*sock)
+		return FALSE;
+
+	rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
+
+	if (rr > 0 || rr == -EAGAIN) {
+		return TRUE;
+	} else {
+		sock_release(*sock);
+		*sock = NULL;
+		return FALSE;
+	}
+}
+
+/*
+ * return values:
+ *   1 yes, we have a valid connection
+ *   0 oops, did not work out, please try again
+ *  -1 peer talks different language,
+ *     no point in trying again, please go standalone.
+ *  -2 We do not have a network config...
+ */
+static int drbd_connect(struct drbd_conf *mdev)
+{
+	struct socket *s, *sock, *msock;
+	int try, h, ok;
+
+	D_ASSERT(!mdev->data.socket);
+
+	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
+		dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
+
+	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
+		return -2;
+
+	clear_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+	sock  = NULL;
+	msock = NULL;
+
+	do {
+		for (try = 0;;) {
+			/* 3 tries, this should take less than a second! */
+			s = drbd_try_connect(mdev);
+			if (s || ++try >= 3)
+				break;
+			/* give the other side time to call bind() & listen() */
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ / 10);
+		}
+
+		if (s) {
+			if (!sock) {
+				drbd_send_fp(mdev, s, P_HAND_SHAKE_S);
+				sock = s;
+				s = NULL;
+			} else if (!msock) {
+				drbd_send_fp(mdev, s, P_HAND_SHAKE_M);
+				msock = s;
+				s = NULL;
+			} else {
+				dev_err(DEV, "Logic error in drbd_connect()\n");
+				goto out_release_sockets;
+			}
+		}
+
+		if (sock && msock) {
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ / 10);
+			ok = drbd_socket_okay(mdev, &sock);
+			ok = drbd_socket_okay(mdev, &msock) && ok;
+			if (ok)
+				break;
+		}
+
+retry:
+		s = drbd_wait_for_connect(mdev);
+		if (s) {
+			try = drbd_recv_fp(mdev, s);
+			drbd_socket_okay(mdev, &sock);
+			drbd_socket_okay(mdev, &msock);
+			switch (try) {
+			case P_HAND_SHAKE_S:
+				if (sock) {
+					dev_warn(DEV, "initial packet S crossed\n");
+					sock_release(sock);
+				}
+				sock = s;
+				break;
+			case P_HAND_SHAKE_M:
+				if (msock) {
+					dev_warn(DEV, "initial packet M crossed\n");
+					sock_release(msock);
+				}
+				msock = s;
+				set_bit(DISCARD_CONCURRENT, &mdev->flags);
+				break;
+			default:
+				dev_warn(DEV, "Error receiving initial packet\n");
+				sock_release(s);
+				if (random32() & 1)
+					goto retry;
+			}
+		}
+
+		if (mdev->state.conn <= C_DISCONNECTING)
+			goto out_release_sockets;
+		if (signal_pending(current)) {
+			flush_signals(current);
+			smp_rmb();
+			if (get_t_state(&mdev->receiver) == Exiting)
+				goto out_release_sockets;
+		}
+
+		if (sock && msock) {
+			ok = drbd_socket_okay(mdev, &sock);
+			ok = drbd_socket_okay(mdev, &msock) && ok;
+			if (ok)
+				break;
+		}
+	} while (1);
+
+	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+
+	sock->sk->sk_allocation = GFP_NOIO;
+	msock->sk->sk_allocation = GFP_NOIO;
+
+	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
+	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
+
+	if (mdev->net_conf->sndbuf_size) {
+		sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
+		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	}
+
+	if (mdev->net_conf->rcvbuf_size) {
+		sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
+		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+
+	/* NOT YET ...
+	 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+	 * first set it to the P_HAND_SHAKE timeout,
+	 * which we set to 4x the configured ping_timeout. */
+	sock->sk->sk_sndtimeo =
+	sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10;
+
+	msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+	msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+
+	/* we don't want delays.
+	 * we use TCP_CORK where apropriate, though */
+	drbd_tcp_nodelay(sock);
+	drbd_tcp_nodelay(msock);
+
+	mdev->data.socket = sock;
+	mdev->meta.socket = msock;
+	mdev->last_received = jiffies;
+
+	D_ASSERT(mdev->asender.task == NULL);
+
+	h = drbd_do_handshake(mdev);
+	if (h <= 0)
+		return h;
+
+	if (mdev->cram_hmac_tfm) {
+		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
+		if (!drbd_do_auth(mdev)) {
+			dev_err(DEV, "Authentication of peer failed\n");
+			return -1;
+		}
+	}
+
+	if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
+		return 0;
+
+	sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+
+	atomic_set(&mdev->packet_seq, 0);
+	mdev->peer_seq = 0;
+
+	drbd_thread_start(&mdev->asender);
+
+	drbd_send_protocol(mdev);
+	drbd_send_sync_param(mdev, &mdev->sync_conf);
+	drbd_send_sizes(mdev, 0);
+	drbd_send_uuids(mdev);
+	drbd_send_state(mdev);
+	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+	clear_bit(RESIZE_PENDING, &mdev->flags);
+
+	return 1;
+
+out_release_sockets:
+	if (sock)
+		sock_release(sock);
+	if (msock)
+		sock_release(msock);
+	return -1;
+}
+
+static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h)
+{
+	int r;
+
+	r = drbd_recv(mdev, h, sizeof(*h));
+
+	if (unlikely(r != sizeof(*h))) {
+		dev_err(DEV, "short read expecting header on sock: r=%d\n", r);
+		return FALSE;
+	};
+	h->command = be16_to_cpu(h->command);
+	h->length  = be16_to_cpu(h->length);
+	if (unlikely(h->magic != BE_DRBD_MAGIC)) {
+		dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n",
+		    (long)be32_to_cpu(h->magic),
+		    h->command, h->length);
+		return FALSE;
+	}
+	mdev->last_received = jiffies;
+
+	return TRUE;
+}
+
+static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+{
+	int rv;
+
+	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
+		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
+		if (rv) {
+			dev_err(DEV, "local disk flush failed with status %d\n", rv);
+			/* would rather check on EOPNOTSUPP, but that is not reliable.
+			 * don't try again for ANY return value != 0
+			 * if (rv == -EOPNOTSUPP) */
+			drbd_bump_write_ordering(mdev, WO_drain_io);
+		}
+		put_ldev(mdev);
+	}
+
+	return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
+}
+
+static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct flush_work *fw = (struct flush_work *)w;
+	struct drbd_epoch *epoch = fw->epoch;
+
+	kfree(w);
+
+	if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
+		drbd_flush_after_epoch(mdev, epoch);
+
+	drbd_may_finish_epoch(mdev, epoch, EV_PUT |
+			      (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
+
+	return 1;
+}
+
+/**
+ * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
+ * @mdev:	DRBD device.
+ * @epoch:	Epoch object.
+ * @ev:		Epoch event.
+ */
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
+					       struct drbd_epoch *epoch,
+					       enum epoch_event ev)
+{
+	int finish, epoch_size;
+	struct drbd_epoch *next_epoch;
+	int schedule_flush = 0;
+	enum finish_epoch rv = FE_STILL_LIVE;
+
+	spin_lock(&mdev->epoch_lock);
+	do {
+		next_epoch = NULL;
+		finish = 0;
+
+		epoch_size = atomic_read(&epoch->epoch_size);
+
+		switch (ev & ~EV_CLEANUP) {
+		case EV_PUT:
+			atomic_dec(&epoch->active);
+			break;
+		case EV_GOT_BARRIER_NR:
+			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
+
+			/* Special case: If we just switched from WO_bio_barrier to
+			   WO_bdev_flush we should not finish the current epoch */
+			if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
+			    mdev->write_ordering != WO_bio_barrier &&
+			    epoch == mdev->current_epoch)
+				clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
+			break;
+		case EV_BARRIER_DONE:
+			set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
+			break;
+		case EV_BECAME_LAST:
+			/* nothing to do*/
+			break;
+		}
+
+		trace_drbd_epoch(mdev, epoch, ev);
+
+		if (epoch_size != 0 &&
+		    atomic_read(&epoch->active) == 0 &&
+		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&
+		    epoch->list.prev == &mdev->current_epoch->list &&
+		    !test_bit(DE_IS_FINISHING, &epoch->flags)) {
+			/* Nearly all conditions are met to finish that epoch... */
+			if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
+			    mdev->write_ordering == WO_none ||
+			    (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
+			    ev & EV_CLEANUP) {
+				finish = 1;
+				set_bit(DE_IS_FINISHING, &epoch->flags);
+			} else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
+				 mdev->write_ordering == WO_bio_barrier) {
+				atomic_inc(&epoch->active);
+				schedule_flush = 1;
+			}
+		}
+		if (finish) {
+			if (!(ev & EV_CLEANUP)) {
+				spin_unlock(&mdev->epoch_lock);
+				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
+				spin_lock(&mdev->epoch_lock);
+			}
+			dec_unacked(mdev);
+
+			if (mdev->current_epoch != epoch) {
+				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
+				list_del(&epoch->list);
+				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
+				mdev->epochs--;
+				trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE);
+				kfree(epoch);
+
+				if (rv == FE_STILL_LIVE)
+					rv = FE_DESTROYED;
+			} else {
+				epoch->flags = 0;
+				atomic_set(&epoch->epoch_size, 0);
+				/* atomic_set(&epoch->active, 0); is alrady zero */
+				if (rv == FE_STILL_LIVE)
+					rv = FE_RECYCLED;
+			}
+		}
+
+		if (!next_epoch)
+			break;
+
+		epoch = next_epoch;
+	} while (1);
+
+	spin_unlock(&mdev->epoch_lock);
+
+	if (schedule_flush) {
+		struct flush_work *fw;
+		fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
+		if (fw) {
+			trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH);
+			fw->w.cb = w_flush;
+			fw->epoch = epoch;
+			drbd_queue_work(&mdev->data.work, &fw->w);
+		} else {
+			dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
+			set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+			/* That is not a recursion, only one level */
+			drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
+			drbd_may_finish_epoch(mdev, epoch, EV_PUT);
+		}
+	}
+
+	return rv;
+}
+
+/**
+ * drbd_bump_write_ordering() - Fall back to an other write ordering method
+ * @mdev:	DRBD device.
+ * @wo:		Write ordering method to try.
+ */
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
+{
+	enum write_ordering_e pwo;
+	static char *write_ordering_str[] = {
+		[WO_none] = "none",
+		[WO_drain_io] = "drain",
+		[WO_bdev_flush] = "flush",
+		[WO_bio_barrier] = "barrier",
+	};
+
+	pwo = mdev->write_ordering;
+	wo = min(pwo, wo);
+	if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
+		wo = WO_bdev_flush;
+	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
+		wo = WO_drain_io;
+	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
+		wo = WO_none;
+	mdev->write_ordering = wo;
+	if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
+		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
+}
+
+/**
+ * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set
+ * @mdev:	DRBD device.
+ * @w:		work object.
+ * @cancel:	The connection will be closed anyways (unused in this callback)
+ */
+int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
+{
+	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+	struct bio *bio = e->private_bio;
+
+	/* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
+	   (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
+	   so that we can finish that epoch in drbd_may_finish_epoch().
+	   That is necessary if we already have a long chain of Epochs, before
+	   we realize that BIO_RW_BARRIER is actually not supported */
+
+	/* As long as the -ENOTSUPP on the barrier is reported immediately
+	   that will never trigger. If it is reported late, we will just
+	   print that warning and continue correctly for all future requests
+	   with WO_bdev_flush */
+	if (previous_epoch(mdev, e->epoch))
+		dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
+
+	/* prepare bio for re-submit,
+	 * re-init volatile members */
+	/* we still have a local reference,
+	 * get_ldev was done in receive_Data. */
+	bio->bi_bdev = mdev->ldev->backing_bdev;
+	bio->bi_sector = e->sector;
+	bio->bi_size = e->size;
+	bio->bi_idx = 0;
+
+	bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	bio->bi_flags |= 1 << BIO_UPTODATE;
+
+	/* don't know whether this is necessary: */
+	bio->bi_phys_segments = 0;
+	bio->bi_next = NULL;
+
+	/* these should be unchanged: */
+	/* bio->bi_end_io = drbd_endio_write_sec; */
+	/* bio->bi_vcnt = whatever; */
+
+	e->w.cb = e_end_block;
+
+	/* This is no longer a barrier request. */
+	bio->bi_rw &= ~(1UL << BIO_RW_BARRIER);
+
+	drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio);
+
+	return 1;
+}
+
+static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h)
+{
+	int rv, issue_flush;
+	struct p_barrier *p = (struct p_barrier *)h;
+	struct drbd_epoch *epoch;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+
+	rv = drbd_recv(mdev, h->payload, h->length);
+	ERR_IF(rv != h->length) return FALSE;
+
+	inc_unacked(mdev);
+
+	if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
+		drbd_kick_lo(mdev);
+
+	mdev->current_epoch->barrier_nr = p->barrier;
+	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
+
+	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
+	 * the activity log, which means it would not be resynced in case the
+	 * R_PRIMARY crashes now.
+	 * Therefore we must send the barrier_ack after the barrier request was
+	 * completed. */
+	switch (mdev->write_ordering) {
+	case WO_bio_barrier:
+	case WO_none:
+		if (rv == FE_RECYCLED)
+			return TRUE;
+		break;
+
+	case WO_bdev_flush:
+	case WO_drain_io:
+		D_ASSERT(rv == FE_STILL_LIVE);
+		set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
+		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+		rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
+		if (rv == FE_RECYCLED)
+			return TRUE;
+
+		/* The asender will send all the ACKs and barrier ACKs out, since
+		   all EEs moved from the active_ee to the done_ee. We need to
+		   provide a new epoch object for the EEs that come in soon */
+		break;
+	}
+
+	/* receiver context, in the writeout path of the other node.
+	 * avoid potential distributed deadlock */
+	epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+	if (!epoch) {
+		dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+		issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+		if (issue_flush) {
+			rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
+			if (rv == FE_RECYCLED)
+				return TRUE;
+		}
+
+		drbd_wait_ee_list_empty(mdev, &mdev->done_ee);
+
+		return TRUE;
+	}
+
+	epoch->flags = 0;
+	atomic_set(&epoch->epoch_size, 0);
+	atomic_set(&epoch->active, 0);
+
+	spin_lock(&mdev->epoch_lock);
+	if (atomic_read(&mdev->current_epoch->epoch_size)) {
+		list_add(&epoch->list, &mdev->current_epoch->list);
+		mdev->current_epoch = epoch;
+		mdev->epochs++;
+		trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC);
+	} else {
+		/* The current_epoch got recycled while we allocated this one... */
+		kfree(epoch);
+	}
+	spin_unlock(&mdev->epoch_lock);
+
+	return TRUE;
+}
+
+/* used from receive_RSDataReply (recv_resync_read)
+ * and from receive_Data */
+static struct drbd_epoch_entry *
+read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local)
+{
+	struct drbd_epoch_entry *e;
+	struct bio_vec *bvec;
+	struct page *page;
+	struct bio *bio;
+	int dgs, ds, i, rr;
+	void *dig_in = mdev->int_dig_in;
+	void *dig_vv = mdev->int_dig_vv;
+
+	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+		crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+	if (dgs) {
+		rr = drbd_recv(mdev, dig_in, dgs);
+		if (rr != dgs) {
+			dev_warn(DEV, "short read receiving data digest: read %d expected %d\n",
+			     rr, dgs);
+			return NULL;
+		}
+	}
+
+	data_size -= dgs;
+
+	ERR_IF(data_size &  0x1ff) return NULL;
+	ERR_IF(data_size >  DRBD_MAX_SEGMENT_SIZE) return NULL;
+
+	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+	 * "criss-cross" setup, that might cause write-out on some other DRBD,
+	 * which in turn might block on the other node at this very place.  */
+	e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
+	if (!e)
+		return NULL;
+	bio = e->private_bio;
+	ds = data_size;
+	bio_for_each_segment(bvec, bio, i) {
+		page = bvec->bv_page;
+		rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE));
+		kunmap(page);
+		if (rr != min_t(int, ds, PAGE_SIZE)) {
+			drbd_free_ee(mdev, e);
+			dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+			     rr, min_t(int, ds, PAGE_SIZE));
+			return NULL;
+		}
+		ds -= rr;
+	}
+
+	if (dgs) {
+		drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+		if (memcmp(dig_in, dig_vv, dgs)) {
+			dev_err(DEV, "Digest integrity check FAILED.\n");
+			drbd_bcast_ee(mdev, "digest failed",
+					dgs, dig_in, dig_vv, e);
+			drbd_free_ee(mdev, e);
+			return NULL;
+		}
+	}
+	mdev->recv_cnt += data_size>>9;
+	return e;
+}
+
+/* drbd_drain_block() just takes a data block
+ * out of the socket input buffer, and discards it.
+ */
+static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
+{
+	struct page *page;
+	int rr, rv = 1;
+	void *data;
+
+	page = drbd_pp_alloc(mdev, 1);
+
+	data = kmap(page);
+	while (data_size) {
+		rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
+		if (rr != min_t(int, data_size, PAGE_SIZE)) {
+			rv = 0;
+			dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+			     rr, min_t(int, data_size, PAGE_SIZE));
+			break;
+		}
+		data_size -= rr;
+	}
+	kunmap(page);
+	drbd_pp_free(mdev, page);
+	return rv;
+}
+
+static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
+			   sector_t sector, int data_size)
+{
+	struct bio_vec *bvec;
+	struct bio *bio;
+	int dgs, rr, i, expect;
+	void *dig_in = mdev->int_dig_in;
+	void *dig_vv = mdev->int_dig_vv;
+
+	dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+		crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+	if (dgs) {
+		rr = drbd_recv(mdev, dig_in, dgs);
+		if (rr != dgs) {
+			dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n",
+			     rr, dgs);
+			return 0;
+		}
+	}
+
+	data_size -= dgs;
+
+	/* optimistically update recv_cnt.  if receiving fails below,
+	 * we disconnect anyways, and counters will be reset. */
+	mdev->recv_cnt += data_size>>9;
+
+	bio = req->master_bio;
+	D_ASSERT(sector == bio->bi_sector);
+
+	bio_for_each_segment(bvec, bio, i) {
+		expect = min_t(int, data_size, bvec->bv_len);
+		rr = drbd_recv(mdev,
+			     kmap(bvec->bv_page)+bvec->bv_offset,
+			     expect);
+		kunmap(bvec->bv_page);
+		if (rr != expect) {
+			dev_warn(DEV, "short read receiving data reply: "
+			     "read %d expected %d\n",
+			     rr, expect);
+			return 0;
+		}
+		data_size -= rr;
+	}
+
+	if (dgs) {
+		drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+		if (memcmp(dig_in, dig_vv, dgs)) {
+			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
+			return 0;
+		}
+	}
+
+	D_ASSERT(data_size == 0);
+	return 1;
+}
+
+/* e_end_resync_block() is called via
+ * drbd_process_done_ee() by asender only */
+static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+	sector_t sector = e->sector;
+	int ok;
+
+	D_ASSERT(hlist_unhashed(&e->colision));
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		drbd_set_in_sync(mdev, sector, e->size);
+		ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e);
+	} else {
+		/* Record failure to sync */
+		drbd_rs_failed_io(mdev, sector, e->size);
+
+		ok  = drbd_send_ack(mdev, P_NEG_ACK, e);
+	}
+	dec_unacked(mdev);
+
+	return ok;
+}
+
+static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
+{
+	struct drbd_epoch_entry *e;
+
+	e = read_in_block(mdev, ID_SYNCER, sector, data_size);
+	if (!e) {
+		put_ldev(mdev);
+		return FALSE;
+	}
+
+	dec_rs_pending(mdev);
+
+	e->private_bio->bi_end_io = drbd_endio_write_sec;
+	e->private_bio->bi_rw = WRITE;
+	e->w.cb = e_end_resync_block;
+
+	inc_unacked(mdev);
+	/* corresponding dec_unacked() in e_end_resync_block()
+	 * respective _drbd_clear_done_ee */
+
+	spin_lock_irq(&mdev->req_lock);
+	list_add(&e->w.list, &mdev->sync_ee);
+	spin_unlock_irq(&mdev->req_lock);
+
+	trace_drbd_ee(mdev, e, "submitting for (rs)write");
+	trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL);
+	drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio);
+	/* accounting done in endio */
+
+	maybe_kick_lo(mdev);
+	return TRUE;
+}
+
+static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct drbd_request *req;
+	sector_t sector;
+	unsigned int header_size, data_size;
+	int ok;
+	struct p_data *p = (struct p_data *)h;
+
+	header_size = sizeof(*p) - sizeof(*h);
+	data_size   = h->length  - header_size;
+
+	ERR_IF(data_size == 0) return FALSE;
+
+	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+		return FALSE;
+
+	sector = be64_to_cpu(p->sector);
+
+	spin_lock_irq(&mdev->req_lock);
+	req = _ar_id_to_req(mdev, p->block_id, sector);
+	spin_unlock_irq(&mdev->req_lock);
+	if (unlikely(!req)) {
+		dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
+		return FALSE;
+	}
+
+	/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+	 * special casing it there for the various failure cases.
+	 * still no race with drbd_fail_pending_reads */
+	ok = recv_dless_read(mdev, req, sector, data_size);
+
+	if (ok)
+		req_mod(req, data_received);
+	/* else: nothing. handled from drbd_disconnect...
+	 * I don't think we may complete this just yet
+	 * in case we are "on-disconnect: freeze" */
+
+	return ok;
+}
+
+static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
+{
+	sector_t sector;
+	unsigned int header_size, data_size;
+	int ok;
+	struct p_data *p = (struct p_data *)h;
+
+	header_size = sizeof(*p) - sizeof(*h);
+	data_size   = h->length  - header_size;
+
+	ERR_IF(data_size == 0) return FALSE;
+
+	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+		return FALSE;
+
+	sector = be64_to_cpu(p->sector);
+	D_ASSERT(p->block_id == ID_SYNCER);
+
+	if (get_ldev(mdev)) {
+		/* data is submitted to disk within recv_resync_read.
+		 * corresponding put_ldev done below on error,
+		 * or in drbd_endio_write_sec. */
+		ok = recv_resync_read(mdev, sector, data_size);
+	} else {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Can not write resync data to local disk.\n");
+
+		ok = drbd_drain_block(mdev, data_size);
+
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+	}
+
+	return ok;
+}
+
+/* e_end_block() is called via drbd_process_done_ee().
+ * this means this function only runs in the asender thread
+ */
+static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+	sector_t sector = e->sector;
+	struct drbd_epoch *epoch;
+	int ok = 1, pcmd;
+
+	if (e->flags & EE_IS_BARRIER) {
+		epoch = previous_epoch(mdev, e->epoch);
+		if (epoch)
+			drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
+	}
+
+	if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
+		if (likely(drbd_bio_uptodate(e->private_bio))) {
+			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
+				mdev->state.conn <= C_PAUSED_SYNC_T &&
+				e->flags & EE_MAY_SET_IN_SYNC) ?
+				P_RS_WRITE_ACK : P_WRITE_ACK;
+			ok &= drbd_send_ack(mdev, pcmd, e);
+			if (pcmd == P_RS_WRITE_ACK)
+				drbd_set_in_sync(mdev, sector, e->size);
+		} else {
+			ok  = drbd_send_ack(mdev, P_NEG_ACK, e);
+			/* we expect it to be marked out of sync anyways...
+			 * maybe assert this?  */
+		}
+		dec_unacked(mdev);
+	}
+	/* we delete from the conflict detection hash _after_ we sent out the
+	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
+	if (mdev->net_conf->two_primaries) {
+		spin_lock_irq(&mdev->req_lock);
+		D_ASSERT(!hlist_unhashed(&e->colision));
+		hlist_del_init(&e->colision);
+		spin_unlock_irq(&mdev->req_lock);
+	} else {
+		D_ASSERT(hlist_unhashed(&e->colision));
+	}
+
+	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+
+	return ok;
+}
+
+static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+	int ok = 1;
+
+	D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+	ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
+
+	spin_lock_irq(&mdev->req_lock);
+	D_ASSERT(!hlist_unhashed(&e->colision));
+	hlist_del_init(&e->colision);
+	spin_unlock_irq(&mdev->req_lock);
+
+	dec_unacked(mdev);
+
+	return ok;
+}
+
+/* Called from receive_Data.
+ * Synchronize packets on sock with packets on msock.
+ *
+ * This is here so even when a P_DATA packet traveling via sock overtook an Ack
+ * packet traveling on msock, they are still processed in the order they have
+ * been sent.
+ *
+ * Note: we don't care for Ack packets overtaking P_DATA packets.
+ *
+ * In case packet_seq is larger than mdev->peer_seq number, there are
+ * outstanding packets on the msock. We wait for them to arrive.
+ * In case we are the logically next packet, we update mdev->peer_seq
+ * ourselves. Correctly handles 32bit wrap around.
+ *
+ * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
+ * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
+ * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
+ * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
+ *
+ * returns 0 if we may process the packet,
+ * -ERESTARTSYS if we were interrupted (by disconnect signal). */
+static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
+{
+	DEFINE_WAIT(wait);
+	unsigned int p_seq;
+	long timeout;
+	int ret = 0;
+	spin_lock(&mdev->peer_seq_lock);
+	for (;;) {
+		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
+		if (seq_le(packet_seq, mdev->peer_seq+1))
+			break;
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		p_seq = mdev->peer_seq;
+		spin_unlock(&mdev->peer_seq_lock);
+		timeout = schedule_timeout(30*HZ);
+		spin_lock(&mdev->peer_seq_lock);
+		if (timeout == 0 && p_seq == mdev->peer_seq) {
+			ret = -ETIMEDOUT;
+			dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
+			break;
+		}
+	}
+	finish_wait(&mdev->seq_wait, &wait);
+	if (mdev->peer_seq+1 == packet_seq)
+		mdev->peer_seq++;
+	spin_unlock(&mdev->peer_seq_lock);
+	return ret;
+}
+
+/* mirrored write */
+static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
+{
+	sector_t sector;
+	struct drbd_epoch_entry *e;
+	struct p_data *p = (struct p_data *)h;
+	int header_size, data_size;
+	int rw = WRITE;
+	u32 dp_flags;
+
+	header_size = sizeof(*p) - sizeof(*h);
+	data_size   = h->length  - header_size;
+
+	ERR_IF(data_size == 0) return FALSE;
+
+	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+		return FALSE;
+
+	if (!get_ldev(mdev)) {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Can not write mirrored data block "
+			    "to local disk.\n");
+		spin_lock(&mdev->peer_seq_lock);
+		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
+			mdev->peer_seq++;
+		spin_unlock(&mdev->peer_seq_lock);
+
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+		atomic_inc(&mdev->current_epoch->epoch_size);
+		return drbd_drain_block(mdev, data_size);
+	}
+
+	/* get_ldev(mdev) successful.
+	 * Corresponding put_ldev done either below (on various errors),
+	 * or in drbd_endio_write_sec, if we successfully submit the data at
+	 * the end of this function. */
+
+	sector = be64_to_cpu(p->sector);
+	e = read_in_block(mdev, p->block_id, sector, data_size);
+	if (!e) {
+		put_ldev(mdev);
+		return FALSE;
+	}
+
+	e->private_bio->bi_end_io = drbd_endio_write_sec;
+	e->w.cb = e_end_block;
+
+	spin_lock(&mdev->epoch_lock);
+	e->epoch = mdev->current_epoch;
+	atomic_inc(&e->epoch->epoch_size);
+	atomic_inc(&e->epoch->active);
+
+	if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
+		struct drbd_epoch *epoch;
+		/* Issue a barrier if we start a new epoch, and the previous epoch
+		   was not a epoch containing a single request which already was
+		   a Barrier. */
+		epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
+		if (epoch == e->epoch) {
+			set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
+			trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER);
+			rw |= (1<<BIO_RW_BARRIER);
+			e->flags |= EE_IS_BARRIER;
+		} else {
+			if (atomic_read(&epoch->epoch_size) > 1 ||
+			    !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
+				set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+				trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI);
+				set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
+				trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER);
+				rw |= (1<<BIO_RW_BARRIER);
+				e->flags |= EE_IS_BARRIER;
+			}
+		}
+	}
+	spin_unlock(&mdev->epoch_lock);
+
+	dp_flags = be32_to_cpu(p->dp_flags);
+	if (dp_flags & DP_HARDBARRIER) {
+		dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
+		/* rw |= (1<<BIO_RW_BARRIER); */
+	}
+	if (dp_flags & DP_RW_SYNC)
+		rw |= (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+	if (dp_flags & DP_MAY_SET_IN_SYNC)
+		e->flags |= EE_MAY_SET_IN_SYNC;
+
+	/* I'm the receiver, I do hold a net_cnt reference. */
+	if (!mdev->net_conf->two_primaries) {
+		spin_lock_irq(&mdev->req_lock);
+	} else {
+		/* don't get the req_lock yet,
+		 * we may sleep in drbd_wait_peer_seq */
+		const int size = e->size;
+		const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+		DEFINE_WAIT(wait);
+		struct drbd_request *i;
+		struct hlist_node *n;
+		struct hlist_head *slot;
+		int first;
+
+		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+		BUG_ON(mdev->ee_hash == NULL);
+		BUG_ON(mdev->tl_hash == NULL);
+
+		/* conflict detection and handling:
+		 * 1. wait on the sequence number,
+		 *    in case this data packet overtook ACK packets.
+		 * 2. check our hash tables for conflicting requests.
+		 *    we only need to walk the tl_hash, since an ee can not
+		 *    have a conflict with an other ee: on the submitting
+		 *    node, the corresponding req had already been conflicting,
+		 *    and a conflicting req is never sent.
+		 *
+		 * Note: for two_primaries, we are protocol C,
+		 * so there cannot be any request that is DONE
+		 * but still on the transfer log.
+		 *
+		 * unconditionally add to the ee_hash.
+		 *
+		 * if no conflicting request is found:
+		 *    submit.
+		 *
+		 * if any conflicting request is found
+		 * that has not yet been acked,
+		 * AND I have the "discard concurrent writes" flag:
+		 *	 queue (via done_ee) the P_DISCARD_ACK; OUT.
+		 *
+		 * if any conflicting request is found:
+		 *	 block the receiver, waiting on misc_wait
+		 *	 until no more conflicting requests are there,
+		 *	 or we get interrupted (disconnect).
+		 *
+		 *	 we do not just write after local io completion of those
+		 *	 requests, but only after req is done completely, i.e.
+		 *	 we wait for the P_DISCARD_ACK to arrive!
+		 *
+		 *	 then proceed normally, i.e. submit.
+		 */
+		if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
+			goto out_interrupted;
+
+		spin_lock_irq(&mdev->req_lock);
+
+		hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+		slot = tl_hash_slot(mdev, sector);
+		first = 1;
+		for (;;) {
+			int have_unacked = 0;
+			int have_conflict = 0;
+			prepare_to_wait(&mdev->misc_wait, &wait,
+				TASK_INTERRUPTIBLE);
+			hlist_for_each_entry(i, n, slot, colision) {
+				if (OVERLAPS) {
+					/* only ALERT on first iteration,
+					 * we may be woken up early... */
+					if (first)
+						dev_alert(DEV, "%s[%u] Concurrent local write detected!"
+						      "	new: %llus +%u; pending: %llus +%u\n",
+						      current->comm, current->pid,
+						      (unsigned long long)sector, size,
+						      (unsigned long long)i->sector, i->size);
+					if (i->rq_state & RQ_NET_PENDING)
+						++have_unacked;
+					++have_conflict;
+				}
+			}
+#undef OVERLAPS
+			if (!have_conflict)
+				break;
+
+			/* Discard Ack only for the _first_ iteration */
+			if (first && discard && have_unacked) {
+				dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
+				     (unsigned long long)sector);
+				inc_unacked(mdev);
+				e->w.cb = e_send_discard_ack;
+				list_add_tail(&e->w.list, &mdev->done_ee);
+
+				spin_unlock_irq(&mdev->req_lock);
+
+				/* we could probably send that P_DISCARD_ACK ourselves,
+				 * but I don't like the receiver using the msock */
+
+				put_ldev(mdev);
+				wake_asender(mdev);
+				finish_wait(&mdev->misc_wait, &wait);
+				return TRUE;
+			}
+
+			if (signal_pending(current)) {
+				hlist_del_init(&e->colision);
+
+				spin_unlock_irq(&mdev->req_lock);
+
+				finish_wait(&mdev->misc_wait, &wait);
+				goto out_interrupted;
+			}
+
+			spin_unlock_irq(&mdev->req_lock);
+			if (first) {
+				first = 0;
+				dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
+				     "sec=%llus\n", (unsigned long long)sector);
+			} else if (discard) {
+				/* we had none on the first iteration.
+				 * there must be none now. */
+				D_ASSERT(have_unacked == 0);
+			}
+			schedule();
+			spin_lock_irq(&mdev->req_lock);
+		}
+		finish_wait(&mdev->misc_wait, &wait);
+	}
+
+	list_add(&e->w.list, &mdev->active_ee);
+	spin_unlock_irq(&mdev->req_lock);
+
+	switch (mdev->net_conf->wire_protocol) {
+	case DRBD_PROT_C:
+		inc_unacked(mdev);
+		/* corresponding dec_unacked() in e_end_block()
+		 * respective _drbd_clear_done_ee */
+		break;
+	case DRBD_PROT_B:
+		/* I really don't like it that the receiver thread
+		 * sends on the msock, but anyways */
+		drbd_send_ack(mdev, P_RECV_ACK, e);
+		break;
+	case DRBD_PROT_A:
+		/* nothing to do */
+		break;
+	}
+
+	if (mdev->state.pdsk == D_DISKLESS) {
+		/* In case we have the only disk of the cluster, */
+		drbd_set_out_of_sync(mdev, e->sector, e->size);
+		e->flags |= EE_CALL_AL_COMPLETE_IO;
+		drbd_al_begin_io(mdev, e->sector);
+	}
+
+	e->private_bio->bi_rw = rw;
+	trace_drbd_ee(mdev, e, "submitting for (data)write");
+	trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL);
+	drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio);
+	/* accounting done in endio */
+
+	maybe_kick_lo(mdev);
+	return TRUE;
+
+out_interrupted:
+	/* yes, the epoch_size now is imbalanced.
+	 * but we drop the connection anyways, so we don't have a chance to
+	 * receive a barrier... atomic_inc(&mdev->epoch_size); */
+	put_ldev(mdev);
+	drbd_free_ee(mdev, e);
+	return FALSE;
+}
+
+static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
+{
+	sector_t sector;
+	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	struct drbd_epoch_entry *e;
+	struct digest_info *di = NULL;
+	int size, digest_size;
+	unsigned int fault_type;
+	struct p_block_req *p =
+		(struct p_block_req *)h;
+	const int brps = sizeof(*p)-sizeof(*h);
+
+	if (drbd_recv(mdev, h->payload, brps) != brps)
+		return FALSE;
+
+	sector = be64_to_cpu(p->sector);
+	size   = be32_to_cpu(p->blksize);
+
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+				(unsigned long long)sector, size);
+		return FALSE;
+	}
+	if (sector + (size>>9) > capacity) {
+		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+				(unsigned long long)sector, size);
+		return FALSE;
+	}
+
+	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Can not satisfy peer's read request, "
+			    "no local data.\n");
+		drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY :
+				 P_NEG_RS_DREPLY , p);
+		return TRUE;
+	}
+
+	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+	 * "criss-cross" setup, that might cause write-out on some other DRBD,
+	 * which in turn might block on the other node at this very place.  */
+	e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
+	if (!e) {
+		put_ldev(mdev);
+		return FALSE;
+	}
+
+	e->private_bio->bi_rw = READ;
+	e->private_bio->bi_end_io = drbd_endio_read_sec;
+
+	switch (h->command) {
+	case P_DATA_REQUEST:
+		e->w.cb = w_e_end_data_req;
+		fault_type = DRBD_FAULT_DT_RD;
+		break;
+	case P_RS_DATA_REQUEST:
+		e->w.cb = w_e_end_rsdata_req;
+		fault_type = DRBD_FAULT_RS_RD;
+		/* Eventually this should become asynchronously. Currently it
+		 * blocks the whole receiver just to delay the reading of a
+		 * resync data block.
+		 * the drbd_work_queue mechanism is made for this...
+		 */
+		if (!drbd_rs_begin_io(mdev, sector)) {
+			/* we have been interrupted,
+			 * probably connection lost! */
+			D_ASSERT(signal_pending(current));
+			goto out_free_e;
+		}
+		break;
+
+	case P_OV_REPLY:
+	case P_CSUM_RS_REQUEST:
+		fault_type = DRBD_FAULT_RS_RD;
+		digest_size = h->length - brps ;
+		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
+		if (!di)
+			goto out_free_e;
+
+		di->digest_size = digest_size;
+		di->digest = (((char *)di)+sizeof(struct digest_info));
+
+		if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
+			goto out_free_e;
+
+		e->block_id = (u64)(unsigned long)di;
+		if (h->command == P_CSUM_RS_REQUEST) {
+			D_ASSERT(mdev->agreed_pro_version >= 89);
+			e->w.cb = w_e_end_csum_rs_req;
+		} else if (h->command == P_OV_REPLY) {
+			e->w.cb = w_e_end_ov_reply;
+			dec_rs_pending(mdev);
+			break;
+		}
+
+		if (!drbd_rs_begin_io(mdev, sector)) {
+			/* we have been interrupted, probably connection lost! */
+			D_ASSERT(signal_pending(current));
+			goto out_free_e;
+		}
+		break;
+
+	case P_OV_REQUEST:
+		if (mdev->state.conn >= C_CONNECTED &&
+		    mdev->state.conn != C_VERIFY_T)
+			dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
+				drbd_conn_str(mdev->state.conn));
+		if (mdev->ov_start_sector == ~(sector_t)0 &&
+		    mdev->agreed_pro_version >= 90) {
+			mdev->ov_start_sector = sector;
+			mdev->ov_position = sector;
+			mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector);
+			dev_info(DEV, "Online Verify start sector: %llu\n",
+					(unsigned long long)sector);
+		}
+		e->w.cb = w_e_end_ov_req;
+		fault_type = DRBD_FAULT_RS_RD;
+		/* Eventually this should become asynchronous. Currently it
+		 * blocks the whole receiver just to delay the reading of a
+		 * resync data block.
+		 * the drbd_work_queue mechanism is made for this...
+		 */
+		if (!drbd_rs_begin_io(mdev, sector)) {
+			/* we have been interrupted,
+			 * probably connection lost! */
+			D_ASSERT(signal_pending(current));
+			goto out_free_e;
+		}
+		break;
+
+
+	default:
+		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+		    cmdname(h->command));
+		fault_type = DRBD_FAULT_MAX;
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	list_add(&e->w.list, &mdev->read_ee);
+	spin_unlock_irq(&mdev->req_lock);
+
+	inc_unacked(mdev);
+
+	trace_drbd_ee(mdev, e, "submitting for read");
+	trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL);
+	drbd_generic_make_request(mdev, fault_type, e->private_bio);
+	maybe_kick_lo(mdev);
+
+	return TRUE;
+
+out_free_e:
+	kfree(di);
+	put_ldev(mdev);
+	drbd_free_ee(mdev, e);
+	return FALSE;
+}
+
+static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
+{
+	int self, peer, rv = -100;
+	unsigned long ch_self, ch_peer;
+
+	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+	peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+	ch_peer = mdev->p_uuid[UI_SIZE];
+	ch_self = mdev->comm_bm_set;
+
+	switch (mdev->net_conf->after_sb_0p) {
+	case ASB_CONSENSUS:
+	case ASB_DISCARD_SECONDARY:
+	case ASB_CALL_HELPER:
+		dev_err(DEV, "Configuration error.\n");
+		break;
+	case ASB_DISCONNECT:
+		break;
+	case ASB_DISCARD_YOUNGER_PRI:
+		if (self == 0 && peer == 1) {
+			rv = -1;
+			break;
+		}
+		if (self == 1 && peer == 0) {
+			rv =  1;
+			break;
+		}
+		/* Else fall through to one of the other strategies... */
+	case ASB_DISCARD_OLDER_PRI:
+		if (self == 0 && peer == 1) {
+			rv = 1;
+			break;
+		}
+		if (self == 1 && peer == 0) {
+			rv = -1;
+			break;
+		}
+		/* Else fall through to one of the other strategies... */
+		dev_warn(DEV, "Discard younger/older primary did not found a decision\n"
+		     "Using discard-least-changes instead\n");
+	case ASB_DISCARD_ZERO_CHG:
+		if (ch_peer == 0 && ch_self == 0) {
+			rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+				? -1 : 1;
+			break;
+		} else {
+			if (ch_peer == 0) { rv =  1; break; }
+			if (ch_self == 0) { rv = -1; break; }
+		}
+		if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
+			break;
+	case ASB_DISCARD_LEAST_CHG:
+		if	(ch_self < ch_peer)
+			rv = -1;
+		else if (ch_self > ch_peer)
+			rv =  1;
+		else /* ( ch_self == ch_peer ) */
+		     /* Well, then use something else. */
+			rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+				? -1 : 1;
+		break;
+	case ASB_DISCARD_LOCAL:
+		rv = -1;
+		break;
+	case ASB_DISCARD_REMOTE:
+		rv =  1;
+	}
+
+	return rv;
+}
+
+static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
+{
+	int self, peer, hg, rv = -100;
+
+	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+	peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+	switch (mdev->net_conf->after_sb_1p) {
+	case ASB_DISCARD_YOUNGER_PRI:
+	case ASB_DISCARD_OLDER_PRI:
+	case ASB_DISCARD_LEAST_CHG:
+	case ASB_DISCARD_LOCAL:
+	case ASB_DISCARD_REMOTE:
+		dev_err(DEV, "Configuration error.\n");
+		break;
+	case ASB_DISCONNECT:
+		break;
+	case ASB_CONSENSUS:
+		hg = drbd_asb_recover_0p(mdev);
+		if (hg == -1 && mdev->state.role == R_SECONDARY)
+			rv = hg;
+		if (hg == 1  && mdev->state.role == R_PRIMARY)
+			rv = hg;
+		break;
+	case ASB_VIOLENTLY:
+		rv = drbd_asb_recover_0p(mdev);
+		break;
+	case ASB_DISCARD_SECONDARY:
+		return mdev->state.role == R_PRIMARY ? 1 : -1;
+	case ASB_CALL_HELPER:
+		hg = drbd_asb_recover_0p(mdev);
+		if (hg == -1 && mdev->state.role == R_PRIMARY) {
+			self = drbd_set_role(mdev, R_SECONDARY, 0);
+			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+			  * we might be here in C_WF_REPORT_PARAMS which is transient.
+			  * we do not need to wait for the after state change work either. */
+			self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+			if (self != SS_SUCCESS) {
+				drbd_khelper(mdev, "pri-lost-after-sb");
+			} else {
+				dev_warn(DEV, "Successfully gave up primary role.\n");
+				rv = hg;
+			}
+		} else
+			rv = hg;
+	}
+
+	return rv;
+}
+
+static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
+{
+	int self, peer, hg, rv = -100;
+
+	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+	peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+	switch (mdev->net_conf->after_sb_2p) {
+	case ASB_DISCARD_YOUNGER_PRI:
+	case ASB_DISCARD_OLDER_PRI:
+	case ASB_DISCARD_LEAST_CHG:
+	case ASB_DISCARD_LOCAL:
+	case ASB_DISCARD_REMOTE:
+	case ASB_CONSENSUS:
+	case ASB_DISCARD_SECONDARY:
+		dev_err(DEV, "Configuration error.\n");
+		break;
+	case ASB_VIOLENTLY:
+		rv = drbd_asb_recover_0p(mdev);
+		break;
+	case ASB_DISCONNECT:
+		break;
+	case ASB_CALL_HELPER:
+		hg = drbd_asb_recover_0p(mdev);
+		if (hg == -1) {
+			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+			  * we might be here in C_WF_REPORT_PARAMS which is transient.
+			  * we do not need to wait for the after state change work either. */
+			self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+			if (self != SS_SUCCESS) {
+				drbd_khelper(mdev, "pri-lost-after-sb");
+			} else {
+				dev_warn(DEV, "Successfully gave up primary role.\n");
+				rv = hg;
+			}
+		} else
+			rv = hg;
+	}
+
+	return rv;
+}
+
+static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
+			   u64 bits, u64 flags)
+{
+	if (!uuid) {
+		dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
+		return;
+	}
+	dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
+	     text,
+	     (unsigned long long)uuid[UI_CURRENT],
+	     (unsigned long long)uuid[UI_BITMAP],
+	     (unsigned long long)uuid[UI_HISTORY_START],
+	     (unsigned long long)uuid[UI_HISTORY_END],
+	     (unsigned long long)bits,
+	     (unsigned long long)flags);
+}
+
+/*
+  100	after split brain try auto recover
+    2	C_SYNC_SOURCE set BitMap
+    1	C_SYNC_SOURCE use BitMap
+    0	no Sync
+   -1	C_SYNC_TARGET use BitMap
+   -2	C_SYNC_TARGET set BitMap
+ -100	after split brain, disconnect
+-1000	unrelated data
+ */
+static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
+{
+	u64 self, peer;
+	int i, j;
+
+	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+
+	*rule_nr = 10;
+	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
+		return 0;
+
+	*rule_nr = 20;
+	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
+	     peer != UUID_JUST_CREATED)
+		return -2;
+
+	*rule_nr = 30;
+	if (self != UUID_JUST_CREATED &&
+	    (peer == UUID_JUST_CREATED || peer == (u64)0))
+		return 2;
+
+	if (self == peer) {
+		int rct, dc; /* roles at crash time */
+
+		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
+
+			if (mdev->agreed_pro_version < 91)
+				return -1001;
+
+			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
+			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
+				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
+				drbd_uuid_set_bm(mdev, 0UL);
+
+				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+				*rule_nr = 34;
+			} else {
+				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
+				*rule_nr = 36;
+			}
+
+			return 1;
+		}
+
+		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
+
+			if (mdev->agreed_pro_version < 91)
+				return -1001;
+
+			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
+			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
+				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
+
+				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
+				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
+				mdev->p_uuid[UI_BITMAP] = 0UL;
+
+				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+				*rule_nr = 35;
+			} else {
+				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
+				*rule_nr = 37;
+			}
+
+			return -1;
+		}
+
+		/* Common power [off|failure] */
+		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
+			(mdev->p_uuid[UI_FLAGS] & 2);
+		/* lowest bit is set when we were primary,
+		 * next bit (weight 2) is set when peer was primary */
+		*rule_nr = 40;
+
+		switch (rct) {
+		case 0: /* !self_pri && !peer_pri */ return 0;
+		case 1: /*  self_pri && !peer_pri */ return 1;
+		case 2: /* !self_pri &&  peer_pri */ return -1;
+		case 3: /*  self_pri &&  peer_pri */
+			dc = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+			return dc ? -1 : 1;
+		}
+	}
+
+	*rule_nr = 50;
+	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+	if (self == peer)
+		return -1;
+
+	*rule_nr = 51;
+	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
+	if (self == peer) {
+		self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
+		peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1);
+		if (self == peer) {
+			/* The last P_SYNC_UUID did not get though. Undo the last start of
+			   resync as sync source modifications of the peer's UUIDs. */
+
+			if (mdev->agreed_pro_version < 91)
+				return -1001;
+
+			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
+			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
+			return -1;
+		}
+	}
+
+	*rule_nr = 60;
+	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+		peer = mdev->p_uuid[i] & ~((u64)1);
+		if (self == peer)
+			return -2;
+	}
+
+	*rule_nr = 70;
+	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+	if (self == peer)
+		return 1;
+
+	*rule_nr = 71;
+	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
+	if (self == peer) {
+		self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1);
+		peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
+		if (self == peer) {
+			/* The last P_SYNC_UUID did not get though. Undo the last start of
+			   resync as sync source modifications of our UUIDs. */
+
+			if (mdev->agreed_pro_version < 91)
+				return -1001;
+
+			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
+			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
+
+			dev_info(DEV, "Undid last start of resync:\n");
+
+			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+
+			return 1;
+		}
+	}
+
+
+	*rule_nr = 80;
+	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+		self = mdev->ldev->md.uuid[i] & ~((u64)1);
+		if (self == peer)
+			return 2;
+	}
+
+	*rule_nr = 90;
+	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+	if (self == peer && self != ((u64)0))
+		return 100;
+
+	*rule_nr = 100;
+	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+		self = mdev->ldev->md.uuid[i] & ~((u64)1);
+		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
+			peer = mdev->p_uuid[j] & ~((u64)1);
+			if (self == peer)
+				return -100;
+		}
+	}
+
+	return -1000;
+}
+
+/* drbd_sync_handshake() returns the new conn state on success, or
+   CONN_MASK (-1) on failure.
+ */
+static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
+					   enum drbd_disk_state peer_disk) __must_hold(local)
+{
+	int hg, rule_nr;
+	enum drbd_conns rv = C_MASK;
+	enum drbd_disk_state mydisk;
+
+	mydisk = mdev->state.disk;
+	if (mydisk == D_NEGOTIATING)
+		mydisk = mdev->new_state_tmp.disk;
+
+	dev_info(DEV, "drbd_sync_handshake:\n");
+	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
+	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
+		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+
+	hg = drbd_uuid_compare(mdev, &rule_nr);
+
+	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
+
+	if (hg == -1000) {
+		dev_alert(DEV, "Unrelated data, aborting!\n");
+		return C_MASK;
+	}
+	if (hg == -1001) {
+		dev_alert(DEV, "To resolve this both sides have to support at least protocol\n");
+		return C_MASK;
+	}
+
+	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
+	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
+		int f = (hg == -100) || abs(hg) == 2;
+		hg = mydisk > D_INCONSISTENT ? 1 : -1;
+		if (f)
+			hg = hg*2;
+		dev_info(DEV, "Becoming sync %s due to disk states.\n",
+		     hg > 0 ? "source" : "target");
+	}
+
+	if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) {
+		int pcount = (mdev->state.role == R_PRIMARY)
+			   + (peer_role == R_PRIMARY);
+		int forced = (hg == -100);
+
+		switch (pcount) {
+		case 0:
+			hg = drbd_asb_recover_0p(mdev);
+			break;
+		case 1:
+			hg = drbd_asb_recover_1p(mdev);
+			break;
+		case 2:
+			hg = drbd_asb_recover_2p(mdev);
+			break;
+		}
+		if (abs(hg) < 100) {
+			dev_warn(DEV, "Split-Brain detected, %d primaries, "
+			     "automatically solved. Sync from %s node\n",
+			     pcount, (hg < 0) ? "peer" : "this");
+			if (forced) {
+				dev_warn(DEV, "Doing a full sync, since"
+				     " UUIDs where ambiguous.\n");
+				hg = hg*2;
+			}
+		}
+	}
+
+	if (hg == -100) {
+		if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+			hg = -1;
+		if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+			hg = 1;
+
+		if (abs(hg) < 100)
+			dev_warn(DEV, "Split-Brain detected, manually solved. "
+			     "Sync from %s node\n",
+			     (hg < 0) ? "peer" : "this");
+	}
+
+	if (hg == -100) {
+		dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
+		drbd_khelper(mdev, "split-brain");
+		return C_MASK;
+	}
+
+	if (hg > 0 && mydisk <= D_INCONSISTENT) {
+		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
+		return C_MASK;
+	}
+
+	if (hg < 0 && /* by intention we do not use mydisk here. */
+	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
+		switch (mdev->net_conf->rr_conflict) {
+		case ASB_CALL_HELPER:
+			drbd_khelper(mdev, "pri-lost");
+			/* fall through */
+		case ASB_DISCONNECT:
+			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
+			return C_MASK;
+		case ASB_VIOLENTLY:
+			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
+			     "assumption\n");
+		}
+	}
+
+	if (abs(hg) >= 2) {
+		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
+		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
+			return C_MASK;
+	}
+
+	if (hg > 0) { /* become sync source. */
+		rv = C_WF_BITMAP_S;
+	} else if (hg < 0) { /* become sync target */
+		rv = C_WF_BITMAP_T;
+	} else {
+		rv = C_CONNECTED;
+		if (drbd_bm_total_weight(mdev)) {
+			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
+			     drbd_bm_total_weight(mdev));
+		}
+	}
+
+	return rv;
+}
+
+/* returns 1 if invalid */
+static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
+{
+	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
+	if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
+	    (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
+		return 0;
+
+	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
+	if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
+	    self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
+		return 1;
+
+	/* everything else is valid if they are equal on both sides. */
+	if (peer == self)
+		return 0;
+
+	/* everything es is invalid. */
+	return 1;
+}
+
+static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_protocol *p = (struct p_protocol *)h;
+	int header_size, data_size;
+	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
+	int p_want_lose, p_two_primaries;
+	char p_integrity_alg[SHARED_SECRET_MAX] = "";
+
+	header_size = sizeof(*p) - sizeof(*h);
+	data_size   = h->length  - header_size;
+
+	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+		return FALSE;
+
+	p_proto		= be32_to_cpu(p->protocol);
+	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
+	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
+	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
+	p_want_lose	= be32_to_cpu(p->want_lose);
+	p_two_primaries = be32_to_cpu(p->two_primaries);
+
+	if (p_proto != mdev->net_conf->wire_protocol) {
+		dev_err(DEV, "incompatible communication protocols\n");
+		goto disconnect;
+	}
+
+	if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) {
+		dev_err(DEV, "incompatible after-sb-0pri settings\n");
+		goto disconnect;
+	}
+
+	if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) {
+		dev_err(DEV, "incompatible after-sb-1pri settings\n");
+		goto disconnect;
+	}
+
+	if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) {
+		dev_err(DEV, "incompatible after-sb-2pri settings\n");
+		goto disconnect;
+	}
+
+	if (p_want_lose && mdev->net_conf->want_lose) {
+		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
+		goto disconnect;
+	}
+
+	if (p_two_primaries != mdev->net_conf->two_primaries) {
+		dev_err(DEV, "incompatible setting of the two-primaries options\n");
+		goto disconnect;
+	}
+
+	if (mdev->agreed_pro_version >= 87) {
+		unsigned char *my_alg = mdev->net_conf->integrity_alg;
+
+		if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
+			return FALSE;
+
+		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
+		if (strcmp(p_integrity_alg, my_alg)) {
+			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
+			goto disconnect;
+		}
+		dev_info(DEV, "data-integrity-alg: %s\n",
+		     my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
+	}
+
+	return TRUE;
+
+disconnect:
+	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+	return FALSE;
+}
+
+/* helper function
+ * input: alg name, feature name
+ * return: NULL (alg name was "")
+ *         ERR_PTR(error) if something goes wrong
+ *         or the crypto hash ptr, if it worked out ok. */
+struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
+		const char *alg, const char *name)
+{
+	struct crypto_hash *tfm;
+
+	if (!alg[0])
+		return NULL;
+
+	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
+			alg, name, PTR_ERR(tfm));
+		return tfm;
+	}
+	if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
+		crypto_free_hash(tfm);
+		dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
+		return ERR_PTR(-EINVAL);
+	}
+	return tfm;
+}
+
+static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
+{
+	int ok = TRUE;
+	struct p_rs_param_89 *p = (struct p_rs_param_89 *)h;
+	unsigned int header_size, data_size, exp_max_sz;
+	struct crypto_hash *verify_tfm = NULL;
+	struct crypto_hash *csums_tfm = NULL;
+	const int apv = mdev->agreed_pro_version;
+
+	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
+		    : apv == 88 ? sizeof(struct p_rs_param)
+					+ SHARED_SECRET_MAX
+		    : /* 89 */    sizeof(struct p_rs_param_89);
+
+	if (h->length > exp_max_sz) {
+		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
+		    h->length, exp_max_sz);
+		return FALSE;
+	}
+
+	if (apv <= 88) {
+		header_size = sizeof(struct p_rs_param) - sizeof(*h);
+		data_size   = h->length  - header_size;
+	} else /* apv >= 89 */ {
+		header_size = sizeof(struct p_rs_param_89) - sizeof(*h);
+		data_size   = h->length  - header_size;
+		D_ASSERT(data_size == 0);
+	}
+
+	/* initialize verify_alg and csums_alg */
+	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+		return FALSE;
+
+	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
+
+	if (apv >= 88) {
+		if (apv == 88) {
+			if (data_size > SHARED_SECRET_MAX) {
+				dev_err(DEV, "verify-alg too long, "
+				    "peer wants %u, accepting only %u byte\n",
+						data_size, SHARED_SECRET_MAX);
+				return FALSE;
+			}
+
+			if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
+				return FALSE;
+
+			/* we expect NUL terminated string */
+			/* but just in case someone tries to be evil */
+			D_ASSERT(p->verify_alg[data_size-1] == 0);
+			p->verify_alg[data_size-1] = 0;
+
+		} else /* apv >= 89 */ {
+			/* we still expect NUL terminated strings */
+			/* but just in case someone tries to be evil */
+			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
+			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
+			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
+			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
+		}
+
+		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
+				    mdev->sync_conf.verify_alg, p->verify_alg);
+				goto disconnect;
+			}
+			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
+					p->verify_alg, "verify-alg");
+			if (IS_ERR(verify_tfm)) {
+				verify_tfm = NULL;
+				goto disconnect;
+			}
+		}
+
+		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
+				    mdev->sync_conf.csums_alg, p->csums_alg);
+				goto disconnect;
+			}
+			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
+					p->csums_alg, "csums-alg");
+			if (IS_ERR(csums_tfm)) {
+				csums_tfm = NULL;
+				goto disconnect;
+			}
+		}
+
+
+		spin_lock(&mdev->peer_seq_lock);
+		/* lock against drbd_nl_syncer_conf() */
+		if (verify_tfm) {
+			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
+			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
+			crypto_free_hash(mdev->verify_tfm);
+			mdev->verify_tfm = verify_tfm;
+			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
+		}
+		if (csums_tfm) {
+			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
+			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
+			crypto_free_hash(mdev->csums_tfm);
+			mdev->csums_tfm = csums_tfm;
+			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
+		}
+		spin_unlock(&mdev->peer_seq_lock);
+	}
+
+	return ok;
+disconnect:
+	/* just for completeness: actually not needed,
+	 * as this is not reached if csums_tfm was ok. */
+	crypto_free_hash(csums_tfm);
+	/* but free the verify_tfm again, if csums_tfm did not work out */
+	crypto_free_hash(verify_tfm);
+	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+	return FALSE;
+}
+
+static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
+{
+	/* sorry, we currently have no working implementation
+	 * of distributed TCQ */
+}
+
+/* warn if the arguments differ by more than 12.5% */
+static void warn_if_differ_considerably(struct drbd_conf *mdev,
+	const char *s, sector_t a, sector_t b)
+{
+	sector_t d;
+	if (a == 0 || b == 0)
+		return;
+	d = (a > b) ? (a - b) : (b - a);
+	if (d > (a>>3) || d > (b>>3))
+		dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
+		     (unsigned long long)a, (unsigned long long)b);
+}
+
+static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_sizes *p = (struct p_sizes *)h;
+	enum determine_dev_size dd = unchanged;
+	unsigned int max_seg_s;
+	sector_t p_size, p_usize, my_usize;
+	int ldsc = 0; /* local disk size changed */
+	enum drbd_conns nconn;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+	if (drbd_recv(mdev, h->payload, h->length) != h->length)
+		return FALSE;
+
+	p_size = be64_to_cpu(p->d_size);
+	p_usize = be64_to_cpu(p->u_size);
+
+	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
+		dev_err(DEV, "some backing storage is needed\n");
+		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+		return FALSE;
+	}
+
+	/* just store the peer's disk size for now.
+	 * we still need to figure out whether we accept that. */
+	mdev->p_size = p_size;
+
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+	if (get_ldev(mdev)) {
+		warn_if_differ_considerably(mdev, "lower level device sizes",
+			   p_size, drbd_get_max_capacity(mdev->ldev));
+		warn_if_differ_considerably(mdev, "user requested size",
+					    p_usize, mdev->ldev->dc.disk_size);
+
+		/* if this is the first connect, or an otherwise expected
+		 * param exchange, choose the minimum */
+		if (mdev->state.conn == C_WF_REPORT_PARAMS)
+			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
+					     p_usize);
+
+		my_usize = mdev->ldev->dc.disk_size;
+
+		if (mdev->ldev->dc.disk_size != p_usize) {
+			mdev->ldev->dc.disk_size = p_usize;
+			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
+			     (unsigned long)mdev->ldev->dc.disk_size);
+		}
+
+		/* Never shrink a device with usable data during connect.
+		   But allow online shrinking if we are connected. */
+		if (drbd_new_dev_size(mdev, mdev->ldev) <
+		   drbd_get_capacity(mdev->this_bdev) &&
+		   mdev->state.disk >= D_OUTDATED &&
+		   mdev->state.conn < C_CONNECTED) {
+			dev_err(DEV, "The peer's disk size is too small!\n");
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			mdev->ldev->dc.disk_size = my_usize;
+			put_ldev(mdev);
+			return FALSE;
+		}
+		put_ldev(mdev);
+	}
+#undef min_not_zero
+
+	if (get_ldev(mdev)) {
+		dd = drbd_determin_dev_size(mdev);
+		put_ldev(mdev);
+		if (dd == dev_size_error)
+			return FALSE;
+		drbd_md_sync(mdev);
+	} else {
+		/* I am diskless, need to accept the peer's size. */
+		drbd_set_my_capacity(mdev, p_size);
+	}
+
+	if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		nconn = drbd_sync_handshake(mdev,
+				mdev->state.peer, mdev->state.pdsk);
+		put_ldev(mdev);
+
+		if (nconn == C_MASK) {
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			return FALSE;
+		}
+
+		if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) {
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			return FALSE;
+		}
+	}
+
+	if (get_ldev(mdev)) {
+		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+			ldsc = 1;
+		}
+
+		max_seg_s = be32_to_cpu(p->max_segment_size);
+		if (max_seg_s != queue_max_segment_size(mdev->rq_queue))
+			drbd_setup_queue_param(mdev, max_seg_s);
+
+		drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type));
+		put_ldev(mdev);
+	}
+
+	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
+		if (be64_to_cpu(p->c_size) !=
+		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
+			/* we have different sizes, probably peer
+			 * needs to know my new size... */
+			drbd_send_sizes(mdev, 0);
+		}
+		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
+		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
+			if (mdev->state.pdsk >= D_INCONSISTENT &&
+			    mdev->state.disk >= D_INCONSISTENT)
+				resync_after_online_grow(mdev);
+			else
+				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+		}
+	}
+
+	return TRUE;
+}
+
+static int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_uuids *p = (struct p_uuids *)h;
+	u64 *p_uuid;
+	int i;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+	if (drbd_recv(mdev, h->payload, h->length) != h->length)
+		return FALSE;
+
+	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+
+	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
+		p_uuid[i] = be64_to_cpu(p->uuid[i]);
+
+	kfree(mdev->p_uuid);
+	mdev->p_uuid = p_uuid;
+
+	if (mdev->state.conn < C_CONNECTED &&
+	    mdev->state.disk < D_INCONSISTENT &&
+	    mdev->state.role == R_PRIMARY &&
+	    (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
+		dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
+		    (unsigned long long)mdev->ed_uuid);
+		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+		return FALSE;
+	}
+
+	if (get_ldev(mdev)) {
+		int skip_initial_sync =
+			mdev->state.conn == C_CONNECTED &&
+			mdev->agreed_pro_version >= 90 &&
+			mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
+			(p_uuid[UI_FLAGS] & 8);
+		if (skip_initial_sync) {
+			dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
+			drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+					"clear_n_write from receive_uuids");
+			_drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
+			_drbd_uuid_set(mdev, UI_BITMAP, 0);
+			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+					CS_VERBOSE, NULL);
+			drbd_md_sync(mdev);
+		}
+		put_ldev(mdev);
+	}
+
+	/* Before we test for the disk state, we should wait until an eventually
+	   ongoing cluster wide state change is finished. That is important if
+	   we are primary and are detaching from our disk. We need to see the
+	   new disk state... */
+	wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+	if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
+		drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+
+	return TRUE;
+}
+
+/**
+ * convert_state() - Converts the peer's view of the cluster state to our point of view
+ * @ps:		The state as seen by the peer.
+ */
+static union drbd_state convert_state(union drbd_state ps)
+{
+	union drbd_state ms;
+
+	static enum drbd_conns c_tab[] = {
+		[C_CONNECTED] = C_CONNECTED,
+
+		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
+		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
+		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
+		[C_VERIFY_S]       = C_VERIFY_T,
+		[C_MASK]   = C_MASK,
+	};
+
+	ms.i = ps.i;
+
+	ms.conn = c_tab[ps.conn];
+	ms.peer = ps.role;
+	ms.role = ps.peer;
+	ms.pdsk = ps.disk;
+	ms.disk = ps.pdsk;
+	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
+
+	return ms;
+}
+
+static int receive_req_state(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_req_state *p = (struct p_req_state *)h;
+	union drbd_state mask, val;
+	int rv;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+	if (drbd_recv(mdev, h->payload, h->length) != h->length)
+		return FALSE;
+
+	mask.i = be32_to_cpu(p->mask);
+	val.i = be32_to_cpu(p->val);
+
+	if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
+	    test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
+		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
+		return TRUE;
+	}
+
+	mask = convert_state(mask);
+	val = convert_state(val);
+
+	rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
+
+	drbd_send_sr_reply(mdev, rv);
+	drbd_md_sync(mdev);
+
+	return TRUE;
+}
+
+static int receive_state(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_state *p = (struct p_state *)h;
+	enum drbd_conns nconn, oconn;
+	union drbd_state ns, peer_state;
+	enum drbd_disk_state real_peer_disk;
+	int rv;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h)))
+		return FALSE;
+
+	if (drbd_recv(mdev, h->payload, h->length) != h->length)
+		return FALSE;
+
+	peer_state.i = be32_to_cpu(p->state);
+
+	real_peer_disk = peer_state.disk;
+	if (peer_state.disk == D_NEGOTIATING) {
+		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
+		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+ retry:
+	oconn = nconn = mdev->state.conn;
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (nconn == C_WF_REPORT_PARAMS)
+		nconn = C_CONNECTED;
+
+	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
+	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		int cr; /* consider resync */
+
+		/* if we established a new connection */
+		cr  = (oconn < C_CONNECTED);
+		/* if we had an established connection
+		 * and one of the nodes newly attaches a disk */
+		cr |= (oconn == C_CONNECTED &&
+		       (peer_state.disk == D_NEGOTIATING ||
+			mdev->state.disk == D_NEGOTIATING));
+		/* if we have both been inconsistent, and the peer has been
+		 * forced to be UpToDate with --overwrite-data */
+		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
+		/* if we had been plain connected, and the admin requested to
+		 * start a sync by "invalidate" or "invalidate-remote" */
+		cr |= (oconn == C_CONNECTED &&
+				(peer_state.conn >= C_STARTING_SYNC_S &&
+				 peer_state.conn <= C_WF_BITMAP_T));
+
+		if (cr)
+			nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+
+		put_ldev(mdev);
+		if (nconn == C_MASK) {
+			if (mdev->state.disk == D_NEGOTIATING) {
+				drbd_force_state(mdev, NS(disk, D_DISKLESS));
+				nconn = C_CONNECTED;
+			} else if (peer_state.disk == D_NEGOTIATING) {
+				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
+				peer_state.disk = D_DISKLESS;
+			} else {
+				D_ASSERT(oconn == C_WF_REPORT_PARAMS);
+				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+				return FALSE;
+			}
+		}
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->state.conn != oconn)
+		goto retry;
+	clear_bit(CONSIDER_RESYNC, &mdev->flags);
+	ns.i = mdev->state.i;
+	ns.conn = nconn;
+	ns.peer = peer_state.role;
+	ns.pdsk = real_peer_disk;
+	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
+	if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+		ns.disk = mdev->new_state_tmp.disk;
+
+	rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL);
+	ns = mdev->state;
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (rv < SS_SUCCESS) {
+		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+		return FALSE;
+	}
+
+	if (oconn > C_WF_REPORT_PARAMS) {
+		if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+		    peer_state.disk != D_NEGOTIATING ) {
+			/* we want resync, peer has not yet decided to sync... */
+			/* Nowadays only used when forcing a node into primary role and
+			   setting its disk to UpToDate with that */
+			drbd_send_uuids(mdev);
+			drbd_send_state(mdev);
+		}
+	}
+
+	mdev->net_conf->want_lose = 0;
+
+	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
+
+	return TRUE;
+}
+
+static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_rs_uuid *p = (struct p_rs_uuid *)h;
+
+	wait_event(mdev->misc_wait,
+		   mdev->state.conn == C_WF_SYNC_UUID ||
+		   mdev->state.conn < C_CONNECTED ||
+		   mdev->state.disk < D_NEGOTIATING);
+
+	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+	if (drbd_recv(mdev, h->payload, h->length) != h->length)
+		return FALSE;
+
+	/* Here the _drbd_uuid_ functions are right, current should
+	   _not_ be rotated into the history */
+	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
+		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);
+
+		drbd_start_resync(mdev, C_SYNC_TARGET);
+
+		put_ldev(mdev);
+	} else
+		dev_err(DEV, "Ignoring SyncUUID packet!\n");
+
+	return TRUE;
+}
+
+enum receive_bitmap_ret { OK, DONE, FAILED };
+
+static enum receive_bitmap_ret
+receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h,
+	unsigned long *buffer, struct bm_xfer_ctx *c)
+{
+	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+	unsigned want = num_words * sizeof(long);
+
+	if (want != h->length) {
+		dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length);
+		return FAILED;
+	}
+	if (want == 0)
+		return DONE;
+	if (drbd_recv(mdev, buffer, want) != want)
+		return FAILED;
+
+	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
+
+	c->word_offset += num_words;
+	c->bit_offset = c->word_offset * BITS_PER_LONG;
+	if (c->bit_offset > c->bm_bits)
+		c->bit_offset = c->bm_bits;
+
+	return OK;
+}
+
+static enum receive_bitmap_ret
+recv_bm_rle_bits(struct drbd_conf *mdev,
+		struct p_compressed_bm *p,
+		struct bm_xfer_ctx *c)
+{
+	struct bitstream bs;
+	u64 look_ahead;
+	u64 rl;
+	u64 tmp;
+	unsigned long s = c->bit_offset;
+	unsigned long e;
+	int len = p->head.length - (sizeof(*p) - sizeof(p->head));
+	int toggle = DCBP_get_start(p);
+	int have;
+	int bits;
+
+	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
+
+	bits = bitstream_get_bits(&bs, &look_ahead, 64);
+	if (bits < 0)
+		return FAILED;
+
+	for (have = bits; have > 0; s += rl, toggle = !toggle) {
+		bits = vli_decode_bits(&rl, look_ahead);
+		if (bits <= 0)
+			return FAILED;
+
+		if (toggle) {
+			e = s + rl -1;
+			if (e >= c->bm_bits) {
+				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
+				return FAILED;
+			}
+			_drbd_bm_set_bits(mdev, s, e);
+		}
+
+		if (have < bits) {
+			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
+				have, bits, look_ahead,
+				(unsigned int)(bs.cur.b - p->code),
+				(unsigned int)bs.buf_len);
+			return FAILED;
+		}
+		look_ahead >>= bits;
+		have -= bits;
+
+		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
+		if (bits < 0)
+			return FAILED;
+		look_ahead |= tmp << have;
+		have += bits;
+	}
+
+	c->bit_offset = s;
+	bm_xfer_ctx_bit_to_word_offset(c);
+
+	return (s == c->bm_bits) ? DONE : OK;
+}
+
+static enum receive_bitmap_ret
+decode_bitmap_c(struct drbd_conf *mdev,
+		struct p_compressed_bm *p,
+		struct bm_xfer_ctx *c)
+{
+	if (DCBP_get_code(p) == RLE_VLI_Bits)
+		return recv_bm_rle_bits(mdev, p, c);
+
+	/* other variants had been implemented for evaluation,
+	 * but have been dropped as this one turned out to be "best"
+	 * during all our tests. */
+
+	dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
+	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+	return FAILED;
+}
+
+void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+		const char *direction, struct bm_xfer_ctx *c)
+{
+	/* what would it take to transfer it "plaintext" */
+	unsigned plain = sizeof(struct p_header) *
+		((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
+		+ c->bm_words * sizeof(long);
+	unsigned total = c->bytes[0] + c->bytes[1];
+	unsigned r;
+
+	/* total can not be zero. but just in case: */
+	if (total == 0)
+		return;
+
+	/* don't report if not compressed */
+	if (total >= plain)
+		return;
+
+	/* total < plain. check for overflow, still */
+	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
+		                    : (1000 * total / plain);
+
+	if (r > 1000)
+		r = 1000;
+
+	r = 1000 - r;
+	dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+	     "total %u; compression: %u.%u%%\n",
+			direction,
+			c->bytes[1], c->packets[1],
+			c->bytes[0], c->packets[0],
+			total, r/10, r % 10);
+}
+
+/* Since we are processing the bitfield from lower addresses to higher,
+   it does not matter if the process it in 32 bit chunks or 64 bit
+   chunks as long as it is little endian. (Understand it as byte stream,
+   beginning with the lowest byte...) If we would use big endian
+   we would need to process it from the highest address to the lowest,
+   in order to be agnostic to the 32 vs 64 bits issue.
+
+   returns 0 on failure, 1 if we successfully received it. */
+static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct bm_xfer_ctx c;
+	void *buffer;
+	enum receive_bitmap_ret ret;
+	int ok = FALSE;
+
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+
+	drbd_bm_lock(mdev, "receive bitmap");
+
+	/* maybe we should use some per thread scratch page,
+	 * and allocate that during initial device creation? */
+	buffer	 = (unsigned long *) __get_free_page(GFP_NOIO);
+	if (!buffer) {
+		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+		goto out;
+	}
+
+	c = (struct bm_xfer_ctx) {
+		.bm_bits = drbd_bm_bits(mdev),
+		.bm_words = drbd_bm_words(mdev),
+	};
+
+	do {
+		if (h->command == P_BITMAP) {
+			ret = receive_bitmap_plain(mdev, h, buffer, &c);
+		} else if (h->command == P_COMPRESSED_BITMAP) {
+			/* MAYBE: sanity check that we speak proto >= 90,
+			 * and the feature is enabled! */
+			struct p_compressed_bm *p;
+
+			if (h->length > BM_PACKET_PAYLOAD_BYTES) {
+				dev_err(DEV, "ReportCBitmap packet too large\n");
+				goto out;
+			}
+			/* use the page buff */
+			p = buffer;
+			memcpy(p, h, sizeof(*h));
+			if (drbd_recv(mdev, p->head.payload, h->length) != h->length)
+				goto out;
+			if (p->head.length <= (sizeof(*p) - sizeof(p->head))) {
+				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length);
+				return FAILED;
+			}
+			ret = decode_bitmap_c(mdev, p, &c);
+		} else {
+			dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command);
+			goto out;
+		}
+
+		c.packets[h->command == P_BITMAP]++;
+		c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length;
+
+		if (ret != OK)
+			break;
+
+		if (!drbd_recv_header(mdev, h))
+			goto out;
+	} while (ret == OK);
+	if (ret == FAILED)
+		goto out;
+
+	INFO_bm_xfer_stats(mdev, "receive", &c);
+
+	if (mdev->state.conn == C_WF_BITMAP_T) {
+		ok = !drbd_send_bitmap(mdev);
+		if (!ok)
+			goto out;
+		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
+		ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+		D_ASSERT(ok == SS_SUCCESS);
+	} else if (mdev->state.conn != C_WF_BITMAP_S) {
+		/* admin may have requested C_DISCONNECTING,
+		 * other threads may have noticed network errors */
+		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
+		    drbd_conn_str(mdev->state.conn));
+	}
+
+	ok = TRUE;
+ out:
+	drbd_bm_unlock(mdev);
+	if (ok && mdev->state.conn == C_WF_BITMAP_S)
+		drbd_start_resync(mdev, C_SYNC_SOURCE);
+	free_page((unsigned long) buffer);
+	return ok;
+}
+
+static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+{
+	/* TODO zero copy sink :) */
+	static char sink[128];
+	int size, want, r;
+
+	dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+	     h->command, h->length);
+
+	size = h->length;
+	while (size > 0) {
+		want = min_t(int, size, sizeof(sink));
+		r = drbd_recv(mdev, sink, want);
+		ERR_IF(r <= 0) break;
+		size -= r;
+	}
+	return size == 0;
+}
+
+static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
+{
+	if (mdev->state.disk >= D_INCONSISTENT)
+		drbd_kick_lo(mdev);
+
+	/* Make sure we've acked all the TCP data associated
+	 * with the data requests being unplugged */
+	drbd_tcp_quickack(mdev->data.socket);
+
+	return TRUE;
+}
+
+typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *);
+
+static drbd_cmd_handler_f drbd_default_handler[] = {
+	[P_DATA]	    = receive_Data,
+	[P_DATA_REPLY]	    = receive_DataReply,
+	[P_RS_DATA_REPLY]   = receive_RSDataReply,
+	[P_BARRIER]	    = receive_Barrier,
+	[P_BITMAP]	    = receive_bitmap,
+	[P_COMPRESSED_BITMAP]    = receive_bitmap,
+	[P_UNPLUG_REMOTE]   = receive_UnplugRemote,
+	[P_DATA_REQUEST]    = receive_DataRequest,
+	[P_RS_DATA_REQUEST] = receive_DataRequest,
+	[P_SYNC_PARAM]	    = receive_SyncParam,
+	[P_SYNC_PARAM89]	   = receive_SyncParam,
+	[P_PROTOCOL]        = receive_protocol,
+	[P_UUIDS]	    = receive_uuids,
+	[P_SIZES]	    = receive_sizes,
+	[P_STATE]	    = receive_state,
+	[P_STATE_CHG_REQ]   = receive_req_state,
+	[P_SYNC_UUID]       = receive_sync_uuid,
+	[P_OV_REQUEST]      = receive_DataRequest,
+	[P_OV_REPLY]        = receive_DataRequest,
+	[P_CSUM_RS_REQUEST]    = receive_DataRequest,
+	/* anything missing from this table is in
+	 * the asender_tbl, see get_asender_cmd */
+	[P_MAX_CMD]	    = NULL,
+};
+
+static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler;
+static drbd_cmd_handler_f *drbd_opt_cmd_handler;
+
+static void drbdd(struct drbd_conf *mdev)
+{
+	drbd_cmd_handler_f handler;
+	struct p_header *header = &mdev->data.rbuf.header;
+
+	while (get_t_state(&mdev->receiver) == Running) {
+		drbd_thread_current_set_cpu(mdev);
+		if (!drbd_recv_header(mdev, header))
+			break;
+
+		if (header->command < P_MAX_CMD)
+			handler = drbd_cmd_handler[header->command];
+		else if (P_MAY_IGNORE < header->command
+		     && header->command < P_MAX_OPT_CMD)
+			handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE];
+		else if (header->command > P_MAX_OPT_CMD)
+			handler = receive_skip;
+		else
+			handler = NULL;
+
+		if (unlikely(!handler)) {
+			dev_err(DEV, "unknown packet type %d, l: %d!\n",
+			    header->command, header->length);
+			drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+			break;
+		}
+		if (unlikely(!handler(mdev, header))) {
+			dev_err(DEV, "error receiving %s, l: %d!\n",
+			    cmdname(header->command), header->length);
+			drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+			break;
+		}
+
+		trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf,
+				__FILE__, __LINE__);
+	}
+}
+
+static void drbd_fail_pending_reads(struct drbd_conf *mdev)
+{
+	struct hlist_head *slot;
+	struct hlist_node *pos;
+	struct hlist_node *tmp;
+	struct drbd_request *req;
+	int i;
+
+	/*
+	 * Application READ requests
+	 */
+	spin_lock_irq(&mdev->req_lock);
+	for (i = 0; i < APP_R_HSIZE; i++) {
+		slot = mdev->app_reads_hash+i;
+		hlist_for_each_entry_safe(req, pos, tmp, slot, colision) {
+			/* it may (but should not any longer!)
+			 * be on the work queue; if that assert triggers,
+			 * we need to also grab the
+			 * spin_lock_irq(&mdev->data.work.q_lock);
+			 * and list_del_init here. */
+			D_ASSERT(list_empty(&req->w.list));
+			/* It would be nice to complete outside of spinlock.
+			 * But this is easier for now. */
+			_req_mod(req, connection_lost_while_pending);
+		}
+	}
+	for (i = 0; i < APP_R_HSIZE; i++)
+		if (!hlist_empty(mdev->app_reads_hash+i))
+			dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: "
+				"%p, should be NULL\n", i, mdev->app_reads_hash[i].first);
+
+	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
+	spin_unlock_irq(&mdev->req_lock);
+}
+
+void drbd_flush_workqueue(struct drbd_conf *mdev)
+{
+	struct drbd_wq_barrier barr;
+
+	barr.w.cb = w_prev_work_done;
+	init_completion(&barr.done);
+	drbd_queue_work(&mdev->data.work, &barr.w);
+	wait_for_completion(&barr.done);
+}
+
+static void drbd_disconnect(struct drbd_conf *mdev)
+{
+	enum drbd_fencing_p fp;
+	union drbd_state os, ns;
+	int rv = SS_UNKNOWN_ERROR;
+	unsigned int i;
+
+	if (mdev->state.conn == C_STANDALONE)
+		return;
+	if (mdev->state.conn >= C_WF_CONNECTION)
+		dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n",
+				drbd_conn_str(mdev->state.conn));
+
+	/* asender does not clean up anything. it must not interfere, either */
+	drbd_thread_stop(&mdev->asender);
+
+	mutex_lock(&mdev->data.mutex);
+	drbd_free_sock(mdev);
+	mutex_unlock(&mdev->data.mutex);
+
+	spin_lock_irq(&mdev->req_lock);
+	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
+	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
+	spin_unlock_irq(&mdev->req_lock);
+
+	/* We do not have data structures that would allow us to
+	 * get the rs_pending_cnt down to 0 again.
+	 *  * On C_SYNC_TARGET we do not have any data structures describing
+	 *    the pending RSDataRequest's we have sent.
+	 *  * On C_SYNC_SOURCE there is no data structure that tracks
+	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
+	 *  And no, it is not the sum of the reference counts in the
+	 *  resync_LRU. The resync_LRU tracks the whole operation including
+	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
+	 *  on the fly. */
+	drbd_rs_cancel_all(mdev);
+	mdev->rs_total = 0;
+	mdev->rs_failed = 0;
+	atomic_set(&mdev->rs_pending_cnt, 0);
+	wake_up(&mdev->misc_wait);
+
+	/* make sure syncer is stopped and w_resume_next_sg queued */
+	del_timer_sync(&mdev->resync_timer);
+	set_bit(STOP_SYNC_TIMER, &mdev->flags);
+	resync_timer_fn((unsigned long)mdev);
+
+	/* so we can be sure that all remote or resync reads
+	 * made it at least to net_ee */
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+
+	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
+	 * w_make_resync_request etc. which may still be on the worker queue
+	 * to be "canceled" */
+	drbd_flush_workqueue(mdev);
+
+	/* This also does reclaim_net_ee().  If we do this too early, we might
+	 * miss some resync ee and pages.*/
+	drbd_process_done_ee(mdev);
+
+	kfree(mdev->p_uuid);
+	mdev->p_uuid = NULL;
+
+	if (!mdev->state.susp)
+		tl_clear(mdev);
+
+	drbd_fail_pending_reads(mdev);
+
+	dev_info(DEV, "Connection closed\n");
+
+	drbd_md_sync(mdev);
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+	}
+
+	if (mdev->state.role == R_PRIMARY) {
+		if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) {
+			enum drbd_disk_state nps = drbd_try_outdate_peer(mdev);
+			drbd_request_state(mdev, NS(pdsk, nps));
+		}
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	os = mdev->state;
+	if (os.conn >= C_UNCONNECTED) {
+		/* Do not restart in case we are C_DISCONNECTING */
+		ns = os;
+		ns.conn = C_UNCONNECTED;
+		rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+	}
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (os.conn == C_DISCONNECTING) {
+		struct hlist_head *h;
+		wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0);
+
+		/* we must not free the tl_hash
+		 * while application io is still on the fly */
+		wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0);
+
+		spin_lock_irq(&mdev->req_lock);
+		/* paranoia code */
+		for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
+			if (h->first)
+				dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
+						(int)(h - mdev->ee_hash), h->first);
+		kfree(mdev->ee_hash);
+		mdev->ee_hash = NULL;
+		mdev->ee_hash_s = 0;
+
+		/* paranoia code */
+		for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
+			if (h->first)
+				dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
+						(int)(h - mdev->tl_hash), h->first);
+		kfree(mdev->tl_hash);
+		mdev->tl_hash = NULL;
+		mdev->tl_hash_s = 0;
+		spin_unlock_irq(&mdev->req_lock);
+
+		crypto_free_hash(mdev->cram_hmac_tfm);
+		mdev->cram_hmac_tfm = NULL;
+
+		kfree(mdev->net_conf);
+		mdev->net_conf = NULL;
+		drbd_request_state(mdev, NS(conn, C_STANDALONE));
+	}
+
+	/* tcp_close and release of sendpage pages can be deferred.  I don't
+	 * want to use SO_LINGER, because apparently it can be deferred for
+	 * more than 20 seconds (longest time I checked).
+	 *
+	 * Actually we don't care for exactly when the network stack does its
+	 * put_page(), but release our reference on these pages right here.
+	 */
+	i = drbd_release_ee(mdev, &mdev->net_ee);
+	if (i)
+		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
+	i = atomic_read(&mdev->pp_in_use);
+	if (i)
+		dev_info(DEV, "pp_in_use = %u, expected 0\n", i);
+
+	D_ASSERT(list_empty(&mdev->read_ee));
+	D_ASSERT(list_empty(&mdev->active_ee));
+	D_ASSERT(list_empty(&mdev->sync_ee));
+	D_ASSERT(list_empty(&mdev->done_ee));
+
+	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
+	atomic_set(&mdev->current_epoch->epoch_size, 0);
+	D_ASSERT(list_empty(&mdev->current_epoch->list));
+}
+
+/*
+ * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
+ * we can agree on is stored in agreed_pro_version.
+ *
+ * feature flags and the reserved array should be enough room for future
+ * enhancements of the handshake protocol, and possible plugins...
+ *
+ * for now, they are expected to be zero, but ignored.
+ */
+static int drbd_send_handshake(struct drbd_conf *mdev)
+{
+	/* ASSERT current == mdev->receiver ... */
+	struct p_handshake *p = &mdev->data.sbuf.handshake;
+	int ok;
+
+	if (mutex_lock_interruptible(&mdev->data.mutex)) {
+		dev_err(DEV, "interrupted during initial handshake\n");
+		return 0; /* interrupted. not ok. */
+	}
+
+	if (mdev->data.socket == NULL) {
+		mutex_unlock(&mdev->data.mutex);
+		return 0;
+	}
+
+	memset(p, 0, sizeof(*p));
+	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
+	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+	ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
+			     (struct p_header *)p, sizeof(*p), 0 );
+	mutex_unlock(&mdev->data.mutex);
+	return ok;
+}
+
+/*
+ * return values:
+ *   1 yes, we have a valid connection
+ *   0 oops, did not work out, please try again
+ *  -1 peer talks different language,
+ *     no point in trying again, please go standalone.
+ */
+static int drbd_do_handshake(struct drbd_conf *mdev)
+{
+	/* ASSERT current == mdev->receiver ... */
+	struct p_handshake *p = &mdev->data.rbuf.handshake;
+	const int expect = sizeof(struct p_handshake)
+			  -sizeof(struct p_header);
+	int rv;
+
+	rv = drbd_send_handshake(mdev);
+	if (!rv)
+		return 0;
+
+	rv = drbd_recv_header(mdev, &p->head);
+	if (!rv)
+		return 0;
+
+	if (p->head.command != P_HAND_SHAKE) {
+		dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
+		     cmdname(p->head.command), p->head.command);
+		return -1;
+	}
+
+	if (p->head.length != expect) {
+		dev_err(DEV, "expected HandShake length: %u, received: %u\n",
+		     expect, p->head.length);
+		return -1;
+	}
+
+	rv = drbd_recv(mdev, &p->head.payload, expect);
+
+	if (rv != expect) {
+		dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv);
+		return 0;
+	}
+
+	trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf,
+			__FILE__, __LINE__);
+
+	p->protocol_min = be32_to_cpu(p->protocol_min);
+	p->protocol_max = be32_to_cpu(p->protocol_max);
+	if (p->protocol_max == 0)
+		p->protocol_max = p->protocol_min;
+
+	if (PRO_VERSION_MAX < p->protocol_min ||
+	    PRO_VERSION_MIN > p->protocol_max)
+		goto incompat;
+
+	mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+
+	dev_info(DEV, "Handshake successful: "
+	     "Agreed network protocol version %d\n", mdev->agreed_pro_version);
+
+	return 1;
+
+ incompat:
+	dev_err(DEV, "incompatible DRBD dialects: "
+	    "I support %d-%d, peer supports %d-%d\n",
+	    PRO_VERSION_MIN, PRO_VERSION_MAX,
+	    p->protocol_min, p->protocol_max);
+	return -1;
+}
+
+#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+	dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+	dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+	return 0;
+}
+#else
+#define CHALLENGE_LEN 64
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
+	struct scatterlist sg;
+	char *response = NULL;
+	char *right_response = NULL;
+	char *peers_ch = NULL;
+	struct p_header p;
+	unsigned int key_len = strlen(mdev->net_conf->shared_secret);
+	unsigned int resp_size;
+	struct hash_desc desc;
+	int rv;
+
+	desc.tfm = mdev->cram_hmac_tfm;
+	desc.flags = 0;
+
+	rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+				(u8 *)mdev->net_conf->shared_secret, key_len);
+	if (rv) {
+		dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
+		rv = 0;
+		goto fail;
+	}
+
+	get_random_bytes(my_challenge, CHALLENGE_LEN);
+
+	rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
+	if (!rv)
+		goto fail;
+
+	rv = drbd_recv_header(mdev, &p);
+	if (!rv)
+		goto fail;
+
+	if (p.command != P_AUTH_CHALLENGE) {
+		dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
+		    cmdname(p.command), p.command);
+		rv = 0;
+		goto fail;
+	}
+
+	if (p.length > CHALLENGE_LEN*2) {
+		dev_err(DEV, "expected AuthChallenge payload too big.\n");
+		rv = 0;
+		goto fail;
+	}
+
+	peers_ch = kmalloc(p.length, GFP_NOIO);
+	if (peers_ch == NULL) {
+		dev_err(DEV, "kmalloc of peers_ch failed\n");
+		rv = 0;
+		goto fail;
+	}
+
+	rv = drbd_recv(mdev, peers_ch, p.length);
+
+	if (rv != p.length) {
+		dev_err(DEV, "short read AuthChallenge: l=%u\n", rv);
+		rv = 0;
+		goto fail;
+	}
+
+	resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+	response = kmalloc(resp_size, GFP_NOIO);
+	if (response == NULL) {
+		dev_err(DEV, "kmalloc of response failed\n");
+		rv = 0;
+		goto fail;
+	}
+
+	sg_init_table(&sg, 1);
+	sg_set_buf(&sg, peers_ch, p.length);
+
+	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+	if (rv) {
+		dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+		rv = 0;
+		goto fail;
+	}
+
+	rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
+	if (!rv)
+		goto fail;
+
+	rv = drbd_recv_header(mdev, &p);
+	if (!rv)
+		goto fail;
+
+	if (p.command != P_AUTH_RESPONSE) {
+		dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
+		    cmdname(p.command), p.command);
+		rv = 0;
+		goto fail;
+	}
+
+	if (p.length != resp_size) {
+		dev_err(DEV, "expected AuthResponse payload of wrong size\n");
+		rv = 0;
+		goto fail;
+	}
+
+	rv = drbd_recv(mdev, response , resp_size);
+
+	if (rv != resp_size) {
+		dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv);
+		rv = 0;
+		goto fail;
+	}
+
+	right_response = kmalloc(resp_size, GFP_NOIO);
+	if (response == NULL) {
+		dev_err(DEV, "kmalloc of right_response failed\n");
+		rv = 0;
+		goto fail;
+	}
+
+	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
+
+	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+	if (rv) {
+		dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+		rv = 0;
+		goto fail;
+	}
+
+	rv = !memcmp(response, right_response, resp_size);
+
+	if (rv)
+		dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
+		     resp_size, mdev->net_conf->cram_hmac_alg);
+
+ fail:
+	kfree(peers_ch);
+	kfree(response);
+	kfree(right_response);
+
+	return rv;
+}
+#endif
+
+int drbdd_init(struct drbd_thread *thi)
+{
+	struct drbd_conf *mdev = thi->mdev;
+	unsigned int minor = mdev_to_minor(mdev);
+	int h;
+
+	sprintf(current->comm, "drbd%d_receiver", minor);
+
+	dev_info(DEV, "receiver (re)started\n");
+
+	do {
+		h = drbd_connect(mdev);
+		if (h == 0) {
+			drbd_disconnect(mdev);
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ);
+		}
+		if (h == -1) {
+			dev_warn(DEV, "Discarding network configuration.\n");
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+		}
+	} while (h == 0);
+
+	if (h > 0) {
+		if (get_net_conf(mdev)) {
+			drbdd(mdev);
+			put_net_conf(mdev);
+		}
+	}
+
+	drbd_disconnect(mdev);
+
+	dev_info(DEV, "receiver terminated\n");
+	return 0;
+}
+
+/* ********* acknowledge sender ******** */
+
+static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_req_state_reply *p = (struct p_req_state_reply *)h;
+
+	int retcode = be32_to_cpu(p->retcode);
+
+	if (retcode >= SS_SUCCESS) {
+		set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
+	} else {
+		set_bit(CL_ST_CHG_FAIL, &mdev->flags);
+		dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
+		    drbd_set_st_err_str(retcode), retcode);
+	}
+	wake_up(&mdev->state_wait);
+
+	return TRUE;
+}
+
+static int got_Ping(struct drbd_conf *mdev, struct p_header *h)
+{
+	return drbd_send_ping_ack(mdev);
+
+}
+
+static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
+{
+	/* restore idle timeout */
+	mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+
+	return TRUE;
+}
+
+static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_block_ack *p = (struct p_block_ack *)h;
+	sector_t sector = be64_to_cpu(p->sector);
+	int blksize = be32_to_cpu(p->blksize);
+
+	D_ASSERT(mdev->agreed_pro_version >= 89);
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+	drbd_rs_complete_io(mdev, sector);
+	drbd_set_in_sync(mdev, sector, blksize);
+	/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
+	mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
+	dec_rs_pending(mdev);
+
+	return TRUE;
+}
+
+/* when we receive the ACK for a write request,
+ * verify that we actually know about it */
+static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
+	u64 id, sector_t sector)
+{
+	struct hlist_head *slot = tl_hash_slot(mdev, sector);
+	struct hlist_node *n;
+	struct drbd_request *req;
+
+	hlist_for_each_entry(req, n, slot, colision) {
+		if ((unsigned long)req == (unsigned long)id) {
+			if (req->sector != sector) {
+				dev_err(DEV, "_ack_id_to_req: found req %p but it has "
+				    "wrong sector (%llus versus %llus)\n", req,
+				    (unsigned long long)req->sector,
+				    (unsigned long long)sector);
+				break;
+			}
+			return req;
+		}
+	}
+	dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n",
+		(void *)(unsigned long)id, (unsigned long long)sector);
+	return NULL;
+}
+
+typedef struct drbd_request *(req_validator_fn)
+	(struct drbd_conf *mdev, u64 id, sector_t sector);
+
+static int validate_req_change_req_state(struct drbd_conf *mdev,
+	u64 id, sector_t sector, req_validator_fn validator,
+	const char *func, enum drbd_req_event what)
+{
+	struct drbd_request *req;
+	struct bio_and_error m;
+
+	spin_lock_irq(&mdev->req_lock);
+	req = validator(mdev, id, sector);
+	if (unlikely(!req)) {
+		spin_unlock_irq(&mdev->req_lock);
+		dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func);
+		return FALSE;
+	}
+	__req_mod(req, what, &m);
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+	return TRUE;
+}
+
+static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_block_ack *p = (struct p_block_ack *)h;
+	sector_t sector = be64_to_cpu(p->sector);
+	int blksize = be32_to_cpu(p->blksize);
+	enum drbd_req_event what;
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+	if (is_syncer_block_id(p->block_id)) {
+		drbd_set_in_sync(mdev, sector, blksize);
+		dec_rs_pending(mdev);
+		return TRUE;
+	}
+	switch (be16_to_cpu(h->command)) {
+	case P_RS_WRITE_ACK:
+		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+		what = write_acked_by_peer_and_sis;
+		break;
+	case P_WRITE_ACK:
+		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+		what = write_acked_by_peer;
+		break;
+	case P_RECV_ACK:
+		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B);
+		what = recv_acked_by_peer;
+		break;
+	case P_DISCARD_ACK:
+		D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+		what = conflict_discarded_by_peer;
+		break;
+	default:
+		D_ASSERT(0);
+		return FALSE;
+	}
+
+	return validate_req_change_req_state(mdev, p->block_id, sector,
+		_ack_id_to_req, __func__ , what);
+}
+
+static int got_NegAck(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_block_ack *p = (struct p_block_ack *)h;
+	sector_t sector = be64_to_cpu(p->sector);
+
+	if (__ratelimit(&drbd_ratelimit_state))
+		dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n");
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+	if (is_syncer_block_id(p->block_id)) {
+		int size = be32_to_cpu(p->blksize);
+		dec_rs_pending(mdev);
+		drbd_rs_failed_io(mdev, sector, size);
+		return TRUE;
+	}
+	return validate_req_change_req_state(mdev, p->block_id, sector,
+		_ack_id_to_req, __func__ , neg_acked);
+}
+
+static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_block_ack *p = (struct p_block_ack *)h;
+	sector_t sector = be64_to_cpu(p->sector);
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+	    (unsigned long long)sector, be32_to_cpu(p->blksize));
+
+	return validate_req_change_req_state(mdev, p->block_id, sector,
+		_ar_id_to_req, __func__ , neg_acked);
+}
+
+static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h)
+{
+	sector_t sector;
+	int size;
+	struct p_block_ack *p = (struct p_block_ack *)h;
+
+	sector = be64_to_cpu(p->sector);
+	size = be32_to_cpu(p->blksize);
+	D_ASSERT(p->block_id == ID_SYNCER);
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+	dec_rs_pending(mdev);
+
+	if (get_ldev_if_state(mdev, D_FAILED)) {
+		drbd_rs_complete_io(mdev, sector);
+		drbd_rs_failed_io(mdev, sector, size);
+		put_ldev(mdev);
+	}
+
+	return TRUE;
+}
+
+static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_barrier_ack *p = (struct p_barrier_ack *)h;
+
+	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
+
+	return TRUE;
+}
+
+static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
+{
+	struct p_block_ack *p = (struct p_block_ack *)h;
+	struct drbd_work *w;
+	sector_t sector;
+	int size;
+
+	sector = be64_to_cpu(p->sector);
+	size = be32_to_cpu(p->blksize);
+
+	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
+		drbd_ov_oos_found(mdev, sector, size);
+	else
+		ov_oos_print(mdev);
+
+	drbd_rs_complete_io(mdev, sector);
+	dec_rs_pending(mdev);
+
+	if (--mdev->ov_left == 0) {
+		w = kmalloc(sizeof(*w), GFP_NOIO);
+		if (w) {
+			w->cb = w_ov_finished;
+			drbd_queue_work_front(&mdev->data.work, w);
+		} else {
+			dev_err(DEV, "kmalloc(w) failed.");
+			ov_oos_print(mdev);
+			drbd_resync_finished(mdev);
+		}
+	}
+	return TRUE;
+}
+
+struct asender_cmd {
+	size_t pkt_size;
+	int (*process)(struct drbd_conf *mdev, struct p_header *h);
+};
+
+static struct asender_cmd *get_asender_cmd(int cmd)
+{
+	static struct asender_cmd asender_tbl[] = {
+		/* anything missing from this table is in
+		 * the drbd_cmd_handler (drbd_default_handler) table,
+		 * see the beginning of drbdd() */
+	[P_PING]	    = { sizeof(struct p_header), got_Ping },
+	[P_PING_ACK]	    = { sizeof(struct p_header), got_PingAck },
+	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
+	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
+	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
+	[P_DISCARD_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
+	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
+	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
+	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply},
+	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
+	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
+	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
+	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
+	[P_MAX_CMD]	    = { 0, NULL },
+	};
+	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
+		return NULL;
+	return &asender_tbl[cmd];
+}
+
+int drbd_asender(struct drbd_thread *thi)
+{
+	struct drbd_conf *mdev = thi->mdev;
+	struct p_header *h = &mdev->meta.rbuf.header;
+	struct asender_cmd *cmd = NULL;
+
+	int rv, len;
+	void *buf    = h;
+	int received = 0;
+	int expect   = sizeof(struct p_header);
+	int empty;
+
+	sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
+
+	current->policy = SCHED_RR;  /* Make this a realtime task! */
+	current->rt_priority = 2;    /* more important than all other tasks */
+
+	while (get_t_state(thi) == Running) {
+		drbd_thread_current_set_cpu(mdev);
+		if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
+			ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
+			mdev->meta.socket->sk->sk_rcvtimeo =
+				mdev->net_conf->ping_timeo*HZ/10;
+		}
+
+		/* conditionally cork;
+		 * it may hurt latency if we cork without much to send */
+		if (!mdev->net_conf->no_cork &&
+			3 < atomic_read(&mdev->unacked_cnt))
+			drbd_tcp_cork(mdev->meta.socket);
+		while (1) {
+			clear_bit(SIGNAL_ASENDER, &mdev->flags);
+			flush_signals(current);
+			if (!drbd_process_done_ee(mdev)) {
+				dev_err(DEV, "process_done_ee() = NOT_OK\n");
+				goto reconnect;
+			}
+			/* to avoid race with newly queued ACKs */
+			set_bit(SIGNAL_ASENDER, &mdev->flags);
+			spin_lock_irq(&mdev->req_lock);
+			empty = list_empty(&mdev->done_ee);
+			spin_unlock_irq(&mdev->req_lock);
+			/* new ack may have been queued right here,
+			 * but then there is also a signal pending,
+			 * and we start over... */
+			if (empty)
+				break;
+		}
+		/* but unconditionally uncork unless disabled */
+		if (!mdev->net_conf->no_cork)
+			drbd_tcp_uncork(mdev->meta.socket);
+
+		/* short circuit, recv_msg would return EINTR anyways. */
+		if (signal_pending(current))
+			continue;
+
+		rv = drbd_recv_short(mdev, mdev->meta.socket,
+				     buf, expect-received, 0);
+		clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+		flush_signals(current);
+
+		/* Note:
+		 * -EINTR	 (on meta) we got a signal
+		 * -EAGAIN	 (on meta) rcvtimeo expired
+		 * -ECONNRESET	 other side closed the connection
+		 * -ERESTARTSYS  (on data) we got a signal
+		 * rv <  0	 other than above: unexpected error!
+		 * rv == expected: full header or command
+		 * rv <  expected: "woken" by signal during receive
+		 * rv == 0	 : "connection shut down by peer"
+		 */
+		if (likely(rv > 0)) {
+			received += rv;
+			buf	 += rv;
+		} else if (rv == 0) {
+			dev_err(DEV, "meta connection shut down by peer.\n");
+			goto reconnect;
+		} else if (rv == -EAGAIN) {
+			if (mdev->meta.socket->sk->sk_rcvtimeo ==
+			    mdev->net_conf->ping_timeo*HZ/10) {
+				dev_err(DEV, "PingAck did not arrive in time.\n");
+				goto reconnect;
+			}
+			set_bit(SEND_PING, &mdev->flags);
+			continue;
+		} else if (rv == -EINTR) {
+			continue;
+		} else {
+			dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+			goto reconnect;
+		}
+
+		if (received == expect && cmd == NULL) {
+			if (unlikely(h->magic != BE_DRBD_MAGIC)) {
+				dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n",
+				    (long)be32_to_cpu(h->magic),
+				    h->command, h->length);
+				goto reconnect;
+			}
+			cmd = get_asender_cmd(be16_to_cpu(h->command));
+			len = be16_to_cpu(h->length);
+			if (unlikely(cmd == NULL)) {
+				dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n",
+				    (long)be32_to_cpu(h->magic),
+				    h->command, h->length);
+				goto disconnect;
+			}
+			expect = cmd->pkt_size;
+			ERR_IF(len != expect-sizeof(struct p_header)) {
+				trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__);
+				goto reconnect;
+			}
+		}
+		if (received == expect) {
+			D_ASSERT(cmd != NULL);
+			trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__);
+			if (!cmd->process(mdev, h))
+				goto reconnect;
+
+			buf	 = h;
+			received = 0;
+			expect	 = sizeof(struct p_header);
+			cmd	 = NULL;
+		}
+	}
+
+	if (0) {
+reconnect:
+		drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+	}
+	if (0) {
+disconnect:
+		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+	}
+	clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+	D_ASSERT(mdev->state.conn < C_CONNECTED);
+	dev_info(DEV, "asender terminated\n");
+
+	return 0;
+}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
new file mode 100644
index 000000000000..0656cf1edd57
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.c
@@ -0,0 +1,1132 @@
+/*
+   drbd_req.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include "drbd_req.h"
+
+
+/* Update disk stats at start of I/O request */
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
+{
+	const int rw = bio_data_dir(bio);
+	int cpu;
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
+	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+	part_stat_unlock();
+	mdev->vdisk->part0.in_flight[rw]++;
+}
+
+/* Update disk stats when completing request upwards */
+static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	int rw = bio_data_dir(req->master_bio);
+	unsigned long duration = jiffies - req->start_time;
+	int cpu;
+	cpu = part_stat_lock();
+	part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
+	part_round_stats(cpu, &mdev->vdisk->part0);
+	part_stat_unlock();
+	mdev->vdisk->part0.in_flight[rw]--;
+}
+
+static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
+{
+	const unsigned long s = req->rq_state;
+	/* if it was a write, we may have to set the corresponding
+	 * bit(s) out-of-sync first. If it had a local part, we need to
+	 * release the reference to the activity log. */
+	if (rw == WRITE) {
+		/* remove it from the transfer log.
+		 * well, only if it had been there in the first
+		 * place... if it had not (local only or conflicting
+		 * and never sent), it should still be "empty" as
+		 * initialized in drbd_req_new(), so we can list_del() it
+		 * here unconditionally */
+		list_del(&req->tl_requests);
+		/* Set out-of-sync unless both OK flags are set
+		 * (local only or remote failed).
+		 * Other places where we set out-of-sync:
+		 * READ with local io-error */
+		if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
+			drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+		if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
+			drbd_set_in_sync(mdev, req->sector, req->size);
+
+		/* one might be tempted to move the drbd_al_complete_io
+		 * to the local io completion callback drbd_endio_pri.
+		 * but, if this was a mirror write, we may only
+		 * drbd_al_complete_io after this is RQ_NET_DONE,
+		 * otherwise the extent could be dropped from the al
+		 * before it has actually been written on the peer.
+		 * if we crash before our peer knows about the request,
+		 * but after the extent has been dropped from the al,
+		 * we would forget to resync the corresponding extent.
+		 */
+		if (s & RQ_LOCAL_MASK) {
+			if (get_ldev_if_state(mdev, D_FAILED)) {
+				drbd_al_complete_io(mdev, req->sector);
+				put_ldev(mdev);
+			} else if (__ratelimit(&drbd_ratelimit_state)) {
+				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
+				     "but my Disk seems to have failed :(\n",
+				     (unsigned long long) req->sector);
+			}
+		}
+	}
+
+	/* if it was a local io error, we want to notify our
+	 * peer about that, and see if we need to
+	 * detach the disk and stuff.
+	 * to avoid allocating some special work
+	 * struct, reuse the request. */
+
+	/* THINK
+	 * why do we do this not when we detect the error,
+	 * but delay it until it is "done", i.e. possibly
+	 * until the next barrier ack? */
+
+	if (rw == WRITE &&
+	    ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
+		if (!(req->w.list.next == LIST_POISON1 ||
+		      list_empty(&req->w.list))) {
+			/* DEBUG ASSERT only; if this triggers, we
+			 * probably corrupt the worker list here */
+			dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
+			dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
+		}
+		req->w.cb = w_io_error;
+		drbd_queue_work(&mdev->data.work, &req->w);
+		/* drbd_req_free() is done in w_io_error */
+	} else {
+		drbd_req_free(req);
+	}
+}
+
+static void queue_barrier(struct drbd_conf *mdev)
+{
+	struct drbd_tl_epoch *b;
+
+	/* We are within the req_lock. Once we queued the barrier for sending,
+	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
+	 * barrier/epoch object is added. This is the only place this bit is
+	 * set. It indicates that the barrier for this epoch is already queued,
+	 * and no new epoch has been created yet. */
+	if (test_bit(CREATE_BARRIER, &mdev->flags))
+		return;
+
+	b = mdev->newest_tle;
+	b->w.cb = w_send_barrier;
+	/* inc_ap_pending done here, so we won't
+	 * get imbalanced on connection loss.
+	 * dec_ap_pending will be done in got_BarrierAck
+	 * or (on connection loss) in tl_clear.  */
+	inc_ap_pending(mdev);
+	drbd_queue_work(&mdev->data.work, &b->w);
+	set_bit(CREATE_BARRIER, &mdev->flags);
+}
+
+static void _about_to_complete_local_write(struct drbd_conf *mdev,
+	struct drbd_request *req)
+{
+	const unsigned long s = req->rq_state;
+	struct drbd_request *i;
+	struct drbd_epoch_entry *e;
+	struct hlist_node *n;
+	struct hlist_head *slot;
+
+	/* before we can signal completion to the upper layers,
+	 * we may need to close the current epoch */
+	if (mdev->state.conn >= C_CONNECTED &&
+	    req->epoch == mdev->newest_tle->br_number)
+		queue_barrier(mdev);
+
+	/* we need to do the conflict detection stuff,
+	 * if we have the ee_hash (two_primaries) and
+	 * this has been on the network */
+	if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) {
+		const sector_t sector = req->sector;
+		const int size = req->size;
+
+		/* ASSERT:
+		 * there must be no conflicting requests, since
+		 * they must have been failed on the spot */
+#define OVERLAPS overlaps(sector, size, i->sector, i->size)
+		slot = tl_hash_slot(mdev, sector);
+		hlist_for_each_entry(i, n, slot, colision) {
+			if (OVERLAPS) {
+				dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
+				      "other: %p %llus +%u\n",
+				      req, (unsigned long long)sector, size,
+				      i, (unsigned long long)i->sector, i->size);
+			}
+		}
+
+		/* maybe "wake" those conflicting epoch entries
+		 * that wait for this request to finish.
+		 *
+		 * currently, there can be only _one_ such ee
+		 * (well, or some more, which would be pending
+		 * P_DISCARD_ACK not yet sent by the asender...),
+		 * since we block the receiver thread upon the
+		 * first conflict detection, which will wait on
+		 * misc_wait.  maybe we want to assert that?
+		 *
+		 * anyways, if we found one,
+		 * we just have to do a wake_up.  */
+#undef OVERLAPS
+#define OVERLAPS overlaps(sector, size, e->sector, e->size)
+		slot = ee_hash_slot(mdev, req->sector);
+		hlist_for_each_entry(e, n, slot, colision) {
+			if (OVERLAPS) {
+				wake_up(&mdev->misc_wait);
+				break;
+			}
+		}
+	}
+#undef OVERLAPS
+}
+
+void complete_master_bio(struct drbd_conf *mdev,
+		struct bio_and_error *m)
+{
+	trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL);
+	bio_endio(m->bio, m->error);
+	dec_ap_bio(mdev);
+}
+
+/* Helper for __req_mod().
+ * Set m->bio to the master bio, if it is fit to be completed,
+ * or leave it alone (it is initialized to NULL in __req_mod),
+ * if it has already been completed, or cannot be completed yet.
+ * If m->bio is set, the error status to be returned is placed in m->error.
+ */
+void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
+{
+	const unsigned long s = req->rq_state;
+	struct drbd_conf *mdev = req->mdev;
+	/* only WRITES may end up here without a master bio (on barrier ack) */
+	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+
+	trace_drbd_req(req, nothing, "_req_may_be_done");
+
+	/* we must not complete the master bio, while it is
+	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
+	 *	not yet acknowledged by the peer
+	 *	not yet completed by the local io subsystem
+	 * these flags may get cleared in any order by
+	 *	the worker,
+	 *	the receiver,
+	 *	the bio_endio completion callbacks.
+	 */
+	if (s & RQ_NET_QUEUED)
+		return;
+	if (s & RQ_NET_PENDING)
+		return;
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
+	if (req->master_bio) {
+		/* this is data_received (remote read)
+		 * or protocol C P_WRITE_ACK
+		 * or protocol B P_RECV_ACK
+		 * or protocol A "handed_over_to_network" (SendAck)
+		 * or canceled or failed,
+		 * or killed from the transfer log due to connection loss.
+		 */
+
+		/*
+		 * figure out whether to report success or failure.
+		 *
+		 * report success when at least one of the operations succeeded.
+		 * or, to put the other way,
+		 * only report failure, when both operations failed.
+		 *
+		 * what to do about the failures is handled elsewhere.
+		 * what we need to do here is just: complete the master_bio.
+		 *
+		 * local completion error, if any, has been stored as ERR_PTR
+		 * in private_bio within drbd_endio_pri.
+		 */
+		int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
+		int error = PTR_ERR(req->private_bio);
+
+		/* remove the request from the conflict detection
+		 * respective block_id verification hash */
+		if (!hlist_unhashed(&req->colision))
+			hlist_del(&req->colision);
+		else
+			D_ASSERT((s & RQ_NET_MASK) == 0);
+
+		/* for writes we need to do some extra housekeeping */
+		if (rw == WRITE)
+			_about_to_complete_local_write(mdev, req);
+
+		/* Update disk stats */
+		_drbd_end_io_acct(mdev, req);
+
+		m->error = ok ? 0 : (error ?: -EIO);
+		m->bio = req->master_bio;
+		req->master_bio = NULL;
+	}
+
+	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
+		/* this is disconnected (local only) operation,
+		 * or protocol C P_WRITE_ACK,
+		 * or protocol A or B P_BARRIER_ACK,
+		 * or killed from the transfer log due to connection loss. */
+		_req_is_done(mdev, req, rw);
+	}
+	/* else: network part and not DONE yet. that is
+	 * protocol A or B, barrier ack still pending... */
+}
+
+/*
+ * checks whether there was an overlapping request
+ * or ee already registered.
+ *
+ * if so, return 1, in which case this request is completed on the spot,
+ * without ever being submitted or send.
+ *
+ * return 0 if it is ok to submit this request.
+ *
+ * NOTE:
+ * paranoia: assume something above us is broken, and issues different write
+ * requests for the same block simultaneously...
+ *
+ * To ensure these won't be reordered differently on both nodes, resulting in
+ * diverging data sets, we discard the later one(s). Not that this is supposed
+ * to happen, but this is the rationale why we also have to check for
+ * conflicting requests with local origin, and why we have to do so regardless
+ * of whether we allowed multiple primaries.
+ *
+ * BTW, in case we only have one primary, the ee_hash is empty anyways, and the
+ * second hlist_for_each_entry becomes a noop. This is even simpler than to
+ * grab a reference on the net_conf, and check for the two_primaries flag...
+ */
+static int _req_conflicts(struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->mdev;
+	const sector_t sector = req->sector;
+	const int size = req->size;
+	struct drbd_request *i;
+	struct drbd_epoch_entry *e;
+	struct hlist_node *n;
+	struct hlist_head *slot;
+
+	D_ASSERT(hlist_unhashed(&req->colision));
+
+	if (!get_net_conf(mdev))
+		return 0;
+
+	/* BUG_ON */
+	ERR_IF (mdev->tl_hash_s == 0)
+		goto out_no_conflict;
+	BUG_ON(mdev->tl_hash == NULL);
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+	slot = tl_hash_slot(mdev, sector);
+	hlist_for_each_entry(i, n, slot, colision) {
+		if (OVERLAPS) {
+			dev_alert(DEV, "%s[%u] Concurrent local write detected! "
+			      "[DISCARD L] new: %llus +%u; "
+			      "pending: %llus +%u\n",
+			      current->comm, current->pid,
+			      (unsigned long long)sector, size,
+			      (unsigned long long)i->sector, i->size);
+			goto out_conflict;
+		}
+	}
+
+	if (mdev->ee_hash_s) {
+		/* now, check for overlapping requests with remote origin */
+		BUG_ON(mdev->ee_hash == NULL);
+#undef OVERLAPS
+#define OVERLAPS overlaps(e->sector, e->size, sector, size)
+		slot = ee_hash_slot(mdev, sector);
+		hlist_for_each_entry(e, n, slot, colision) {
+			if (OVERLAPS) {
+				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+				      " [DISCARD L] new: %llus +%u; "
+				      "pending: %llus +%u\n",
+				      current->comm, current->pid,
+				      (unsigned long long)sector, size,
+				      (unsigned long long)e->sector, e->size);
+				goto out_conflict;
+			}
+		}
+	}
+#undef OVERLAPS
+
+out_no_conflict:
+	/* this is like it should be, and what we expected.
+	 * our users do behave after all... */
+	put_net_conf(mdev);
+	return 0;
+
+out_conflict:
+	put_net_conf(mdev);
+	return 1;
+}
+
+/* obviously this could be coded as many single functions
+ * instead of one huge switch,
+ * or by putting the code directly in the respective locations
+ * (as it has been before).
+ *
+ * but having it this way
+ *  enforces that it is all in this one place, where it is easier to audit,
+ *  it makes it obvious that whatever "event" "happens" to a request should
+ *  happen "atomically" within the req_lock,
+ *  and it enforces that we have to think in a very structured manner
+ *  about the "events" that may happen to a request during its life time ...
+ */
+void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+		struct bio_and_error *m)
+{
+	struct drbd_conf *mdev = req->mdev;
+	m->bio = NULL;
+
+	trace_drbd_req(req, what, NULL);
+
+	switch (what) {
+	default:
+		dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
+		break;
+
+	/* does not happen...
+	 * initialization done in drbd_req_new
+	case created:
+		break;
+		*/
+
+	case to_be_send: /* via network */
+		/* reached via drbd_make_request_common
+		 * and from w_read_retry_remote */
+		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+		req->rq_state |= RQ_NET_PENDING;
+		inc_ap_pending(mdev);
+		break;
+
+	case to_be_submitted: /* locally */
+		/* reached via drbd_make_request_common */
+		D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
+		req->rq_state |= RQ_LOCAL_PENDING;
+		break;
+
+	case completed_ok:
+		if (bio_data_dir(req->master_bio) == WRITE)
+			mdev->writ_cnt += req->size>>9;
+		else
+			mdev->read_cnt += req->size>>9;
+
+		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
+		req->rq_state &= ~RQ_LOCAL_PENDING;
+
+		_req_may_be_done(req, m);
+		put_ldev(mdev);
+		break;
+
+	case write_completed_with_error:
+		req->rq_state |= RQ_LOCAL_COMPLETED;
+		req->rq_state &= ~RQ_LOCAL_PENDING;
+
+		dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
+		      (unsigned long long)req->sector, req->size);
+		/* and now: check how to handle local io error. */
+		__drbd_chk_io_error(mdev, FALSE);
+		_req_may_be_done(req, m);
+		put_ldev(mdev);
+		break;
+
+	case read_ahead_completed_with_error:
+		/* it is legal to fail READA */
+		req->rq_state |= RQ_LOCAL_COMPLETED;
+		req->rq_state &= ~RQ_LOCAL_PENDING;
+		_req_may_be_done(req, m);
+		put_ldev(mdev);
+		break;
+
+	case read_completed_with_error:
+		drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+		req->rq_state |= RQ_LOCAL_COMPLETED;
+		req->rq_state &= ~RQ_LOCAL_PENDING;
+
+		dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
+		      (unsigned long long)req->sector, req->size);
+		/* _req_mod(req,to_be_send); oops, recursion... */
+		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+		req->rq_state |= RQ_NET_PENDING;
+		inc_ap_pending(mdev);
+
+		__drbd_chk_io_error(mdev, FALSE);
+		put_ldev(mdev);
+		/* NOTE: if we have no connection,
+		 * or know the peer has no good data either,
+		 * then we don't actually need to "queue_for_net_read",
+		 * but we do so anyways, since the drbd_io_error()
+		 * and the potential state change to "Diskless"
+		 * needs to be done from process context */
+
+		/* fall through: _req_mod(req,queue_for_net_read); */
+
+	case queue_for_net_read:
+		/* READ or READA, and
+		 * no local disk,
+		 * or target area marked as invalid,
+		 * or just got an io-error. */
+		/* from drbd_make_request_common
+		 * or from bio_endio during read io-error recovery */
+
+		/* so we can verify the handle in the answer packet
+		 * corresponding hlist_del is in _req_may_be_done() */
+		hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+
+		set_bit(UNPLUG_REMOTE, &mdev->flags); /* why? */
+
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		req->rq_state |= RQ_NET_QUEUED;
+		req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
+			? w_read_retry_remote
+			: w_send_read_req;
+		drbd_queue_work(&mdev->data.work, &req->w);
+		break;
+
+	case queue_for_net_write:
+		/* assert something? */
+		/* from drbd_make_request_common only */
+
+		hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+		/* corresponding hlist_del is in _req_may_be_done() */
+
+		/* NOTE
+		 * In case the req ended up on the transfer log before being
+		 * queued on the worker, it could lead to this request being
+		 * missed during cleanup after connection loss.
+		 * So we have to do both operations here,
+		 * within the same lock that protects the transfer log.
+		 *
+		 * _req_add_to_epoch(req); this has to be after the
+		 * _maybe_start_new_epoch(req); which happened in
+		 * drbd_make_request_common, because we now may set the bit
+		 * again ourselves to close the current epoch.
+		 *
+		 * Add req to the (now) current epoch (barrier). */
+
+		/* see drbd_make_request_common,
+		 * just after it grabs the req_lock */
+		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
+
+		req->epoch = mdev->newest_tle->br_number;
+		list_add_tail(&req->tl_requests,
+				&mdev->newest_tle->requests);
+
+		/* increment size of current epoch */
+		mdev->newest_tle->n_req++;
+
+		/* queue work item to send data */
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		req->rq_state |= RQ_NET_QUEUED;
+		req->w.cb =  w_send_dblock;
+		drbd_queue_work(&mdev->data.work, &req->w);
+
+		/* close the epoch, in case it outgrew the limit */
+		if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
+			queue_barrier(mdev);
+
+		break;
+
+	case send_canceled:
+		/* treat it the same */
+	case send_failed:
+		/* real cleanup will be done from tl_clear.  just update flags
+		 * so it is no longer marked as on the worker queue */
+		req->rq_state &= ~RQ_NET_QUEUED;
+		/* if we did it right, tl_clear should be scheduled only after
+		 * this, so this should not be necessary! */
+		_req_may_be_done(req, m);
+		break;
+
+	case handed_over_to_network:
+		/* assert something? */
+		if (bio_data_dir(req->master_bio) == WRITE &&
+		    mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+			/* this is what is dangerous about protocol A:
+			 * pretend it was successfully written on the peer. */
+			if (req->rq_state & RQ_NET_PENDING) {
+				dec_ap_pending(mdev);
+				req->rq_state &= ~RQ_NET_PENDING;
+				req->rq_state |= RQ_NET_OK;
+			} /* else: neg-ack was faster... */
+			/* it is still not yet RQ_NET_DONE until the
+			 * corresponding epoch barrier got acked as well,
+			 * so we know what to dirty on connection loss */
+		}
+		req->rq_state &= ~RQ_NET_QUEUED;
+		req->rq_state |= RQ_NET_SENT;
+		/* because _drbd_send_zc_bio could sleep, and may want to
+		 * dereference the bio even after the "write_acked_by_peer" and
+		 * "completed_ok" events came in, once we return from
+		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
+		 * whether it is done already, and end it.  */
+		_req_may_be_done(req, m);
+		break;
+
+	case connection_lost_while_pending:
+		/* transfer log cleanup after connection loss */
+		/* assert something? */
+		if (req->rq_state & RQ_NET_PENDING)
+			dec_ap_pending(mdev);
+		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+		req->rq_state |= RQ_NET_DONE;
+		/* if it is still queued, we may not complete it here.
+		 * it will be canceled soon. */
+		if (!(req->rq_state & RQ_NET_QUEUED))
+			_req_may_be_done(req, m);
+		break;
+
+	case write_acked_by_peer_and_sis:
+		req->rq_state |= RQ_NET_SIS;
+	case conflict_discarded_by_peer:
+		/* for discarded conflicting writes of multiple primaries,
+		 * there is no need to keep anything in the tl, potential
+		 * node crashes are covered by the activity log. */
+		if (what == conflict_discarded_by_peer)
+			dev_alert(DEV, "Got DiscardAck packet %llus +%u!"
+			      " DRBD is not a random data generator!\n",
+			      (unsigned long long)req->sector, req->size);
+		req->rq_state |= RQ_NET_DONE;
+		/* fall through */
+	case write_acked_by_peer:
+		/* protocol C; successfully written on peer.
+		 * Nothing to do here.
+		 * We want to keep the tl in place for all protocols, to cater
+		 * for volatile write-back caches on lower level devices.
+		 *
+		 * A barrier request is expected to have forced all prior
+		 * requests onto stable storage, so completion of a barrier
+		 * request could set NET_DONE right here, and not wait for the
+		 * P_BARRIER_ACK, but that is an unnecessary optimization. */
+
+		/* this makes it effectively the same as for: */
+	case recv_acked_by_peer:
+		/* protocol B; pretends to be successfully written on peer.
+		 * see also notes above in handed_over_to_network about
+		 * protocol != C */
+		req->rq_state |= RQ_NET_OK;
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		dec_ap_pending(mdev);
+		req->rq_state &= ~RQ_NET_PENDING;
+		_req_may_be_done(req, m);
+		break;
+
+	case neg_acked:
+		/* assert something? */
+		if (req->rq_state & RQ_NET_PENDING)
+			dec_ap_pending(mdev);
+		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+
+		req->rq_state |= RQ_NET_DONE;
+		_req_may_be_done(req, m);
+		/* else: done by handed_over_to_network */
+		break;
+
+	case barrier_acked:
+		if (req->rq_state & RQ_NET_PENDING) {
+			/* barrier came in before all requests have been acked.
+			 * this is bad, because if the connection is lost now,
+			 * we won't be able to clean them up... */
+			dev_err(DEV, "FIXME (barrier_acked but pending)\n");
+			trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)");
+			list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
+		}
+		D_ASSERT(req->rq_state & RQ_NET_SENT);
+		req->rq_state |= RQ_NET_DONE;
+		_req_may_be_done(req, m);
+		break;
+
+	case data_received:
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		dec_ap_pending(mdev);
+		req->rq_state &= ~RQ_NET_PENDING;
+		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
+		_req_may_be_done(req, m);
+		break;
+	};
+}
+
+/* we may do a local read if:
+ * - we are consistent (of course),
+ * - or we are generally inconsistent,
+ *   BUT we are still/already IN SYNC for this area.
+ *   since size may be bigger than BM_BLOCK_SIZE,
+ *   we may need to check several bits.
+ */
+static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
+{
+	unsigned long sbnr, ebnr;
+	sector_t esector, nr_sectors;
+
+	if (mdev->state.disk == D_UP_TO_DATE)
+		return 1;
+	if (mdev->state.disk >= D_OUTDATED)
+		return 0;
+	if (mdev->state.disk <  D_INCONSISTENT)
+		return 0;
+	/* state.disk == D_INCONSISTENT   We will have a look at the BitMap */
+	nr_sectors = drbd_get_capacity(mdev->this_bdev);
+	esector = sector + (size >> 9) - 1;
+
+	D_ASSERT(sector  < nr_sectors);
+	D_ASSERT(esector < nr_sectors);
+
+	sbnr = BM_SECT_TO_BIT(sector);
+	ebnr = BM_SECT_TO_BIT(esector);
+
+	return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
+}
+
+static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
+{
+	const int rw = bio_rw(bio);
+	const int size = bio->bi_size;
+	const sector_t sector = bio->bi_sector;
+	struct drbd_tl_epoch *b = NULL;
+	struct drbd_request *req;
+	int local, remote;
+	int err = -EIO;
+
+	/* allocate outside of all locks; */
+	req = drbd_req_new(mdev, bio);
+	if (!req) {
+		dec_ap_bio(mdev);
+		/* only pass the error to the upper layers.
+		 * if user cannot handle io errors, that's not our business. */
+		dev_err(DEV, "could not kmalloc() req\n");
+		bio_endio(bio, -ENOMEM);
+		return 0;
+	}
+
+	trace_drbd_bio(mdev, "Rq", bio, 0, req);
+
+	local = get_ldev(mdev);
+	if (!local) {
+		bio_put(req->private_bio); /* or we get a bio leak */
+		req->private_bio = NULL;
+	}
+	if (rw == WRITE) {
+		remote = 1;
+	} else {
+		/* READ || READA */
+		if (local) {
+			if (!drbd_may_do_local_read(mdev, sector, size)) {
+				/* we could kick the syncer to
+				 * sync this extent asap, wait for
+				 * it, then continue locally.
+				 * Or just issue the request remotely.
+				 */
+				local = 0;
+				bio_put(req->private_bio);
+				req->private_bio = NULL;
+				put_ldev(mdev);
+			}
+		}
+		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
+	}
+
+	/* If we have a disk, but a READA request is mapped to remote,
+	 * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
+	 * Just fail that READA request right here.
+	 *
+	 * THINK: maybe fail all READA when not local?
+	 *        or make this configurable...
+	 *        if network is slow, READA won't do any good.
+	 */
+	if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
+		err = -EWOULDBLOCK;
+		goto fail_and_free_req;
+	}
+
+	/* For WRITES going to the local disk, grab a reference on the target
+	 * extent.  This waits for any resync activity in the corresponding
+	 * resync extent to finish, and, if necessary, pulls in the target
+	 * extent into the activity log, which involves further disk io because
+	 * of transactional on-disk meta data updates. */
+	if (rw == WRITE && local)
+		drbd_al_begin_io(mdev, sector);
+
+	remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
+			    (mdev->state.pdsk == D_INCONSISTENT &&
+			     mdev->state.conn >= C_CONNECTED));
+
+	if (!(local || remote)) {
+		dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+		goto fail_free_complete;
+	}
+
+	/* For WRITE request, we have to make sure that we have an
+	 * unused_spare_tle, in case we need to start a new epoch.
+	 * I try to be smart and avoid to pre-allocate always "just in case",
+	 * but there is a race between testing the bit and pointer outside the
+	 * spinlock, and grabbing the spinlock.
+	 * if we lost that race, we retry.  */
+	if (rw == WRITE && remote &&
+	    mdev->unused_spare_tle == NULL &&
+	    test_bit(CREATE_BARRIER, &mdev->flags)) {
+allocate_barrier:
+		b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
+		if (!b) {
+			dev_err(DEV, "Failed to alloc barrier.\n");
+			err = -ENOMEM;
+			goto fail_free_complete;
+		}
+	}
+
+	/* GOOD, everything prepared, grab the spin_lock */
+	spin_lock_irq(&mdev->req_lock);
+
+	if (remote) {
+		remote = (mdev->state.pdsk == D_UP_TO_DATE ||
+			    (mdev->state.pdsk == D_INCONSISTENT &&
+			     mdev->state.conn >= C_CONNECTED));
+		if (!remote)
+			dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
+		if (!(local || remote)) {
+			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+			spin_unlock_irq(&mdev->req_lock);
+			goto fail_free_complete;
+		}
+	}
+
+	if (b && mdev->unused_spare_tle == NULL) {
+		mdev->unused_spare_tle = b;
+		b = NULL;
+	}
+	if (rw == WRITE && remote &&
+	    mdev->unused_spare_tle == NULL &&
+	    test_bit(CREATE_BARRIER, &mdev->flags)) {
+		/* someone closed the current epoch
+		 * while we were grabbing the spinlock */
+		spin_unlock_irq(&mdev->req_lock);
+		goto allocate_barrier;
+	}
+
+
+	/* Update disk stats */
+	_drbd_start_io_acct(mdev, req, bio);
+
+	/* _maybe_start_new_epoch(mdev);
+	 * If we need to generate a write barrier packet, we have to add the
+	 * new epoch (barrier) object, and queue the barrier packet for sending,
+	 * and queue the req's data after it _within the same lock_, otherwise
+	 * we have race conditions were the reorder domains could be mixed up.
+	 *
+	 * Even read requests may start a new epoch and queue the corresponding
+	 * barrier packet.  To get the write ordering right, we only have to
+	 * make sure that, if this is a write request and it triggered a
+	 * barrier packet, this request is queued within the same spinlock. */
+	if (remote && mdev->unused_spare_tle &&
+	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+		_tl_add_barrier(mdev, mdev->unused_spare_tle);
+		mdev->unused_spare_tle = NULL;
+	} else {
+		D_ASSERT(!(remote && rw == WRITE &&
+			   test_bit(CREATE_BARRIER, &mdev->flags)));
+	}
+
+	/* NOTE
+	 * Actually, 'local' may be wrong here already, since we may have failed
+	 * to write to the meta data, and may become wrong anytime because of
+	 * local io-error for some other request, which would lead to us
+	 * "detaching" the local disk.
+	 *
+	 * 'remote' may become wrong any time because the network could fail.
+	 *
+	 * This is a harmless race condition, though, since it is handled
+	 * correctly at the appropriate places; so it just defers the failure
+	 * of the respective operation.
+	 */
+
+	/* mark them early for readability.
+	 * this just sets some state flags. */
+	if (remote)
+		_req_mod(req, to_be_send);
+	if (local)
+		_req_mod(req, to_be_submitted);
+
+	/* check this request on the collision detection hash tables.
+	 * if we have a conflict, just complete it here.
+	 * THINK do we want to check reads, too? (I don't think so...) */
+	if (rw == WRITE && _req_conflicts(req)) {
+		/* this is a conflicting request.
+		 * even though it may have been only _partially_
+		 * overlapping with one of the currently pending requests,
+		 * without even submitting or sending it, we will
+		 * pretend that it was successfully served right now.
+		 */
+		if (local) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			drbd_al_complete_io(mdev, req->sector);
+			put_ldev(mdev);
+			local = 0;
+		}
+		if (remote)
+			dec_ap_pending(mdev);
+		_drbd_end_io_acct(mdev, req);
+		/* THINK: do we want to fail it (-EIO), or pretend success? */
+		bio_endio(req->master_bio, 0);
+		req->master_bio = NULL;
+		dec_ap_bio(mdev);
+		drbd_req_free(req);
+		remote = 0;
+	}
+
+	/* NOTE remote first: to get the concurrent write detection right,
+	 * we must register the request before start of local IO.  */
+	if (remote) {
+		/* either WRITE and C_CONNECTED,
+		 * or READ, and no local disk,
+		 * or READ, but not in sync.
+		 */
+		_req_mod(req, (rw == WRITE)
+				? queue_for_net_write
+				: queue_for_net_read);
+	}
+	spin_unlock_irq(&mdev->req_lock);
+	kfree(b); /* if someone else has beaten us to it... */
+
+	if (local) {
+		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+
+		trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL);
+
+		if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
+				     : rw == READ  ? DRBD_FAULT_DT_RD
+				     :               DRBD_FAULT_DT_RA))
+			bio_endio(req->private_bio, -EIO);
+		else
+			generic_make_request(req->private_bio);
+	}
+
+	/* we need to plug ALWAYS since we possibly need to kick lo_dev.
+	 * we plug after submit, so we won't miss an unplug event */
+	drbd_plug_device(mdev);
+
+	return 0;
+
+fail_free_complete:
+	if (rw == WRITE && local)
+		drbd_al_complete_io(mdev, sector);
+fail_and_free_req:
+	if (local) {
+		bio_put(req->private_bio);
+		req->private_bio = NULL;
+		put_ldev(mdev);
+	}
+	bio_endio(bio, err);
+	drbd_req_free(req);
+	dec_ap_bio(mdev);
+	kfree(b);
+
+	return 0;
+}
+
+/* helper function for drbd_make_request
+ * if we can determine just by the mdev (state) that this request will fail,
+ * return 1
+ * otherwise return 0
+ */
+static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
+{
+	/* Unconfigured */
+	if (mdev->state.conn == C_DISCONNECTING &&
+	    mdev->state.disk == D_DISKLESS)
+		return 1;
+
+	if (mdev->state.role != R_PRIMARY &&
+		(!allow_oos || is_write)) {
+		if (__ratelimit(&drbd_ratelimit_state)) {
+			dev_err(DEV, "Process %s[%u] tried to %s; "
+			    "since we are not in Primary state, "
+			    "we cannot allow this\n",
+			    current->comm, current->pid,
+			    is_write ? "WRITE" : "READ");
+		}
+		return 1;
+	}
+
+	/*
+	 * Paranoia: we might have been primary, but sync target, or
+	 * even diskless, then lost the connection.
+	 * This should have been handled (panic? suspend?) somewhere
+	 * else. But maybe it was not, so check again here.
+	 * Caution: as long as we do not have a read/write lock on mdev,
+	 * to serialize state changes, this is racy, since we may lose
+	 * the connection *after* we test for the cstate.
+	 */
+	if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+int drbd_make_request_26(struct request_queue *q, struct bio *bio)
+{
+	unsigned int s_enr, e_enr;
+	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+
+	if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
+		bio_endio(bio, -EPERM);
+		return 0;
+	}
+
+	/* Reject barrier requests if we know the underlying device does
+	 * not support them.
+	 * XXX: Need to get this info from peer as well some how so we
+	 * XXX: reject if EITHER side/data/metadata area does not support them.
+	 *
+	 * because of those XXX, this is not yet enabled,
+	 * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
+	 */
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) {
+		/* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
+
+	/*
+	 * what we "blindly" assume:
+	 */
+	D_ASSERT(bio->bi_size > 0);
+	D_ASSERT((bio->bi_size & 0x1ff) == 0);
+	D_ASSERT(bio->bi_idx == 0);
+
+	/* to make some things easier, force alignment of requests within the
+	 * granularity of our hash tables */
+	s_enr = bio->bi_sector >> HT_SHIFT;
+	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+
+	if (likely(s_enr == e_enr)) {
+		inc_ap_bio(mdev, 1);
+		return drbd_make_request_common(mdev, bio);
+	}
+
+	/* can this bio be split generically?
+	 * Maybe add our own split-arbitrary-bios function. */
+	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) {
+		/* rather error out here than BUG in bio_split */
+		dev_err(DEV, "bio would need to, but cannot, be split: "
+		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
+		    bio->bi_vcnt, bio->bi_idx, bio->bi_size,
+		    (unsigned long long)bio->bi_sector);
+		bio_endio(bio, -EINVAL);
+	} else {
+		/* This bio crosses some boundary, so we have to split it. */
+		struct bio_pair *bp;
+		/* works for the "do not cross hash slot boundaries" case
+		 * e.g. sector 262269, size 4096
+		 * s_enr = 262269 >> 6 = 4097
+		 * e_enr = (262269+8-1) >> 6 = 4098
+		 * HT_SHIFT = 6
+		 * sps = 64, mask = 63
+		 * first_sectors = 64 - (262269 & 63) = 3
+		 */
+		const sector_t sect = bio->bi_sector;
+		const int sps = 1 << HT_SHIFT; /* sectors per slot */
+		const int mask = sps - 1;
+		const sector_t first_sectors = sps - (sect & mask);
+		bp = bio_split(bio,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+				bio_split_pool,
+#endif
+				first_sectors);
+
+		/* we need to get a "reference count" (ap_bio_cnt)
+		 * to avoid races with the disconnect/reconnect/suspend code.
+		 * In case we need to split the bio here, we need to get two references
+		 * atomically, otherwise we might deadlock when trying to submit the
+		 * second one! */
+		inc_ap_bio(mdev, 2);
+
+		D_ASSERT(e_enr == s_enr + 1);
+
+		drbd_make_request_common(mdev, &bp->bio1);
+		drbd_make_request_common(mdev, &bp->bio2);
+		bio_pair_release(bp);
+	}
+	return 0;
+}
+
+/* This is called by bio_add_page().  With this function we reduce
+ * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs
+ * units (was AL_EXTENTs).
+ *
+ * we do the calculation within the lower 32bit of the byte offsets,
+ * since we don't care for actual offset, but only check whether it
+ * would cross "activity log extent" boundaries.
+ *
+ * As long as the BIO is empty we have to allow at least one bvec,
+ * regardless of size and offset.  so the resulting bio may still
+ * cross extent boundaries.  those are dealt with (bio_split) in
+ * drbd_make_request_26.
+ */
+int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+	unsigned int bio_offset =
+		(unsigned int)bvm->bi_sector << 9; /* 32 bit */
+	unsigned int bio_size = bvm->bi_size;
+	int limit, backing_limit;
+
+	limit = DRBD_MAX_SEGMENT_SIZE
+	      - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size);
+	if (limit < 0)
+		limit = 0;
+	if (bio_size == 0) {
+		if (limit <= bvec->bv_len)
+			limit = bvec->bv_len;
+	} else if (limit && get_ldev(mdev)) {
+		struct request_queue * const b =
+			mdev->ldev->backing_bdev->bd_disk->queue;
+		if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) {
+			backing_limit = b->merge_bvec_fn(b, bvm, bvec);
+			limit = min(limit, backing_limit);
+		}
+		put_ldev(mdev);
+	}
+	return limit;
+}
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
new file mode 100644
index 000000000000..d37ab57f1209
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.h
@@ -0,0 +1,327 @@
+/*
+   drbd_req.h
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+   Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+
+   DRBD is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   DRBD is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_REQ_H
+#define _DRBD_REQ_H
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+
+/* The request callbacks will be called in irq context by the IDE drivers,
+   and in Softirqs/Tasklets/BH context by the SCSI drivers,
+   and by the receiver and worker in kernel-thread context.
+   Try to get the locking right :) */
+
+/*
+ * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are
+ * associated with IO requests originating from the block layer above us.
+ *
+ * There are quite a few things that may happen to a drbd request
+ * during its lifetime.
+ *
+ *  It will be created.
+ *  It will be marked with the intention to be
+ *    submitted to local disk and/or
+ *    send via the network.
+ *
+ *  It has to be placed on the transfer log and other housekeeping lists,
+ *  In case we have a network connection.
+ *
+ *  It may be identified as a concurrent (write) request
+ *    and be handled accordingly.
+ *
+ *  It may me handed over to the local disk subsystem.
+ *  It may be completed by the local disk subsystem,
+ *    either sucessfully or with io-error.
+ *  In case it is a READ request, and it failed locally,
+ *    it may be retried remotely.
+ *
+ *  It may be queued for sending.
+ *  It may be handed over to the network stack,
+ *    which may fail.
+ *  It may be acknowledged by the "peer" according to the wire_protocol in use.
+ *    this may be a negative ack.
+ *  It may receive a faked ack when the network connection is lost and the
+ *  transfer log is cleaned up.
+ *  Sending may be canceled due to network connection loss.
+ *  When it finally has outlived its time,
+ *    corresponding dirty bits in the resync-bitmap may be cleared or set,
+ *    it will be destroyed,
+ *    and completion will be signalled to the originator,
+ *      with or without "success".
+ */
+
+enum drbd_req_event {
+	created,
+	to_be_send,
+	to_be_submitted,
+
+	/* XXX yes, now I am inconsistent...
+	 * these two are not "events" but "actions"
+	 * oh, well... */
+	queue_for_net_write,
+	queue_for_net_read,
+
+	send_canceled,
+	send_failed,
+	handed_over_to_network,
+	connection_lost_while_pending,
+	recv_acked_by_peer,
+	write_acked_by_peer,
+	write_acked_by_peer_and_sis, /* and set_in_sync */
+	conflict_discarded_by_peer,
+	neg_acked,
+	barrier_acked, /* in protocol A and B */
+	data_received, /* (remote read) */
+
+	read_completed_with_error,
+	read_ahead_completed_with_error,
+	write_completed_with_error,
+	completed_ok,
+	nothing, /* for tracing only */
+};
+
+/* encoding of request states for now.  we don't actually need that many bits.
+ * we don't need to do atomic bit operations either, since most of the time we
+ * need to look at the connection state and/or manipulate some lists at the
+ * same time, so we should hold the request lock anyways.
+ */
+enum drbd_req_state_bits {
+	/* 210
+	 * 000: no local possible
+	 * 001: to be submitted
+	 *    UNUSED, we could map: 011: submitted, completion still pending
+	 * 110: completed ok
+	 * 010: completed with error
+	 */
+	__RQ_LOCAL_PENDING,
+	__RQ_LOCAL_COMPLETED,
+	__RQ_LOCAL_OK,
+
+	/* 76543
+	 * 00000: no network possible
+	 * 00001: to be send
+	 * 00011: to be send, on worker queue
+	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
+	 * 11101: sent,
+	 *        recv_ack (B) or implicit "ack" (A),
+	 *        still waiting for the barrier ack.
+	 *        master_bio may already be completed and invalidated.
+	 * 11100: write_acked (C),
+	 *        data_received (for remote read, any protocol)
+	 *        or finally the barrier ack has arrived (B,A)...
+	 *        request can be freed
+	 * 01100: neg-acked (write, protocol C)
+	 *        or neg-d-acked (read, any protocol)
+	 *        or killed from the transfer log
+	 *        during cleanup after connection loss
+	 *        request can be freed
+	 * 01000: canceled or send failed...
+	 *        request can be freed
+	 */
+
+	/* if "SENT" is not set, yet, this can still fail or be canceled.
+	 * if "SENT" is set already, we still wait for an Ack packet.
+	 * when cleared, the master_bio may be completed.
+	 * in (B,A) the request object may still linger on the transaction log
+	 * until the corresponding barrier ack comes in */
+	__RQ_NET_PENDING,
+
+	/* If it is QUEUED, and it is a WRITE, it is also registered in the
+	 * transfer log. Currently we need this flag to avoid conflicts between
+	 * worker canceling the request and tl_clear_barrier killing it from
+	 * transfer log.  We should restructure the code so this conflict does
+	 * no longer occur. */
+	__RQ_NET_QUEUED,
+
+	/* well, actually only "handed over to the network stack".
+	 *
+	 * TODO can potentially be dropped because of the similar meaning
+	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
+	 * however it is not exactly the same. before we drop it
+	 * we must ensure that we can tell a request with network part
+	 * from a request without, regardless of what happens to it. */
+	__RQ_NET_SENT,
+
+	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
+	 * basically this means the corresponding P_BARRIER_ACK was received */
+	__RQ_NET_DONE,
+
+	/* whether or not we know (C) or pretend (B,A) that the write
+	 * was successfully written on the peer.
+	 */
+	__RQ_NET_OK,
+
+	/* peer called drbd_set_in_sync() for this write */
+	__RQ_NET_SIS,
+
+	/* keep this last, its for the RQ_NET_MASK */
+	__RQ_NET_MAX,
+};
+
+#define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
+#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
+#define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
+
+#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+
+#define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
+#define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
+#define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
+#define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
+#define RQ_NET_OK          (1UL << __RQ_NET_OK)
+#define RQ_NET_SIS         (1UL << __RQ_NET_SIS)
+
+/* 0x1f8 */
+#define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
+
+/* epoch entries */
+static inline
+struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+	BUG_ON(mdev->ee_hash_s == 0);
+	return mdev->ee_hash +
+		((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
+}
+
+/* transfer log (drbd_request objects) */
+static inline
+struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+	BUG_ON(mdev->tl_hash_s == 0);
+	return mdev->tl_hash +
+		((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
+}
+
+/* application reads (drbd_request objects) */
+static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+	return mdev->app_reads_hash
+		+ ((unsigned int)(sector) % APP_R_HSIZE);
+}
+
+/* when we receive the answer for a read request,
+ * verify that we actually know about it */
+static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
+	u64 id, sector_t sector)
+{
+	struct hlist_head *slot = ar_hash_slot(mdev, sector);
+	struct hlist_node *n;
+	struct drbd_request *req;
+
+	hlist_for_each_entry(req, n, slot, colision) {
+		if ((unsigned long)req == (unsigned long)id) {
+			D_ASSERT(req->sector == sector);
+			return req;
+		}
+	}
+	return NULL;
+}
+
+static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
+	struct bio *bio_src)
+{
+	struct bio *bio;
+	struct drbd_request *req =
+		mempool_alloc(drbd_request_mempool, GFP_NOIO);
+	if (likely(req)) {
+		bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+
+		req->rq_state    = 0;
+		req->mdev        = mdev;
+		req->master_bio  = bio_src;
+		req->private_bio = bio;
+		req->epoch       = 0;
+		req->sector      = bio->bi_sector;
+		req->size        = bio->bi_size;
+		req->start_time  = jiffies;
+		INIT_HLIST_NODE(&req->colision);
+		INIT_LIST_HEAD(&req->tl_requests);
+		INIT_LIST_HEAD(&req->w.list);
+
+		bio->bi_private  = req;
+		bio->bi_end_io   = drbd_endio_pri;
+		bio->bi_next     = NULL;
+	}
+	return req;
+}
+
+static inline void drbd_req_free(struct drbd_request *req)
+{
+	mempool_free(req, drbd_request_mempool);
+}
+
+static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
+}
+
+/* Short lived temporary struct on the stack.
+ * We could squirrel the error to be returned into
+ * bio->bi_size, or similar. But that would be too ugly. */
+struct bio_and_error {
+	struct bio *bio;
+	int error;
+};
+
+extern void _req_may_be_done(struct drbd_request *req,
+		struct bio_and_error *m);
+extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+		struct bio_and_error *m);
+extern void complete_master_bio(struct drbd_conf *mdev,
+		struct bio_and_error *m);
+
+/* use this if you don't want to deal with calling complete_master_bio()
+ * outside the spinlock, e.g. when walking some list on cleanup. */
+static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
+{
+	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
+
+	/* __req_mod possibly frees req, do not touch req after that! */
+	__req_mod(req, what, &m);
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+}
+
+/* completion of master bio is outside of spinlock.
+ * If you need it irqsave, do it your self! */
+static inline void req_mod(struct drbd_request *req,
+		enum drbd_req_event what)
+{
+	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
+	spin_lock_irq(&mdev->req_lock);
+	__req_mod(req, what, &m);
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+}
+#endif
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
new file mode 100644
index 000000000000..76863e3f05be
--- /dev/null
+++ b/drivers/block/drbd/drbd_strings.c
@@ -0,0 +1,113 @@
+/*
+  drbd.h
+
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+  Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+  Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+  Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <linux/drbd.h>
+
+static const char *drbd_conn_s_names[] = {
+	[C_STANDALONE]       = "StandAlone",
+	[C_DISCONNECTING]    = "Disconnecting",
+	[C_UNCONNECTED]      = "Unconnected",
+	[C_TIMEOUT]          = "Timeout",
+	[C_BROKEN_PIPE]      = "BrokenPipe",
+	[C_NETWORK_FAILURE]  = "NetworkFailure",
+	[C_PROTOCOL_ERROR]   = "ProtocolError",
+	[C_WF_CONNECTION]    = "WFConnection",
+	[C_WF_REPORT_PARAMS] = "WFReportParams",
+	[C_TEAR_DOWN]        = "TearDown",
+	[C_CONNECTED]        = "Connected",
+	[C_STARTING_SYNC_S]  = "StartingSyncS",
+	[C_STARTING_SYNC_T]  = "StartingSyncT",
+	[C_WF_BITMAP_S]      = "WFBitMapS",
+	[C_WF_BITMAP_T]      = "WFBitMapT",
+	[C_WF_SYNC_UUID]     = "WFSyncUUID",
+	[C_SYNC_SOURCE]      = "SyncSource",
+	[C_SYNC_TARGET]      = "SyncTarget",
+	[C_PAUSED_SYNC_S]    = "PausedSyncS",
+	[C_PAUSED_SYNC_T]    = "PausedSyncT",
+	[C_VERIFY_S]         = "VerifyS",
+	[C_VERIFY_T]         = "VerifyT",
+};
+
+static const char *drbd_role_s_names[] = {
+	[R_PRIMARY]   = "Primary",
+	[R_SECONDARY] = "Secondary",
+	[R_UNKNOWN]   = "Unknown"
+};
+
+static const char *drbd_disk_s_names[] = {
+	[D_DISKLESS]     = "Diskless",
+	[D_ATTACHING]    = "Attaching",
+	[D_FAILED]       = "Failed",
+	[D_NEGOTIATING]  = "Negotiating",
+	[D_INCONSISTENT] = "Inconsistent",
+	[D_OUTDATED]     = "Outdated",
+	[D_UNKNOWN]      = "DUnknown",
+	[D_CONSISTENT]   = "Consistent",
+	[D_UP_TO_DATE]   = "UpToDate",
+};
+
+static const char *drbd_state_sw_errors[] = {
+	[-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
+	[-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk",
+	[-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
+	[-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
+	[-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
+	[-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
+	[-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
+	[-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
+	[-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
+	[-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
+	[-SS_DEVICE_IN_USE] = "Device is held open by someone",
+	[-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
+	[-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify",
+	[-SS_NEED_CONNECTION] = "Need a connection to start verify or resync",
+	[-SS_NOT_SUPPORTED] = "Peer does not support protocol",
+	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
+	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
+	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+};
+
+const char *drbd_conn_str(enum drbd_conns s)
+{
+	/* enums are unsigned... */
+	return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s];
+}
+
+const char *drbd_role_str(enum drbd_role s)
+{
+	return s > R_SECONDARY   ? "TOO_LARGE" : drbd_role_s_names[s];
+}
+
+const char *drbd_disk_str(enum drbd_disk_state s)
+{
+	return s > D_UP_TO_DATE    ? "TOO_LARGE" : drbd_disk_s_names[s];
+}
+
+const char *drbd_set_st_err_str(enum drbd_state_ret_codes err)
+{
+	return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
+	       err > SS_TWO_PRIMARIES ? "TOO_LARGE"
+			: drbd_state_sw_errors[-err];
+}
diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c
new file mode 100644
index 000000000000..d18d4f7b4bef
--- /dev/null
+++ b/drivers/block/drbd/drbd_tracing.c
@@ -0,0 +1,752 @@
+/*
+   drbd_tracing.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <linux/ctype.h>
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include <linux/drbd_tag_magic.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg");
+MODULE_DESCRIPTION("DRBD tracepoint probes");
+MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace see drbd_tracing.c");
+MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)");
+MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)");
+
+unsigned int trace_mask = 0;  /* Bitmap of events to trace */
+int trace_level;              /* Current trace level */
+int trace_devs;		      /* Bitmap of devices to trace */
+
+module_param(trace_mask, uint, 0444);
+module_param(trace_level, int, 0644);
+module_param(trace_devs, int, 0644);
+
+enum {
+	TRACE_PACKET  = 0x0001,
+	TRACE_RQ      = 0x0002,
+	TRACE_UUID    = 0x0004,
+	TRACE_RESYNC  = 0x0008,
+	TRACE_EE      = 0x0010,
+	TRACE_UNPLUG  = 0x0020,
+	TRACE_NL      = 0x0040,
+	TRACE_AL_EXT  = 0x0080,
+	TRACE_INT_RQ  = 0x0100,
+	TRACE_MD_IO   = 0x0200,
+	TRACE_EPOCH   = 0x0400,
+};
+
+/* Buffer printing support
+ * dbg_print_flags: used for Flags arg to drbd_print_buffer
+ * - DBGPRINT_BUFFADDR; if set, each line starts with the
+ *	 virtual address of the line being output. If clear,
+ *	 each line starts with the offset from the beginning
+ *	 of the buffer. */
+enum dbg_print_flags {
+    DBGPRINT_BUFFADDR = 0x0001,
+};
+
+/* Macro stuff */
+static char *nl_packet_name(int packet_type)
+{
+/* Generate packet type strings */
+#define NL_PACKET(name, number, fields) \
+	[P_ ## name] = # name,
+#define NL_INTEGER Argh!
+#define NL_BIT Argh!
+#define NL_INT64 Argh!
+#define NL_STRING Argh!
+
+	static char *nl_tag_name[P_nl_after_last_packet] = {
+#include "linux/drbd_nl.h"
+	};
+
+	return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ?
+	    nl_tag_name[packet_type] : "*Unknown*";
+}
+/* /Macro stuff */
+
+static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level)
+{
+	return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs);
+}
+
+static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg)
+{
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt));
+}
+
+static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
+{
+	static char *uuid_str[UI_EXTENDED_SIZE] = {
+		[UI_CURRENT] = "CURRENT",
+		[UI_BITMAP] = "BITMAP",
+		[UI_HISTORY_START] = "HISTORY_START",
+		[UI_HISTORY_END] = "HISTORY_END",
+		[UI_SIZE] = "SIZE",
+		[UI_FLAGS] = "FLAGS",
+	};
+
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	if (index >= UI_EXTENDED_SIZE) {
+		dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
+		return;
+	}
+
+	dev_info(DEV, " uuid[%s] now %016llX\n",
+		 uuid_str[index],
+		 (unsigned long long)mdev->ldev->md.uuid[index]);
+}
+
+static void probe_drbd_md_io(struct drbd_conf *mdev, int rw,
+			     struct drbd_backing_dev *bdev)
+{
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	dev_info(DEV, " %s metadata superblock now\n",
+		 rw == READ ? "Reading" : "Writing");
+}
+
+static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg)
+{
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n",
+		 msg, (unsigned long long)e->sector, e->size, e);
+}
+
+static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch,
+			     enum epoch_event ev)
+{
+	static char *epoch_event_str[] = {
+		[EV_PUT] = "put",
+		[EV_GOT_BARRIER_NR] = "got_barrier_nr",
+		[EV_BARRIER_DONE] = "barrier_done",
+		[EV_BECAME_LAST] = "became_last",
+		[EV_TRACE_FLUSH] = "issuing_flush",
+		[EV_TRACE_ADD_BARRIER] = "added_barrier",
+		[EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch",
+	};
+
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	ev &= ~EV_CLEANUP;
+
+	switch (ev) {
+	case EV_TRACE_ALLOC:
+		dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs);
+		break;
+	case EV_TRACE_FREE:
+		dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n",
+			 epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size),
+			 mdev->epochs);
+		break;
+	default:
+		dev_info(DEV, "Update epoch  %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n",
+			 epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size),
+			 atomic_read(&epoch->active),
+			 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-',
+			 test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-',
+			 test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-',
+			 test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-',
+			 epoch_event_str[ev]);
+	}
+}
+
+static void probe_drbd_netlink(void *data, int is_req)
+{
+	struct cn_msg *msg = data;
+
+	if (is_req) {
+		struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data;
+
+		printk(KERN_INFO "drbd%d: "
+			 "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n",
+			 nlp->drbd_minor,
+			 nl_packet_name(nlp->packet_type),
+			 nlp->packet_type,
+			 msg->seq, msg->ack, msg->len);
+	} else {
+		struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data;
+
+		printk(KERN_INFO "drbd%d: "
+		       "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n",
+		       nlp->minor,
+		       nlp->packet_type == P_nl_after_last_packet ?
+		       "Empty-Reply" : nl_packet_name(nlp->packet_type),
+		       nlp->packet_type,
+		       msg->seq, msg->ack, msg->len);
+	}
+}
+
+static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg)
+{
+	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n",
+		 msg, (unsigned long long) sector, enr,
+		 (int)BM_SECT_TO_EXT(sector));
+}
+
+/**
+ * drbd_print_buffer() - Hexdump arbitrary binary data into a buffer
+ * @prefix:	String is output at the beginning of each line output.
+ * @flags:	Currently only defined flag: DBGPRINT_BUFFADDR; if set, each
+ *		line starts with the virtual address of the line being
+ *		output. If clear, each line starts with the offset from the
+ *		beginning of the buffer.
+ * @size:	Indicates the size of each entry in the buffer. Supported
+ * 		values are sizeof(char), sizeof(short) and sizeof(int)
+ * @buffer:	Start address of buffer
+ * @buffer_va:	Virtual address of start of buffer (normally the same
+ *		as Buffer, but having it separate allows it to hold
+ *		file address for example)
+ * @length:	length of buffer
+ */
+static void drbd_print_buffer(const char *prefix, unsigned int flags, int size,
+			      const void *buffer, const void *buffer_va,
+			      unsigned int length)
+
+#define LINE_SIZE       16
+#define LINE_ENTRIES    (int)(LINE_SIZE/size)
+{
+	const unsigned char *pstart;
+	const unsigned char *pstart_va;
+	const unsigned char *pend;
+	char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8];
+	char *pbytes = bytes_str, *pascii = ascii_str;
+	int  offset = 0;
+	long sizemask;
+	int  field_width;
+	int  index;
+	const unsigned char *pend_str;
+	const unsigned char *p;
+	int count;
+
+	/* verify size parameter */
+	if (size != sizeof(char) &&
+	    size != sizeof(short) &&
+	    size != sizeof(int)) {
+		printk(KERN_DEBUG "drbd_print_buffer: "
+			"ERROR invalid size %d\n", size);
+		return;
+	}
+
+	sizemask = size-1;
+	field_width = size*2;
+
+	/* Adjust start/end to be on appropriate boundary for size */
+	buffer = (const char *)((long)buffer & ~sizemask);
+	pend   = (const unsigned char *)
+		(((long)buffer + length + sizemask) & ~sizemask);
+
+	if (flags & DBGPRINT_BUFFADDR) {
+		/* Move start back to nearest multiple of line size,
+		 * if printing address. This results in nicely formatted output
+		 * with addresses being on line size (16) byte boundaries */
+		pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1));
+	} else {
+		pstart = (const unsigned char *)buffer;
+	}
+
+	/* Set value of start VA to print if addresses asked for */
+	pstart_va = (const unsigned char *)buffer_va
+		 - ((const unsigned char *)buffer-pstart);
+
+	/* Calculate end position to nicely align right hand side */
+	pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1));
+
+	/* Init strings */
+	*pbytes = *pascii = '\0';
+
+	/* Start at beginning of first line */
+	p = pstart;
+	count = 0;
+
+	while (p < pend_str) {
+		if (p < (const unsigned char *)buffer || p >= pend) {
+			/* Before start of buffer or after end- print spaces */
+			pbytes += sprintf(pbytes, "%*c ", field_width, ' ');
+			pascii += sprintf(pascii, "%*c", size, ' ');
+			p += size;
+		} else {
+			/* Add hex and ascii to strings */
+			int val;
+			switch (size) {
+			default:
+			case 1:
+				val = *(unsigned char *)p;
+				break;
+			case 2:
+				val = *(unsigned short *)p;
+				break;
+			case 4:
+				val = *(unsigned int *)p;
+				break;
+			}
+
+			pbytes += sprintf(pbytes, "%0*x ", field_width, val);
+
+			for (index = size; index; index--) {
+				*pascii++ = isprint(*p) ? *p : '.';
+				p++;
+			}
+		}
+
+		count++;
+
+		if (count == LINE_ENTRIES || p >= pend_str) {
+			/* Null terminate and print record */
+			*pascii = '\0';
+			printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n",
+			       prefix,
+			       (flags & DBGPRINT_BUFFADDR)
+			       ? (long)pstart_va:(long)offset,
+			       LINE_ENTRIES*(field_width+1), bytes_str,
+			       LINE_SIZE, ascii_str);
+
+			/* Move onto next line */
+			pstart_va += (p-pstart);
+			pstart = p;
+			count  = 0;
+			offset += LINE_SIZE;
+
+			/* Re-init strings */
+			pbytes = bytes_str;
+			pascii = ascii_str;
+			*pbytes = *pascii = '\0';
+		}
+	}
+}
+
+static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args)
+{
+	char str[256];
+
+	if (!is_mdev_trace(mdev, level))
+		return;
+
+	if (vsnprintf(str, 256, fmt, args) >= 256)
+		str[255] = 0;
+
+	printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)),
+	       dev_name(disk_to_dev(mdev->vdisk)), str);
+}
+
+static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete,
+			   struct drbd_request *r)
+{
+#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD)
+#define SECTOR_FORMAT "%Lx"
+#else
+#define SECTOR_FORMAT "%lx"
+#endif
+#define SECTOR_SHIFT 9
+
+	unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT);
+	char *faddr = (char *)(lowaddr);
+	char rb[sizeof(void *)*2+6] = { 0, };
+	struct bio_vec *bvec;
+	int segno;
+
+	const int rw = bio->bi_rw;
+	const int biorw      = (rw & (RW_MASK|RWA_MASK));
+	const int biobarrier = (rw & (1<<BIO_RW_BARRIER));
+	const int biosync = (rw & ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO)));
+
+	if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS))
+		return;
+
+	if (r)
+		sprintf(rb, "Req:%p ", r);
+
+	dev_info(DEV, "%s %s:%s%s%s Bio:%p %s- %soffset " SECTOR_FORMAT ", size %x\n",
+		 complete ? "<<<" : ">>>",
+		 pfx,
+		 biorw == WRITE ? "Write" : "Read",
+		 biobarrier ? " : B" : "",
+		 biosync ? " : S" : "",
+		 bio,
+		 rb,
+		 complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "",
+		 bio->bi_sector << SECTOR_SHIFT,
+		 bio->bi_size);
+
+	if (trace_level >= TRACE_LVL_METRICS &&
+	    ((biorw == WRITE) ^ complete)) {
+		printk(KERN_DEBUG "  ind     page   offset   length\n");
+		__bio_for_each_segment(bvec, bio, segno, 0) {
+			printk(KERN_DEBUG "  [%d] %p %8.8x %8.8x\n", segno,
+			       bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+
+			if (trace_level >= TRACE_LVL_ALL) {
+				char *bvec_buf;
+				unsigned long flags;
+
+				bvec_buf = bvec_kmap_irq(bvec, &flags);
+
+				drbd_print_buffer("    ", DBGPRINT_BUFFADDR, 1,
+						  bvec_buf,
+						  faddr,
+						  (bvec->bv_len <= 0x80)
+						  ? bvec->bv_len : 0x80);
+
+				bvec_kunmap_irq(bvec_buf, &flags);
+
+				if (bvec->bv_len > 0x40)
+					printk(KERN_DEBUG "    ....\n");
+
+				faddr += bvec->bv_len;
+			}
+		}
+	}
+}
+
+static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg)
+{
+	static const char *rq_event_names[] = {
+		[created] = "created",
+		[to_be_send] = "to_be_send",
+		[to_be_submitted] = "to_be_submitted",
+		[queue_for_net_write] = "queue_for_net_write",
+		[queue_for_net_read] = "queue_for_net_read",
+		[send_canceled] = "send_canceled",
+		[send_failed] = "send_failed",
+		[handed_over_to_network] = "handed_over_to_network",
+		[connection_lost_while_pending] =
+					"connection_lost_while_pending",
+		[recv_acked_by_peer] = "recv_acked_by_peer",
+		[write_acked_by_peer] = "write_acked_by_peer",
+		[neg_acked] = "neg_acked",
+		[conflict_discarded_by_peer] = "conflict_discarded_by_peer",
+		[barrier_acked] = "barrier_acked",
+		[data_received] = "data_received",
+		[read_completed_with_error] = "read_completed_with_error",
+		[read_ahead_completed_with_error] = "reada_completed_with_error",
+		[write_completed_with_error] = "write_completed_with_error",
+		[completed_ok] = "completed_ok",
+	};
+
+	struct drbd_conf *mdev = req->mdev;
+
+	const int rw = (req->master_bio == NULL ||
+			bio_data_dir(req->master_bio) == WRITE) ?
+		'W' : 'R';
+	const unsigned long s = req->rq_state;
+
+	if (what != nothing) {
+		dev_info(DEV, "__req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]);
+	} else {
+		dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n",
+			 msg, req, rw,
+			 s & RQ_LOCAL_PENDING ? 'p' : '-',
+			 s & RQ_LOCAL_COMPLETED ? 'c' : '-',
+			 s & RQ_LOCAL_OK ? 'o' : '-',
+			 s & RQ_NET_PENDING ? 'p' : '-',
+			 s & RQ_NET_QUEUED ? 'q' : '-',
+			 s & RQ_NET_SENT ? 's' : '-',
+			 s & RQ_NET_DONE ? 'd' : '-',
+			 s & RQ_NET_OK ? 'o' : '-',
+			 req->epoch,
+			 (unsigned long long)req->sector,
+			 req->size,
+			 drbd_conn_str(mdev->state.conn));
+	}
+}
+
+
+#define drbd_peer_str drbd_role_str
+#define drbd_pdsk_str drbd_disk_str
+
+#define PSM(A)							\
+do {								\
+	if (mask.A) {						\
+		int i = snprintf(p, len, " " #A "( %s )",	\
+				 drbd_##A##_str(val.A));	\
+		if (i >= len)					\
+			return op;				\
+		p += i;						\
+		len -= i;					\
+	}							\
+} while (0)
+
+static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val)
+{
+	char *op = p;
+	*p = '\0';
+	PSM(role);
+	PSM(peer);
+	PSM(conn);
+	PSM(disk);
+	PSM(pdsk);
+
+	return op;
+}
+
+#define INFOP(fmt, args...) \
+do { \
+	if (trace_level >= TRACE_LVL_ALL) { \
+		dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \
+		     file, line, current->comm, current->pid, \
+		     sockname, recv ? "<<<" : ">>>" , \
+		     ## args); \
+	} else { \
+		dev_info(DEV, "%s %s " fmt, sockname, \
+		     recv ? "<<<" : ">>>" , \
+		     ## args); \
+	} \
+} while (0)
+
+static char *_dump_block_id(u64 block_id, char *buff)
+{
+	if (is_syncer_block_id(block_id))
+		strcpy(buff, "SyncerId");
+	else
+		sprintf(buff, "%llx", (unsigned long long)block_id);
+
+	return buff;
+}
+
+static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock,
+			      int recv, union p_polymorph *p, char *file, int line)
+{
+	char *sockname = sock == mdev->meta.socket ? "meta" : "data";
+	int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command);
+	char tmp[300];
+	union drbd_state m, v;
+
+	switch (cmd) {
+	case P_HAND_SHAKE:
+		INFOP("%s (protocol %u-%u)\n", cmdname(cmd),
+			be32_to_cpu(p->handshake.protocol_min),
+			be32_to_cpu(p->handshake.protocol_max));
+		break;
+
+	case P_BITMAP: /* don't report this */
+	case P_COMPRESSED_BITMAP: /* don't report this */
+		break;
+
+	case P_DATA:
+		INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd),
+		      (unsigned long long)be64_to_cpu(p->data.sector),
+		      _dump_block_id(p->data.block_id, tmp),
+		      be32_to_cpu(p->data.seq_num),
+		      be32_to_cpu(p->data.dp_flags)
+			);
+		break;
+
+	case P_DATA_REPLY:
+	case P_RS_DATA_REPLY:
+		INFOP("%s (sector %llus, id %s)\n", cmdname(cmd),
+		      (unsigned long long)be64_to_cpu(p->data.sector),
+		      _dump_block_id(p->data.block_id, tmp)
+			);
+		break;
+
+	case P_RECV_ACK:
+	case P_WRITE_ACK:
+	case P_RS_WRITE_ACK:
+	case P_DISCARD_ACK:
+	case P_NEG_ACK:
+	case P_NEG_RS_DREPLY:
+		INFOP("%s (sector %llus, size %u, id %s, seq %u)\n",
+			cmdname(cmd),
+		      (long long)be64_to_cpu(p->block_ack.sector),
+		      be32_to_cpu(p->block_ack.blksize),
+		      _dump_block_id(p->block_ack.block_id, tmp),
+		      be32_to_cpu(p->block_ack.seq_num)
+			);
+		break;
+
+	case P_DATA_REQUEST:
+	case P_RS_DATA_REQUEST:
+		INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd),
+		      (long long)be64_to_cpu(p->block_req.sector),
+		      be32_to_cpu(p->block_req.blksize),
+		      _dump_block_id(p->block_req.block_id, tmp)
+			);
+		break;
+
+	case P_BARRIER:
+	case P_BARRIER_ACK:
+		INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier);
+		break;
+
+	case P_SYNC_PARAM:
+	case P_SYNC_PARAM89:
+		INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n",
+			cmdname(cmd), be32_to_cpu(p->rs_param_89.rate),
+			p->rs_param_89.verify_alg, p->rs_param_89.csums_alg);
+		break;
+
+	case P_UUIDS:
+		INFOP("%s Curr:%016llX, Bitmap:%016llX, "
+		      "HisSt:%016llX, HisEnd:%016llX\n",
+		      cmdname(cmd),
+		      (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]),
+		      (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]),
+		      (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]),
+		      (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END]));
+		break;
+
+	case P_SIZES:
+		INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, "
+		      "max bio %x, q order %x)\n",
+		      cmdname(cmd),
+		      (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)),
+		      (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)),
+		      (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)),
+		      be32_to_cpu(p->sizes.max_segment_size),
+		      be32_to_cpu(p->sizes.queue_order_type));
+		break;
+
+	case P_STATE:
+		v.i = be32_to_cpu(p->state.state);
+		m.i = 0xffffffff;
+		dump_st(tmp, sizeof(tmp), m, v);
+		INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp);
+		break;
+
+	case P_STATE_CHG_REQ:
+		m.i = be32_to_cpu(p->req_state.mask);
+		v.i = be32_to_cpu(p->req_state.val);
+		dump_st(tmp, sizeof(tmp), m, v);
+		INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp);
+		break;
+
+	case P_STATE_CHG_REPLY:
+		INFOP("%s (ret %x)\n", cmdname(cmd),
+		      be32_to_cpu(p->req_state_reply.retcode));
+		break;
+
+	case P_PING:
+	case P_PING_ACK:
+		/*
+		 * Dont trace pings at summary level
+		 */
+		if (trace_level < TRACE_LVL_ALL)
+			break;
+		/* fall through... */
+	default:
+		INFOP("%s (%u)\n", cmdname(cmd), cmd);
+		break;
+	}
+}
+
+
+static int __init drbd_trace_init(void)
+{
+	int ret;
+
+	if (trace_mask & TRACE_UNPLUG) {
+		ret = register_trace_drbd_unplug(probe_drbd_unplug);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_UUID) {
+		ret = register_trace_drbd_uuid(probe_drbd_uuid);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_EE) {
+		ret = register_trace_drbd_ee(probe_drbd_ee);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_PACKET) {
+		ret = register_trace_drbd_packet(probe_drbd_packet);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_MD_IO) {
+		ret = register_trace_drbd_md_io(probe_drbd_md_io);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_EPOCH) {
+		ret = register_trace_drbd_epoch(probe_drbd_epoch);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_NL) {
+		ret = register_trace_drbd_netlink(probe_drbd_netlink);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_AL_EXT) {
+		ret = register_trace_drbd_actlog(probe_drbd_actlog);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_RQ) {
+		ret = register_trace_drbd_bio(probe_drbd_bio);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_INT_RQ) {
+		ret = register_trace_drbd_req(probe_drbd_req);
+		WARN_ON(ret);
+	}
+	if (trace_mask & TRACE_RESYNC) {
+		ret = register_trace__drbd_resync(probe_drbd_resync);
+		WARN_ON(ret);
+	}
+	return 0;
+}
+
+module_init(drbd_trace_init);
+
+static void __exit drbd_trace_exit(void)
+{
+	if (trace_mask & TRACE_UNPLUG)
+		unregister_trace_drbd_unplug(probe_drbd_unplug);
+	if (trace_mask & TRACE_UUID)
+		unregister_trace_drbd_uuid(probe_drbd_uuid);
+	if (trace_mask & TRACE_EE)
+		unregister_trace_drbd_ee(probe_drbd_ee);
+	if (trace_mask & TRACE_PACKET)
+		unregister_trace_drbd_packet(probe_drbd_packet);
+	if (trace_mask & TRACE_MD_IO)
+		unregister_trace_drbd_md_io(probe_drbd_md_io);
+	if (trace_mask & TRACE_EPOCH)
+		unregister_trace_drbd_epoch(probe_drbd_epoch);
+	if (trace_mask & TRACE_NL)
+		unregister_trace_drbd_netlink(probe_drbd_netlink);
+	if (trace_mask & TRACE_AL_EXT)
+		unregister_trace_drbd_actlog(probe_drbd_actlog);
+	if (trace_mask & TRACE_RQ)
+		unregister_trace_drbd_bio(probe_drbd_bio);
+	if (trace_mask & TRACE_INT_RQ)
+		unregister_trace_drbd_req(probe_drbd_req);
+	if (trace_mask & TRACE_RESYNC)
+		unregister_trace__drbd_resync(probe_drbd_resync);
+
+	tracepoint_synchronize_unregister();
+}
+
+module_exit(drbd_trace_exit);
diff --git a/drivers/block/drbd/drbd_tracing.h b/drivers/block/drbd/drbd_tracing.h
new file mode 100644
index 000000000000..c4531a137f65
--- /dev/null
+++ b/drivers/block/drbd/drbd_tracing.h
@@ -0,0 +1,87 @@
+/*
+   drbd_tracing.h
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#ifndef DRBD_TRACING_H
+#define DRBD_TRACING_H
+
+#include <linux/tracepoint.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+enum {
+	TRACE_LVL_ALWAYS = 0,
+	TRACE_LVL_SUMMARY,
+	TRACE_LVL_METRICS,
+	TRACE_LVL_ALL,
+	TRACE_LVL_MAX
+};
+
+DECLARE_TRACE(drbd_unplug,
+	TP_PROTO(struct drbd_conf *mdev, char* msg),
+	TP_ARGS(mdev, msg));
+
+DECLARE_TRACE(drbd_uuid,
+	TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index),
+	TP_ARGS(mdev, index));
+
+DECLARE_TRACE(drbd_ee,
+	TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg),
+	TP_ARGS(mdev, e, msg));
+
+DECLARE_TRACE(drbd_md_io,
+	TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev),
+	TP_ARGS(mdev, rw, bdev));
+
+DECLARE_TRACE(drbd_epoch,
+	TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev),
+	TP_ARGS(mdev, epoch, ev));
+
+DECLARE_TRACE(drbd_netlink,
+	TP_PROTO(void *data, int is_req),
+	TP_ARGS(data, is_req));
+
+DECLARE_TRACE(drbd_actlog,
+	TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg),
+	TP_ARGS(mdev, sector, msg));
+
+DECLARE_TRACE(drbd_bio,
+	TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete,
+		 struct drbd_request *r),
+	TP_ARGS(mdev, pfx, bio, complete, r));
+
+DECLARE_TRACE(drbd_req,
+	TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg),
+	      TP_ARGS(req, what, msg));
+
+DECLARE_TRACE(drbd_packet,
+	TP_PROTO(struct drbd_conf *mdev, struct socket *sock,
+		 int recv, union p_polymorph *p, char *file, int line),
+	TP_ARGS(mdev, sock, recv, p, file, line));
+
+DECLARE_TRACE(_drbd_resync,
+	TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args),
+	TP_ARGS(mdev, level, fmt, args));
+
+#endif
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
new file mode 100644
index 000000000000..fc824006e721
--- /dev/null
+++ b/drivers/block/drbd/drbd_vli.h
@@ -0,0 +1,351 @@
+/*
+-*- linux-c -*-
+   drbd_receiver.c
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_VLI_H
+#define _DRBD_VLI_H
+
+/*
+ * At a granularity of 4KiB storage represented per bit,
+ * and stroage sizes of several TiB,
+ * and possibly small-bandwidth replication,
+ * the bitmap transfer time can take much too long,
+ * if transmitted in plain text.
+ *
+ * We try to reduce the transfered bitmap information
+ * by encoding runlengths of bit polarity.
+ *
+ * We never actually need to encode a "zero" (runlengths are positive).
+ * But then we have to store the value of the first bit.
+ * The first bit of information thus shall encode if the first runlength
+ * gives the number of set or unset bits.
+ *
+ * We assume that large areas are either completely set or unset,
+ * which gives good compression with any runlength method,
+ * even when encoding the runlength as fixed size 32bit/64bit integers.
+ *
+ * Still, there may be areas where the polarity flips every few bits,
+ * and encoding the runlength sequence of those areas with fix size
+ * integers would be much worse than plaintext.
+ *
+ * We want to encode small runlength values with minimum code length,
+ * while still being able to encode a Huge run of all zeros.
+ *
+ * Thus we need a Variable Length Integer encoding, VLI.
+ *
+ * For some cases, we produce more code bits than plaintext input.
+ * We need to send incompressible chunks as plaintext, skip over them
+ * and then see if the next chunk compresses better.
+ *
+ * We don't care too much about "excellent" compression ratio for large
+ * runlengths (all set/all clear): whether we achieve a factor of 100
+ * or 1000 is not that much of an issue.
+ * We do not want to waste too much on short runlengths in the "noisy"
+ * parts of the bitmap, though.
+ *
+ * There are endless variants of VLI, we experimented with:
+ *  * simple byte-based
+ *  * various bit based with different code word length.
+ *
+ * To avoid yet an other configuration parameter (choice of bitmap compression
+ * algorithm) which was difficult to explain and tune, we just chose the one
+ * variant that turned out best in all test cases.
+ * Based on real world usage patterns, with device sizes ranging from a few GiB
+ * to several TiB, file server/mailserver/webserver/mysql/postgress,
+ * mostly idle to really busy, the all time winner (though sometimes only
+ * marginally better) is:
+ */
+
+/*
+ * encoding is "visualised" as
+ * __little endian__ bitstream, least significant bit first (left most)
+ *
+ * this particular encoding is chosen so that the prefix code
+ * starts as unary encoding the level, then modified so that
+ * 10 levels can be described in 8bit, with minimal overhead
+ * for the smaller levels.
+ *
+ * Number of data bits follow fibonacci sequence, with the exception of the
+ * last level (+1 data bit, so it makes 64bit total).  The only worse code when
+ * encoding bit polarity runlength is 1 plain bits => 2 code bits.
+prefix    data bits                                    max val  Nº data bits
+0 x                                                         0x2            1
+10 x                                                        0x4            1
+110 xx                                                      0x8            2
+1110 xxx                                                   0x10            3
+11110 xxx xx                                               0x30            5
+111110 xx xxxxxx                                          0x130            8
+11111100  xxxxxxxx xxxxx                                 0x2130           13
+11111110  xxxxxxxx xxxxxxxx xxxxx                      0x202130           21
+11111101  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xx   0x400202130           34
+11111111  xxxxxxxx xxxxxxxx xxxxxxxx  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
+ * maximum encodable value: 0x100000400202130 == 2**56 + some */
+
+/* compression "table":
+ transmitted   x                                0.29
+ as plaintext x                                  ........................
+             x                                   ........................
+            x                                    ........................
+           x    0.59                         0.21........................
+          x      ........................................................
+         x       .. c ...................................................
+        x    0.44.. o ...................................................
+       x .......... d ...................................................
+      x  .......... e ...................................................
+     X.............   ...................................................
+    x.............. b ...................................................
+2.0x............... i ...................................................
+ #X................ t ...................................................
+ #................. s ...........................  plain bits  ..........
+-+-----------------------------------------------------------------------
+ 1             16              32                              64
+*/
+
+/* LEVEL: (total bits, prefix bits, prefix value),
+ * sorted ascending by number of total bits.
+ * The rest of the code table is calculated at compiletime from this. */
+
+/* fibonacci data 1, 1, ... */
+#define VLI_L_1_1() do { \
+	LEVEL( 2, 1, 0x00); \
+	LEVEL( 3, 2, 0x01); \
+	LEVEL( 5, 3, 0x03); \
+	LEVEL( 7, 4, 0x07); \
+	LEVEL(10, 5, 0x0f); \
+	LEVEL(14, 6, 0x1f); \
+	LEVEL(21, 8, 0x3f); \
+	LEVEL(29, 8, 0x7f); \
+	LEVEL(42, 8, 0xbf); \
+	LEVEL(64, 8, 0xff); \
+	} while (0)
+
+/* finds a suitable level to decode the least significant part of in.
+ * returns number of bits consumed.
+ *
+ * BUG() for bad input, as that would mean a buggy code table. */
+static inline int vli_decode_bits(u64 *out, const u64 in)
+{
+	u64 adj = 1;
+
+#define LEVEL(t,b,v)					\
+	do {						\
+		if ((in & ((1 << b) -1)) == v) {	\
+			*out = ((in & ((~0ULL) >> (64-t))) >> b) + adj;	\
+			return t;			\
+		}					\
+		adj += 1ULL << (t - b);			\
+	} while (0)
+
+	VLI_L_1_1();
+
+	/* NOT REACHED, if VLI_LEVELS code table is defined properly */
+	BUG();
+#undef LEVEL
+}
+
+/* return number of code bits needed,
+ * or negative error number */
+static inline int __vli_encode_bits(u64 *out, const u64 in)
+{
+	u64 max = 0;
+	u64 adj = 1;
+
+	if (in == 0)
+		return -EINVAL;
+
+#define LEVEL(t,b,v) do {		\
+		max += 1ULL << (t - b);	\
+		if (in <= max) {	\
+			if (out)	\
+				*out = ((in - adj) << b) | v;	\
+			return t;	\
+		}			\
+		adj = max + 1;		\
+	} while (0)
+
+	VLI_L_1_1();
+
+	return -EOVERFLOW;
+#undef LEVEL
+}
+
+#undef VLI_L_1_1
+
+/* code from here down is independend of actually used bit code */
+
+/*
+ * Code length is determined by some unique (e.g. unary) prefix.
+ * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
+ * not a byte stream.
+ */
+
+/* for the bitstream, we need a cursor */
+struct bitstream_cursor {
+	/* the current byte */
+	u8 *b;
+	/* the current bit within *b, nomalized: 0..7 */
+	unsigned int bit;
+};
+
+/* initialize cursor to point to first bit of stream */
+static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
+{
+	cur->b = s;
+	cur->bit = 0;
+}
+
+/* advance cursor by that many bits; maximum expected input value: 64,
+ * but depending on VLI implementation, it may be more. */
+static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
+{
+	bits += cur->bit;
+	cur->b = cur->b + (bits >> 3);
+	cur->bit = bits & 7;
+}
+
+/* the bitstream itself knows its length */
+struct bitstream {
+	struct bitstream_cursor cur;
+	unsigned char *buf;
+	size_t buf_len;		/* in bytes */
+
+	/* for input stream:
+	 * number of trailing 0 bits for padding
+	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
+	unsigned int pad_bits;
+};
+
+static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
+{
+	bs->buf = s;
+	bs->buf_len = len;
+	bs->pad_bits = pad_bits;
+	bitstream_cursor_reset(&bs->cur, bs->buf);
+}
+
+static inline void bitstream_rewind(struct bitstream *bs)
+{
+	bitstream_cursor_reset(&bs->cur, bs->buf);
+	memset(bs->buf, 0, bs->buf_len);
+}
+
+/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
+ * Ignores "pad_bits".
+ * Returns zero if bits == 0 (nothing to do).
+ * Returns number of bits used if successful.
+ *
+ * If there is not enough room left in bitstream,
+ * leaves bitstream unchanged and returns -ENOBUFS.
+ */
+static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
+{
+	unsigned char *b = bs->cur.b;
+	unsigned int tmp;
+
+	if (bits == 0)
+		return 0;
+
+	if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
+		return -ENOBUFS;
+
+	/* paranoia: strip off hi bits; they should not be set anyways. */
+	if (bits < 64)
+		val &= ~0ULL >> (64 - bits);
+
+	*b++ |= (val & 0xff) << bs->cur.bit;
+
+	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
+		*b++ |= (val >> tmp) & 0xff;
+
+	bitstream_cursor_advance(&bs->cur, bits);
+	return bits;
+}
+
+/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
+ *
+ * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
+ *
+ * If there are less than the requested number of valid bits left in the
+ * bitstream, still fetches all available bits.
+ *
+ * Returns number of actually fetched bits.
+ */
+static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
+{
+	u64 val;
+	unsigned int n;
+
+	if (bits > 64)
+		return -EINVAL;
+
+	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
+		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
+			- bs->cur.bit - bs->pad_bits;
+
+	if (bits == 0) {
+		*out = 0;
+		return 0;
+	}
+
+	/* get the high bits */
+	val = 0;
+	n = (bs->cur.bit + bits + 7) >> 3;
+	/* n may be at most 9, if cur.bit + bits > 64 */
+	/* which means this copies at most 8 byte */
+	if (n) {
+		memcpy(&val, bs->cur.b+1, n - 1);
+		val = le64_to_cpu(val) << (8 - bs->cur.bit);
+	}
+
+	/* we still need the low bits */
+	val |= bs->cur.b[0] >> bs->cur.bit;
+
+	/* and mask out bits we don't want */
+	val &= ~0ULL >> (64 - bits);
+
+	bitstream_cursor_advance(&bs->cur, bits);
+	*out = val;
+
+	return bits;
+}
+
+/* encodes @in as vli into @bs;
+
+ * return values
+ *  > 0: number of bits successfully stored in bitstream
+ * -ENOBUFS @bs is full
+ * -EINVAL input zero (invalid)
+ * -EOVERFLOW input too large for this vli code (invalid)
+ */
+static inline int vli_encode_bits(struct bitstream *bs, u64 in)
+{
+	u64 code = code;
+	int bits = __vli_encode_bits(&code, in);
+
+	if (bits <= 0)
+		return bits;
+
+	return bitstream_put_bits(bs, code, bits);
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
new file mode 100644
index 000000000000..212e9545e634
--- /dev/null
+++ b/drivers/block/drbd/drbd_worker.c
@@ -0,0 +1,1529 @@
+/*
+   drbd_worker.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/autoconf.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+#include "drbd_tracing.h"
+
+#define SLEEP_TIME (HZ/10)
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+
+
+
+/* defined here:
+   drbd_md_io_complete
+   drbd_endio_write_sec
+   drbd_endio_read_sec
+   drbd_endio_pri
+
+ * more endio handlers:
+   atodb_endio in drbd_actlog.c
+   drbd_bm_async_io_complete in drbd_bitmap.c
+
+ * For all these callbacks, note the following:
+ * The callbacks will be called in irq context by the IDE drivers,
+ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
+ * Try to get the locking right :)
+ *
+ */
+
+
+/* About the global_state_lock
+   Each state transition on an device holds a read lock. In case we have
+   to evaluate the sync after dependencies, we grab a write lock, because
+   we need stable states on all devices for that.  */
+rwlock_t global_state_lock;
+
+/* used for synchronous meta data and bitmap IO
+ * submitted by drbd_md_sync_page_io()
+ */
+void drbd_md_io_complete(struct bio *bio, int error)
+{
+	struct drbd_md_io *md_io;
+
+	md_io = (struct drbd_md_io *)bio->bi_private;
+	md_io->error = error;
+
+	trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL);
+
+	complete(&md_io->event);
+}
+
+/* reads on behalf of the partner,
+ * "submitted" by the receiver
+ */
+void drbd_endio_read_sec(struct bio *bio, int error) __releases(local)
+{
+	unsigned long flags = 0;
+	struct drbd_epoch_entry *e = NULL;
+	struct drbd_conf *mdev;
+	int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+	e = bio->bi_private;
+	mdev = e->mdev;
+
+	if (error)
+		dev_warn(DEV, "read: error=%d s=%llus\n", error,
+				(unsigned long long)e->sector);
+	if (!error && !uptodate) {
+		dev_warn(DEV, "read: setting error to -EIO s=%llus\n",
+				(unsigned long long)e->sector);
+		/* strange behavior of some lower level drivers...
+		 * fail the request by clearing the uptodate flag,
+		 * but do not return any error?! */
+		error = -EIO;
+	}
+
+	D_ASSERT(e->block_id != ID_VACANT);
+
+	trace_drbd_bio(mdev, "Sec", bio, 1, NULL);
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	mdev->read_cnt += e->size >> 9;
+	list_del(&e->w.list);
+	if (list_empty(&mdev->read_ee))
+		wake_up(&mdev->ee_wait);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	drbd_chk_io_error(mdev, error, FALSE);
+	drbd_queue_work(&mdev->data.work, &e->w);
+	put_ldev(mdev);
+
+	trace_drbd_ee(mdev, e, "read completed");
+}
+
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver.
+ */
+void drbd_endio_write_sec(struct bio *bio, int error) __releases(local)
+{
+	unsigned long flags = 0;
+	struct drbd_epoch_entry *e = NULL;
+	struct drbd_conf *mdev;
+	sector_t e_sector;
+	int do_wake;
+	int is_syncer_req;
+	int do_al_complete_io;
+	int uptodate = bio_flagged(bio, BIO_UPTODATE);
+	int is_barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
+
+	e = bio->bi_private;
+	mdev = e->mdev;
+
+	if (error)
+		dev_warn(DEV, "write: error=%d s=%llus\n", error,
+				(unsigned long long)e->sector);
+	if (!error && !uptodate) {
+		dev_warn(DEV, "write: setting error to -EIO s=%llus\n",
+				(unsigned long long)e->sector);
+		/* strange behavior of some lower level drivers...
+		 * fail the request by clearing the uptodate flag,
+		 * but do not return any error?! */
+		error = -EIO;
+	}
+
+	/* error == -ENOTSUPP would be a better test,
+	 * alas it is not reliable */
+	if (error && is_barrier && e->flags & EE_IS_BARRIER) {
+		drbd_bump_write_ordering(mdev, WO_bdev_flush);
+		spin_lock_irqsave(&mdev->req_lock, flags);
+		list_del(&e->w.list);
+		e->w.cb = w_e_reissue;
+		/* put_ldev actually happens below, once we come here again. */
+		__release(local);
+		spin_unlock_irqrestore(&mdev->req_lock, flags);
+		drbd_queue_work(&mdev->data.work, &e->w);
+		return;
+	}
+
+	D_ASSERT(e->block_id != ID_VACANT);
+
+	trace_drbd_bio(mdev, "Sec", bio, 1, NULL);
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	mdev->writ_cnt += e->size >> 9;
+	is_syncer_req = is_syncer_block_id(e->block_id);
+
+	/* after we moved e to done_ee,
+	 * we may no longer access it,
+	 * it may be freed/reused already!
+	 * (as soon as we release the req_lock) */
+	e_sector = e->sector;
+	do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO;
+
+	list_del(&e->w.list); /* has been on active_ee or sync_ee */
+	list_add_tail(&e->w.list, &mdev->done_ee);
+
+	trace_drbd_ee(mdev, e, "write completed");
+
+	/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+	 * neither did we wake possibly waiting conflicting requests.
+	 * done from "drbd_process_done_ee" within the appropriate w.cb
+	 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
+
+	do_wake = is_syncer_req
+		? list_empty(&mdev->sync_ee)
+		: list_empty(&mdev->active_ee);
+
+	if (error)
+		__drbd_chk_io_error(mdev, FALSE);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (is_syncer_req)
+		drbd_rs_complete_io(mdev, e_sector);
+
+	if (do_wake)
+		wake_up(&mdev->ee_wait);
+
+	if (do_al_complete_io)
+		drbd_al_complete_io(mdev, e_sector);
+
+	wake_asender(mdev);
+	put_ldev(mdev);
+
+}
+
+/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
+ */
+void drbd_endio_pri(struct bio *bio, int error)
+{
+	unsigned long flags;
+	struct drbd_request *req = bio->bi_private;
+	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
+	enum drbd_req_event what;
+	int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+	if (error)
+		dev_warn(DEV, "p %s: error=%d\n",
+			 bio_data_dir(bio) == WRITE ? "write" : "read", error);
+	if (!error && !uptodate) {
+		dev_warn(DEV, "p %s: setting error to -EIO\n",
+			 bio_data_dir(bio) == WRITE ? "write" : "read");
+		/* strange behavior of some lower level drivers...
+		 * fail the request by clearing the uptodate flag,
+		 * but do not return any error?! */
+		error = -EIO;
+	}
+
+	trace_drbd_bio(mdev, "Pri", bio, 1, NULL);
+
+	/* to avoid recursion in __req_mod */
+	if (unlikely(error)) {
+		what = (bio_data_dir(bio) == WRITE)
+			? write_completed_with_error
+			: (bio_rw(bio) == READA)
+			  ? read_completed_with_error
+			  : read_ahead_completed_with_error;
+	} else
+		what = completed_ok;
+
+	bio_put(req->private_bio);
+	req->private_bio = ERR_PTR(error);
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	__req_mod(req, what, &m);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+}
+
+int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+	/* NOTE: mdev->ldev can be NULL by the time we get here! */
+	/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
+
+	/* the only way this callback is scheduled is from _req_may_be_done,
+	 * when it is done and had a local write error, see comments there */
+	drbd_req_free(req);
+
+	return TRUE;
+}
+
+int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+	/* We should not detach for read io-error,
+	 * but try to WRITE the P_DATA_REPLY to the failed location,
+	 * to give the disk the chance to relocate that block */
+
+	spin_lock_irq(&mdev->req_lock);
+	if (cancel ||
+	    mdev->state.conn < C_CONNECTED ||
+	    mdev->state.pdsk <= D_INCONSISTENT) {
+		_req_mod(req, send_canceled);
+		spin_unlock_irq(&mdev->req_lock);
+		dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
+		return 1;
+	}
+	spin_unlock_irq(&mdev->req_lock);
+
+	return w_send_read_req(mdev, w, 0);
+}
+
+int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	ERR_IF(cancel) return 1;
+	dev_err(DEV, "resync inactive, but callback triggered??\n");
+	return 1; /* Simply ignore this! */
+}
+
+void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
+{
+	struct hash_desc desc;
+	struct scatterlist sg;
+	struct bio_vec *bvec;
+	int i;
+
+	desc.tfm = tfm;
+	desc.flags = 0;
+
+	sg_init_table(&sg, 1);
+	crypto_hash_init(&desc);
+
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+		crypto_hash_update(&desc, &sg, sg.length);
+	}
+	crypto_hash_final(&desc, digest);
+}
+
+static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	int digest_size;
+	void *digest;
+	int ok;
+
+	D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
+
+	if (unlikely(cancel)) {
+		drbd_free_ee(mdev, e);
+		return 1;
+	}
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+		digest = kmalloc(digest_size, GFP_NOIO);
+		if (digest) {
+			drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
+
+			inc_rs_pending(mdev);
+			ok = drbd_send_drequest_csum(mdev,
+						     e->sector,
+						     e->size,
+						     digest,
+						     digest_size,
+						     P_CSUM_RS_REQUEST);
+			kfree(digest);
+		} else {
+			dev_err(DEV, "kmalloc() of digest failed.\n");
+			ok = 0;
+		}
+	} else
+		ok = 1;
+
+	drbd_free_ee(mdev, e);
+
+	if (unlikely(!ok))
+		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
+	return ok;
+}
+
+#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
+
+static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
+{
+	struct drbd_epoch_entry *e;
+
+	if (!get_ldev(mdev))
+		return 0;
+
+	/* GFP_TRY, because if there is no memory available right now, this may
+	 * be rescheduled for later. It is "only" background resync, after all. */
+	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
+	if (!e) {
+		put_ldev(mdev);
+		return 2;
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	list_add(&e->w.list, &mdev->read_ee);
+	spin_unlock_irq(&mdev->req_lock);
+
+	e->private_bio->bi_end_io = drbd_endio_read_sec;
+	e->private_bio->bi_rw = READ;
+	e->w.cb = w_e_send_csum;
+
+	mdev->read_cnt += size >> 9;
+	drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio);
+
+	return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+	unsigned long flags;
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+	int queue;
+
+	spin_lock_irqsave(&mdev->req_lock, flags);
+
+	if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
+		queue = 1;
+		if (mdev->state.conn == C_VERIFY_S)
+			mdev->resync_work.cb = w_make_ov_request;
+		else
+			mdev->resync_work.cb = w_make_resync_request;
+	} else {
+		queue = 0;
+		mdev->resync_work.cb = w_resync_inactive;
+	}
+
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	/* harmless race: list_empty outside data.work.q_lock */
+	if (list_empty(&mdev->resync_work.list) && queue)
+		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
+}
+
+int w_make_resync_request(struct drbd_conf *mdev,
+		struct drbd_work *w, int cancel)
+{
+	unsigned long bit;
+	sector_t sector;
+	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	int max_segment_size = queue_max_segment_size(mdev->rq_queue);
+	int number, i, size, pe, mx;
+	int align, queued, sndbuf;
+
+	if (unlikely(cancel))
+		return 1;
+
+	if (unlikely(mdev->state.conn < C_CONNECTED)) {
+		dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
+		return 0;
+	}
+
+	if (mdev->state.conn != C_SYNC_TARGET)
+		dev_err(DEV, "%s in w_make_resync_request\n",
+			drbd_conn_str(mdev->state.conn));
+
+	if (!get_ldev(mdev)) {
+		/* Since we only need to access mdev->rsync a
+		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
+		   to continue resync with a broken disk makes no sense at
+		   all */
+		dev_err(DEV, "Disk broke down during resync!\n");
+		mdev->resync_work.cb = w_resync_inactive;
+		return 1;
+	}
+
+	number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
+	pe = atomic_read(&mdev->rs_pending_cnt);
+
+	mutex_lock(&mdev->data.mutex);
+	if (mdev->data.socket)
+		mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
+	else
+		mx = 1;
+	mutex_unlock(&mdev->data.mutex);
+
+	/* For resync rates >160MB/sec, allow more pending RS requests */
+	if (number > mx)
+		mx = number;
+
+	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
+	if ((pe + number) > mx) {
+		number = mx - pe;
+	}
+
+	for (i = 0; i < number; i++) {
+		/* Stop generating RS requests, when half of the send buffer is filled */
+		mutex_lock(&mdev->data.mutex);
+		if (mdev->data.socket) {
+			queued = mdev->data.socket->sk->sk_wmem_queued;
+			sndbuf = mdev->data.socket->sk->sk_sndbuf;
+		} else {
+			queued = 1;
+			sndbuf = 0;
+		}
+		mutex_unlock(&mdev->data.mutex);
+		if (queued > sndbuf / 2)
+			goto requeue;
+
+next_sector:
+		size = BM_BLOCK_SIZE;
+		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
+
+		if (bit == -1UL) {
+			mdev->bm_resync_fo = drbd_bm_bits(mdev);
+			mdev->resync_work.cb = w_resync_inactive;
+			put_ldev(mdev);
+			return 1;
+		}
+
+		sector = BM_BIT_TO_SECT(bit);
+
+		if (drbd_try_rs_begin_io(mdev, sector)) {
+			mdev->bm_resync_fo = bit;
+			goto requeue;
+		}
+		mdev->bm_resync_fo = bit + 1;
+
+		if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
+			drbd_rs_complete_io(mdev, sector);
+			goto next_sector;
+		}
+
+#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+		/* try to find some adjacent bits.
+		 * we stop if we have already the maximum req size.
+		 *
+		 * Additionally always align bigger requests, in order to
+		 * be prepared for all stripe sizes of software RAIDs.
+		 *
+		 * we _do_ care about the agreed-upon q->max_segment_size
+		 * here, as splitting up the requests on the other side is more
+		 * difficult.  the consequence is, that on lvm and md and other
+		 * "indirect" devices, this is dead code, since
+		 * q->max_segment_size will be PAGE_SIZE.
+		 */
+		align = 1;
+		for (;;) {
+			if (size + BM_BLOCK_SIZE > max_segment_size)
+				break;
+
+			/* Be always aligned */
+			if (sector & ((1<<(align+3))-1))
+				break;
+
+			/* do not cross extent boundaries */
+			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
+				break;
+			/* now, is it actually dirty, after all?
+			 * caution, drbd_bm_test_bit is tri-state for some
+			 * obscure reason; ( b == 0 ) would get the out-of-band
+			 * only accidentally right because of the "oddly sized"
+			 * adjustment below */
+			if (drbd_bm_test_bit(mdev, bit+1) != 1)
+				break;
+			bit++;
+			size += BM_BLOCK_SIZE;
+			if ((BM_BLOCK_SIZE << align) <= size)
+				align++;
+			i++;
+		}
+		/* if we merged some,
+		 * reset the offset to start the next drbd_bm_find_next from */
+		if (size > BM_BLOCK_SIZE)
+			mdev->bm_resync_fo = bit + 1;
+#endif
+
+		/* adjust very last sectors, in case we are oddly sized */
+		if (sector + (size>>9) > capacity)
+			size = (capacity-sector)<<9;
+		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
+			switch (read_for_csum(mdev, sector, size)) {
+			case 0: /* Disk failure*/
+				put_ldev(mdev);
+				return 0;
+			case 2: /* Allocation failed */
+				drbd_rs_complete_io(mdev, sector);
+				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+				goto requeue;
+			/* case 1: everything ok */
+			}
+		} else {
+			inc_rs_pending(mdev);
+			if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
+					       sector, size, ID_SYNCER)) {
+				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
+				dec_rs_pending(mdev);
+				put_ldev(mdev);
+				return 0;
+			}
+		}
+	}
+
+	if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
+		/* last syncer _request_ was sent,
+		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
+		 * next sync group will resume), as soon as we receive the last
+		 * resync data block, and the last bit is cleared.
+		 * until then resync "work" is "inactive" ...
+		 */
+		mdev->resync_work.cb = w_resync_inactive;
+		put_ldev(mdev);
+		return 1;
+	}
+
+ requeue:
+	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	put_ldev(mdev);
+	return 1;
+}
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	int number, i, size;
+	sector_t sector;
+	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+
+	if (unlikely(cancel))
+		return 1;
+
+	if (unlikely(mdev->state.conn < C_CONNECTED)) {
+		dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
+		return 0;
+	}
+
+	number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
+	if (atomic_read(&mdev->rs_pending_cnt) > number)
+		goto requeue;
+
+	number -= atomic_read(&mdev->rs_pending_cnt);
+
+	sector = mdev->ov_position;
+	for (i = 0; i < number; i++) {
+		if (sector >= capacity) {
+			mdev->resync_work.cb = w_resync_inactive;
+			return 1;
+		}
+
+		size = BM_BLOCK_SIZE;
+
+		if (drbd_try_rs_begin_io(mdev, sector)) {
+			mdev->ov_position = sector;
+			goto requeue;
+		}
+
+		if (sector + (size>>9) > capacity)
+			size = (capacity-sector)<<9;
+
+		inc_rs_pending(mdev);
+		if (!drbd_send_ov_request(mdev, sector, size)) {
+			dec_rs_pending(mdev);
+			return 0;
+		}
+		sector += BM_SECT_PER_BIT;
+	}
+	mdev->ov_position = sector;
+
+ requeue:
+	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	return 1;
+}
+
+
+int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	kfree(w);
+	ov_oos_print(mdev);
+	drbd_resync_finished(mdev);
+
+	return 1;
+}
+
+static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	kfree(w);
+
+	drbd_resync_finished(mdev);
+
+	return 1;
+}
+
+int drbd_resync_finished(struct drbd_conf *mdev)
+{
+	unsigned long db, dt, dbdt;
+	unsigned long n_oos;
+	union drbd_state os, ns;
+	struct drbd_work *w;
+	char *khelper_cmd = NULL;
+
+	/* Remove all elements from the resync LRU. Since future actions
+	 * might set bits in the (main) bitmap, then the entries in the
+	 * resync LRU would be wrong. */
+	if (drbd_rs_del_all(mdev)) {
+		/* In case this is not possible now, most probably because
+		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
+		 * queue (or even the read operations for those packets
+		 * is not finished by now).   Retry in 100ms. */
+
+		drbd_kick_lo(mdev);
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ / 10);
+		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+		if (w) {
+			w->cb = w_resync_finished;
+			drbd_queue_work(&mdev->data.work, w);
+			return 1;
+		}
+		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
+	}
+
+	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+	if (dt <= 0)
+		dt = 1;
+	db = mdev->rs_total;
+	dbdt = Bit2KB(db/dt);
+	mdev->rs_paused /= HZ;
+
+	if (!get_ldev(mdev))
+		goto out;
+
+	spin_lock_irq(&mdev->req_lock);
+	os = mdev->state;
+
+	/* This protects us against multiple calls (that can happen in the presence
+	   of application IO), and against connectivity loss just before we arrive here. */
+	if (os.conn <= C_CONNECTED)
+		goto out_unlock;
+
+	ns = os;
+	ns.conn = C_CONNECTED;
+
+	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
+	     (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
+	     "Online verify " : "Resync",
+	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
+
+	n_oos = drbd_bm_total_weight(mdev);
+
+	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
+		if (n_oos) {
+			dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
+			      n_oos, Bit2KB(1));
+			khelper_cmd = "out-of-sync";
+		}
+	} else {
+		D_ASSERT((n_oos - mdev->rs_failed) == 0);
+
+		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
+			khelper_cmd = "after-resync-target";
+
+		if (mdev->csums_tfm && mdev->rs_total) {
+			const unsigned long s = mdev->rs_same_csum;
+			const unsigned long t = mdev->rs_total;
+			const int ratio =
+				(t == 0)     ? 0 :
+			(t < 100000) ? ((s*100)/t) : (s/(t/100));
+			dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+			     "transferred %luK total %luK\n",
+			     ratio,
+			     Bit2KB(mdev->rs_same_csum),
+			     Bit2KB(mdev->rs_total - mdev->rs_same_csum),
+			     Bit2KB(mdev->rs_total));
+		}
+	}
+
+	if (mdev->rs_failed) {
+		dev_info(DEV, "            %lu failed blocks\n", mdev->rs_failed);
+
+		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+			ns.disk = D_INCONSISTENT;
+			ns.pdsk = D_UP_TO_DATE;
+		} else {
+			ns.disk = D_UP_TO_DATE;
+			ns.pdsk = D_INCONSISTENT;
+		}
+	} else {
+		ns.disk = D_UP_TO_DATE;
+		ns.pdsk = D_UP_TO_DATE;
+
+		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+			if (mdev->p_uuid) {
+				int i;
+				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
+					_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
+				drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
+				_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
+			} else {
+				dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
+			}
+		}
+
+		drbd_uuid_set_bm(mdev, 0UL);
+
+		if (mdev->p_uuid) {
+			/* Now the two UUID sets are equal, update what we
+			 * know of the peer. */
+			int i;
+			for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+				mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+		}
+	}
+
+	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+out_unlock:
+	spin_unlock_irq(&mdev->req_lock);
+	put_ldev(mdev);
+out:
+	mdev->rs_total  = 0;
+	mdev->rs_failed = 0;
+	mdev->rs_paused = 0;
+	mdev->ov_start_sector = 0;
+
+	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
+		dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
+	}
+
+	if (khelper_cmd)
+		drbd_khelper(mdev, khelper_cmd);
+
+	return 1;
+}
+
+/* helper */
+static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+	if (drbd_bio_has_active_page(e->private_bio)) {
+		/* This might happen if sendpage() has not finished */
+		spin_lock_irq(&mdev->req_lock);
+		list_add_tail(&e->w.list, &mdev->net_ee);
+		spin_unlock_irq(&mdev->req_lock);
+	} else
+		drbd_free_ee(mdev, e);
+}
+
+/**
+ * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
+ * @mdev:	DRBD device.
+ * @w:		work object.
+ * @cancel:	The connection will be closed anyways
+ */
+int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	int ok;
+
+	if (unlikely(cancel)) {
+		drbd_free_ee(mdev, e);
+		dec_unacked(mdev);
+		return 1;
+	}
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		ok = drbd_send_block(mdev, P_DATA_REPLY, e);
+	} else {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
+			    (unsigned long long)e->sector);
+
+		ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
+	}
+
+	dec_unacked(mdev);
+
+	move_to_net_ee_or_free(mdev, e);
+
+	if (unlikely(!ok))
+		dev_err(DEV, "drbd_send_block() failed\n");
+	return ok;
+}
+
+/**
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
+ * @mdev:	DRBD device.
+ * @w:		work object.
+ * @cancel:	The connection will be closed anyways
+ */
+int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	int ok;
+
+	if (unlikely(cancel)) {
+		drbd_free_ee(mdev, e);
+		dec_unacked(mdev);
+		return 1;
+	}
+
+	if (get_ldev_if_state(mdev, D_FAILED)) {
+		drbd_rs_complete_io(mdev, e->sector);
+		put_ldev(mdev);
+	}
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
+			inc_rs_pending(mdev);
+			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+		} else {
+			if (__ratelimit(&drbd_ratelimit_state))
+				dev_err(DEV, "Not sending RSDataReply, "
+				    "partner DISKLESS!\n");
+			ok = 1;
+		}
+	} else {
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
+			    (unsigned long long)e->sector);
+
+		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+
+		/* update resync data with failure */
+		drbd_rs_failed_io(mdev, e->sector, e->size);
+	}
+
+	dec_unacked(mdev);
+
+	move_to_net_ee_or_free(mdev, e);
+
+	if (unlikely(!ok))
+		dev_err(DEV, "drbd_send_block() failed\n");
+	return ok;
+}
+
+int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	struct digest_info *di;
+	int digest_size;
+	void *digest = NULL;
+	int ok, eq = 0;
+
+	if (unlikely(cancel)) {
+		drbd_free_ee(mdev, e);
+		dec_unacked(mdev);
+		return 1;
+	}
+
+	drbd_rs_complete_io(mdev, e->sector);
+
+	di = (struct digest_info *)(unsigned long)e->block_id;
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		/* quick hack to try to avoid a race against reconfiguration.
+		 * a real fix would be much more involved,
+		 * introducing more locking mechanisms */
+		if (mdev->csums_tfm) {
+			digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+			D_ASSERT(digest_size == di->digest_size);
+			digest = kmalloc(digest_size, GFP_NOIO);
+		}
+		if (digest) {
+			drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
+			eq = !memcmp(digest, di->digest, digest_size);
+			kfree(digest);
+		}
+
+		if (eq) {
+			drbd_set_in_sync(mdev, e->sector, e->size);
+			mdev->rs_same_csum++;
+			ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
+		} else {
+			inc_rs_pending(mdev);
+			e->block_id = ID_SYNCER;
+			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+		}
+	} else {
+		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
+	}
+
+	dec_unacked(mdev);
+
+	kfree(di);
+
+	move_to_net_ee_or_free(mdev, e);
+
+	if (unlikely(!ok))
+		dev_err(DEV, "drbd_send_block/ack() failed\n");
+	return ok;
+}
+
+int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	int digest_size;
+	void *digest;
+	int ok = 1;
+
+	if (unlikely(cancel))
+		goto out;
+
+	if (unlikely(!drbd_bio_uptodate(e->private_bio)))
+		goto out;
+
+	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+	/* FIXME if this allocation fails, online verify will not terminate! */
+	digest = kmalloc(digest_size, GFP_NOIO);
+	if (digest) {
+		drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
+		inc_rs_pending(mdev);
+		ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+					     digest, digest_size, P_OV_REPLY);
+		if (!ok)
+			dec_rs_pending(mdev);
+		kfree(digest);
+	}
+
+out:
+	drbd_free_ee(mdev, e);
+
+	dec_unacked(mdev);
+
+	return ok;
+}
+
+void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
+{
+	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
+		mdev->ov_last_oos_size += size>>9;
+	} else {
+		mdev->ov_last_oos_start = sector;
+		mdev->ov_last_oos_size = size>>9;
+	}
+	drbd_set_out_of_sync(mdev, sector, size);
+	set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+}
+
+int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+	struct digest_info *di;
+	int digest_size;
+	void *digest;
+	int ok, eq = 0;
+
+	if (unlikely(cancel)) {
+		drbd_free_ee(mdev, e);
+		dec_unacked(mdev);
+		return 1;
+	}
+
+	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
+	 * the resync lru has been cleaned up already */
+	drbd_rs_complete_io(mdev, e->sector);
+
+	di = (struct digest_info *)(unsigned long)e->block_id;
+
+	if (likely(drbd_bio_uptodate(e->private_bio))) {
+		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+		digest = kmalloc(digest_size, GFP_NOIO);
+		if (digest) {
+			drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
+
+			D_ASSERT(digest_size == di->digest_size);
+			eq = !memcmp(digest, di->digest, digest_size);
+			kfree(digest);
+		}
+	} else {
+		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
+	}
+
+	dec_unacked(mdev);
+
+	kfree(di);
+
+	if (!eq)
+		drbd_ov_oos_found(mdev, e->sector, e->size);
+	else
+		ov_oos_print(mdev);
+
+	ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
+			      eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
+
+	drbd_free_ee(mdev, e);
+
+	if (--mdev->ov_left == 0) {
+		ov_oos_print(mdev);
+		drbd_resync_finished(mdev);
+	}
+
+	return ok;
+}
+
+int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
+	complete(&b->done);
+	return 1;
+}
+
+int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
+	struct p_barrier *p = &mdev->data.sbuf.barrier;
+	int ok = 1;
+
+	/* really avoid racing with tl_clear.  w.cb may have been referenced
+	 * just before it was reassigned and re-queued, so double check that.
+	 * actually, this race was harmless, since we only try to send the
+	 * barrier packet here, and otherwise do nothing with the object.
+	 * but compare with the head of w_clear_epoch */
+	spin_lock_irq(&mdev->req_lock);
+	if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
+		cancel = 1;
+	spin_unlock_irq(&mdev->req_lock);
+	if (cancel)
+		return 1;
+
+	if (!drbd_get_data_sock(mdev))
+		return 0;
+	p->barrier = b->br_number;
+	/* inc_ap_pending was done where this was queued.
+	 * dec_ap_pending will be done in got_BarrierAck
+	 * or (on connection loss) in w_clear_epoch.  */
+	ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
+				(struct p_header *)p, sizeof(*p), 0);
+	drbd_put_data_sock(mdev);
+
+	return ok;
+}
+
+int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	if (cancel)
+		return 1;
+	return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
+}
+
+/**
+ * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
+ * @mdev:	DRBD device.
+ * @w:		work object.
+ * @cancel:	The connection will be closed anyways
+ */
+int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+	int ok;
+
+	if (unlikely(cancel)) {
+		req_mod(req, send_canceled);
+		return 1;
+	}
+
+	ok = drbd_send_dblock(mdev, req);
+	req_mod(req, ok ? handed_over_to_network : send_failed);
+
+	return ok;
+}
+
+/**
+ * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
+ * @mdev:	DRBD device.
+ * @w:		work object.
+ * @cancel:	The connection will be closed anyways
+ */
+int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+	int ok;
+
+	if (unlikely(cancel)) {
+		req_mod(req, send_canceled);
+		return 1;
+	}
+
+	ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size,
+				(unsigned long)req);
+
+	if (!ok) {
+		/* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
+		 * so this is probably redundant */
+		if (mdev->state.conn >= C_CONNECTED)
+			drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+	}
+	req_mod(req, ok ? handed_over_to_network : send_failed);
+
+	return ok;
+}
+
+static int _drbd_may_sync_now(struct drbd_conf *mdev)
+{
+	struct drbd_conf *odev = mdev;
+
+	while (1) {
+		if (odev->sync_conf.after == -1)
+			return 1;
+		odev = minor_to_mdev(odev->sync_conf.after);
+		ERR_IF(!odev) return 1;
+		if ((odev->state.conn >= C_SYNC_SOURCE &&
+		     odev->state.conn <= C_PAUSED_SYNC_T) ||
+		    odev->state.aftr_isp || odev->state.peer_isp ||
+		    odev->state.user_isp)
+			return 0;
+	}
+}
+
+/**
+ * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * @mdev:	DRBD device.
+ *
+ * Called from process context only (admin command and after_state_ch).
+ */
+static int _drbd_pause_after(struct drbd_conf *mdev)
+{
+	struct drbd_conf *odev;
+	int i, rv = 0;
+
+	for (i = 0; i < minor_count; i++) {
+		odev = minor_to_mdev(i);
+		if (!odev)
+			continue;
+		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+			continue;
+		if (!_drbd_may_sync_now(odev))
+			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
+			       != SS_NOTHING_TO_DO);
+	}
+
+	return rv;
+}
+
+/**
+ * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * @mdev:	DRBD device.
+ *
+ * Called from process context only (admin command and worker).
+ */
+static int _drbd_resume_next(struct drbd_conf *mdev)
+{
+	struct drbd_conf *odev;
+	int i, rv = 0;
+
+	for (i = 0; i < minor_count; i++) {
+		odev = minor_to_mdev(i);
+		if (!odev)
+			continue;
+		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+			continue;
+		if (odev->state.aftr_isp) {
+			if (_drbd_may_sync_now(odev))
+				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
+							CS_HARD, NULL)
+				       != SS_NOTHING_TO_DO) ;
+		}
+	}
+	return rv;
+}
+
+void resume_next_sg(struct drbd_conf *mdev)
+{
+	write_lock_irq(&global_state_lock);
+	_drbd_resume_next(mdev);
+	write_unlock_irq(&global_state_lock);
+}
+
+void suspend_other_sg(struct drbd_conf *mdev)
+{
+	write_lock_irq(&global_state_lock);
+	_drbd_pause_after(mdev);
+	write_unlock_irq(&global_state_lock);
+}
+
+static int sync_after_error(struct drbd_conf *mdev, int o_minor)
+{
+	struct drbd_conf *odev;
+
+	if (o_minor == -1)
+		return NO_ERROR;
+	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
+		return ERR_SYNC_AFTER;
+
+	/* check for loops */
+	odev = minor_to_mdev(o_minor);
+	while (1) {
+		if (odev == mdev)
+			return ERR_SYNC_AFTER_CYCLE;
+
+		/* dependency chain ends here, no cycles. */
+		if (odev->sync_conf.after == -1)
+			return NO_ERROR;
+
+		/* follow the dependency chain */
+		odev = minor_to_mdev(odev->sync_conf.after);
+	}
+}
+
+int drbd_alter_sa(struct drbd_conf *mdev, int na)
+{
+	int changes;
+	int retcode;
+
+	write_lock_irq(&global_state_lock);
+	retcode = sync_after_error(mdev, na);
+	if (retcode == NO_ERROR) {
+		mdev->sync_conf.after = na;
+		do {
+			changes  = _drbd_pause_after(mdev);
+			changes |= _drbd_resume_next(mdev);
+		} while (changes);
+	}
+	write_unlock_irq(&global_state_lock);
+	return retcode;
+}
+
+/**
+ * drbd_start_resync() - Start the resync process
+ * @mdev:	DRBD device.
+ * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
+ *
+ * This function might bring you directly into one of the
+ * C_PAUSED_SYNC_* states.
+ */
+void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
+{
+	union drbd_state ns;
+	int r;
+
+	if (mdev->state.conn >= C_SYNC_SOURCE) {
+		dev_err(DEV, "Resync already running!\n");
+		return;
+	}
+
+	trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n",
+			  side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource");
+
+	/* In case a previous resync run was aborted by an IO error/detach on the peer. */
+	drbd_rs_cancel_all(mdev);
+
+	if (side == C_SYNC_TARGET) {
+		/* Since application IO was locked out during C_WF_BITMAP_T and
+		   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
+		   we check that we might make the data inconsistent. */
+		r = drbd_khelper(mdev, "before-resync-target");
+		r = (r >> 8) & 0xff;
+		if (r > 0) {
+			dev_info(DEV, "before-resync-target handler returned %d, "
+			     "dropping connection.\n", r);
+			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+			return;
+		}
+	}
+
+	drbd_state_lock(mdev);
+
+	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		drbd_state_unlock(mdev);
+		return;
+	}
+
+	if (side == C_SYNC_TARGET) {
+		mdev->bm_resync_fo = 0;
+	} else /* side == C_SYNC_SOURCE */ {
+		u64 uuid;
+
+		get_random_bytes(&uuid, sizeof(u64));
+		drbd_uuid_set(mdev, UI_BITMAP, uuid);
+		drbd_send_sync_uuid(mdev, uuid);
+
+		D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+	}
+
+	write_lock_irq(&global_state_lock);
+	ns = mdev->state;
+
+	ns.aftr_isp = !_drbd_may_sync_now(mdev);
+
+	ns.conn = side;
+
+	if (side == C_SYNC_TARGET)
+		ns.disk = D_INCONSISTENT;
+	else /* side == C_SYNC_SOURCE */
+		ns.pdsk = D_INCONSISTENT;
+
+	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+	ns = mdev->state;
+
+	if (ns.conn < C_CONNECTED)
+		r = SS_UNKNOWN_ERROR;
+
+	if (r == SS_SUCCESS) {
+		mdev->rs_total     =
+		mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+		mdev->rs_failed    = 0;
+		mdev->rs_paused    = 0;
+		mdev->rs_start     =
+		mdev->rs_mark_time = jiffies;
+		mdev->rs_same_csum = 0;
+		_drbd_pause_after(mdev);
+	}
+	write_unlock_irq(&global_state_lock);
+	drbd_state_unlock(mdev);
+	put_ldev(mdev);
+
+	if (r == SS_SUCCESS) {
+		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
+		     drbd_conn_str(ns.conn),
+		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
+		     (unsigned long) mdev->rs_total);
+
+		if (mdev->rs_total == 0) {
+			/* Peer still reachable? Beware of failing before-resync-target handlers! */
+			request_ping(mdev);
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */
+			drbd_resync_finished(mdev);
+			return;
+		}
+
+		/* ns.conn may already be != mdev->state.conn,
+		 * we may have been paused in between, or become paused until
+		 * the timer triggers.
+		 * No matter, that is handled in resync_timer_fn() */
+		if (ns.conn == C_SYNC_TARGET)
+			mod_timer(&mdev->resync_timer, jiffies);
+
+		drbd_md_sync(mdev);
+	}
+}
+
+int drbd_worker(struct drbd_thread *thi)
+{
+	struct drbd_conf *mdev = thi->mdev;
+	struct drbd_work *w = NULL;
+	LIST_HEAD(work_list);
+	int intr = 0, i;
+
+	sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev));
+
+	while (get_t_state(thi) == Running) {
+		drbd_thread_current_set_cpu(mdev);
+
+		if (down_trylock(&mdev->data.work.s)) {
+			mutex_lock(&mdev->data.mutex);
+			if (mdev->data.socket && !mdev->net_conf->no_cork)
+				drbd_tcp_uncork(mdev->data.socket);
+			mutex_unlock(&mdev->data.mutex);
+
+			intr = down_interruptible(&mdev->data.work.s);
+
+			mutex_lock(&mdev->data.mutex);
+			if (mdev->data.socket  && !mdev->net_conf->no_cork)
+				drbd_tcp_cork(mdev->data.socket);
+			mutex_unlock(&mdev->data.mutex);
+		}
+
+		if (intr) {
+			D_ASSERT(intr == -EINTR);
+			flush_signals(current);
+			ERR_IF (get_t_state(thi) == Running)
+				continue;
+			break;
+		}
+
+		if (get_t_state(thi) != Running)
+			break;
+		/* With this break, we have done a down() but not consumed
+		   the entry from the list. The cleanup code takes care of
+		   this...   */
+
+		w = NULL;
+		spin_lock_irq(&mdev->data.work.q_lock);
+		ERR_IF(list_empty(&mdev->data.work.q)) {
+			/* something terribly wrong in our logic.
+			 * we were able to down() the semaphore,
+			 * but the list is empty... doh.
+			 *
+			 * what is the best thing to do now?
+			 * try again from scratch, restarting the receiver,
+			 * asender, whatnot? could break even more ugly,
+			 * e.g. when we are primary, but no good local data.
+			 *
+			 * I'll try to get away just starting over this loop.
+			 */
+			spin_unlock_irq(&mdev->data.work.q_lock);
+			continue;
+		}
+		w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
+		list_del_init(&w->list);
+		spin_unlock_irq(&mdev->data.work.q_lock);
+
+		if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) {
+			/* dev_warn(DEV, "worker: a callback failed! \n"); */
+			if (mdev->state.conn >= C_CONNECTED)
+				drbd_force_state(mdev,
+						NS(conn, C_NETWORK_FAILURE));
+		}
+	}
+	D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
+	D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
+
+	spin_lock_irq(&mdev->data.work.q_lock);
+	i = 0;
+	while (!list_empty(&mdev->data.work.q)) {
+		list_splice_init(&mdev->data.work.q, &work_list);
+		spin_unlock_irq(&mdev->data.work.q_lock);
+
+		while (!list_empty(&work_list)) {
+			w = list_entry(work_list.next, struct drbd_work, list);
+			list_del_init(&w->list);
+			w->cb(mdev, w, 1);
+			i++; /* dead debugging code */
+		}
+
+		spin_lock_irq(&mdev->data.work.q_lock);
+	}
+	sema_init(&mdev->data.work.s, 0);
+	/* DANGEROUS race: if someone did queue his work within the spinlock,
+	 * but up() ed outside the spinlock, we could get an up() on the
+	 * semaphore without corresponding list entry.
+	 * So don't do that.
+	 */
+	spin_unlock_irq(&mdev->data.work.q_lock);
+
+	D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
+	/* _drbd_set_state only uses stop_nowait.
+	 * wait here for the Exiting receiver. */
+	drbd_thread_stop(&mdev->receiver);
+	drbd_mdev_cleanup(mdev);
+
+	dev_info(DEV, "worker terminated\n");
+
+	clear_bit(DEVICE_DYING, &mdev->flags);
+	clear_bit(CONFIG_PENDING, &mdev->flags);
+	wake_up(&mdev->state_wait);
+
+	return 0;
+}
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
new file mode 100644
index 000000000000..f93fa111ce50
--- /dev/null
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -0,0 +1,91 @@
+#ifndef _DRBD_WRAPPERS_H
+#define _DRBD_WRAPPERS_H
+
+#include <linux/ctype.h>
+#include <linux/mm.h>
+
+/* see get_sb_bdev and bd_claim */
+extern char *drbd_sec_holder;
+
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
+					sector_t size)
+{
+	/* set_capacity(mdev->this_bdev->bd_disk, size); */
+	set_capacity(mdev->vdisk, size);
+	mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
+
+static inline int drbd_bio_has_active_page(struct bio *bio)
+{
+	struct bio_vec *bvec;
+	int i;
+
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		if (page_count(bvec->bv_page) > 1)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_endio_read_sec(struct bio *bio, int error);
+extern void drbd_endio_write_sec(struct bio *bio, int error);
+extern void drbd_endio_pri(struct bio *bio, int error);
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_conf *mdev,
+					     int fault_type, struct bio *bio)
+{
+	__release(local);
+	if (!bio->bi_bdev) {
+		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+				"bio->bi_bdev == NULL\n",
+		       mdev_to_minor(mdev));
+		dump_stack();
+		bio_endio(bio, -ENODEV);
+		return;
+	}
+
+	if (FAULT_ACTIVE(mdev, fault_type))
+		bio_endio(bio, -EIO);
+	else
+		generic_make_request(bio);
+}
+
+static inline void drbd_plug_device(struct drbd_conf *mdev)
+{
+	struct request_queue *q;
+	q = bdev_get_queue(mdev->this_bdev);
+
+	spin_lock_irq(q->queue_lock);
+
+/* XXX the check on !blk_queue_plugged is redundant,
+ * implicitly checked in blk_plug_device */
+
+	if (!blk_queue_plugged(q)) {
+		blk_plug_device(q);
+		del_timer(&q->unplug_timer);
+		/* unplugging should not happen automatically... */
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+
+static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
+{
+        return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
+                == CRYPTO_ALG_TYPE_HASH;
+}
+
+#ifndef __CHECKER__
+# undef __cond_lock
+# define __cond_lock(x,c) (c)
+#endif
+
+#endif
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
new file mode 100644
index 000000000000..69dc711f37b3
--- /dev/null
+++ b/include/linux/drbd.h
@@ -0,0 +1,349 @@
+/*
+  drbd.h
+  Kernel module for 2.6.x Kernels
+
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+  Copyright (C) 2001-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+  Copyright (C) 2001-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+#ifndef DRBD_H
+#define DRBD_H
+#include <linux/connector.h>
+#include <asm/types.h>
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#else
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <limits.h>
+
+/* Altough the Linux source code makes a difference between
+   generic endianness and the bitfields' endianness, there is no
+   architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness
+   does not match the generic endianness. */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN_BITFIELD
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define __BIG_ENDIAN_BITFIELD
+#else
+# error "sorry, weird endianness on this box"
+#endif
+
+#endif
+
+
+extern const char *drbd_buildtag(void);
+#define REL_VERSION "8.3.3rc2"
+#define API_VERSION 88
+#define PRO_VERSION_MIN 86
+#define PRO_VERSION_MAX 91
+
+
+enum drbd_io_error_p {
+	EP_PASS_ON, /* FIXME should the better be named "Ignore"? */
+	EP_CALL_HELPER,
+	EP_DETACH
+};
+
+enum drbd_fencing_p {
+	FP_DONT_CARE,
+	FP_RESOURCE,
+	FP_STONITH
+};
+
+enum drbd_disconnect_p {
+	DP_RECONNECT,
+	DP_DROP_NET_CONF,
+	DP_FREEZE_IO
+};
+
+enum drbd_after_sb_p {
+	ASB_DISCONNECT,
+	ASB_DISCARD_YOUNGER_PRI,
+	ASB_DISCARD_OLDER_PRI,
+	ASB_DISCARD_ZERO_CHG,
+	ASB_DISCARD_LEAST_CHG,
+	ASB_DISCARD_LOCAL,
+	ASB_DISCARD_REMOTE,
+	ASB_CONSENSUS,
+	ASB_DISCARD_SECONDARY,
+	ASB_CALL_HELPER,
+	ASB_VIOLENTLY
+};
+
+/* KEEP the order, do not delete or insert. Only append. */
+enum drbd_ret_codes {
+	ERR_CODE_BASE		= 100,
+	NO_ERROR		= 101,
+	ERR_LOCAL_ADDR		= 102,
+	ERR_PEER_ADDR		= 103,
+	ERR_OPEN_DISK		= 104,
+	ERR_OPEN_MD_DISK	= 105,
+	ERR_DISK_NOT_BDEV	= 107,
+	ERR_MD_NOT_BDEV		= 108,
+	ERR_DISK_TO_SMALL	= 111,
+	ERR_MD_DISK_TO_SMALL	= 112,
+	ERR_BDCLAIM_DISK	= 114,
+	ERR_BDCLAIM_MD_DISK	= 115,
+	ERR_MD_IDX_INVALID	= 116,
+	ERR_IO_MD_DISK		= 118,
+	ERR_MD_INVALID          = 119,
+	ERR_AUTH_ALG		= 120,
+	ERR_AUTH_ALG_ND		= 121,
+	ERR_NOMEM		= 122,
+	ERR_DISCARD		= 123,
+	ERR_DISK_CONFIGURED	= 124,
+	ERR_NET_CONFIGURED	= 125,
+	ERR_MANDATORY_TAG	= 126,
+	ERR_MINOR_INVALID	= 127,
+	ERR_INTR		= 129, /* EINTR */
+	ERR_RESIZE_RESYNC	= 130,
+	ERR_NO_PRIMARY		= 131,
+	ERR_SYNC_AFTER		= 132,
+	ERR_SYNC_AFTER_CYCLE	= 133,
+	ERR_PAUSE_IS_SET	= 134,
+	ERR_PAUSE_IS_CLEAR	= 135,
+	ERR_PACKET_NR		= 137,
+	ERR_NO_DISK		= 138,
+	ERR_NOT_PROTO_C		= 139,
+	ERR_NOMEM_BITMAP	= 140,
+	ERR_INTEGRITY_ALG	= 141, /* DRBD 8.2 only */
+	ERR_INTEGRITY_ALG_ND	= 142, /* DRBD 8.2 only */
+	ERR_CPU_MASK_PARSE	= 143, /* DRBD 8.2 only */
+	ERR_CSUMS_ALG		= 144, /* DRBD 8.2 only */
+	ERR_CSUMS_ALG_ND	= 145, /* DRBD 8.2 only */
+	ERR_VERIFY_ALG		= 146, /* DRBD 8.2 only */
+	ERR_VERIFY_ALG_ND	= 147, /* DRBD 8.2 only */
+	ERR_CSUMS_RESYNC_RUNNING= 148, /* DRBD 8.2 only */
+	ERR_VERIFY_RUNNING	= 149, /* DRBD 8.2 only */
+	ERR_DATA_NOT_CURRENT	= 150,
+	ERR_CONNECTED		= 151, /* DRBD 8.3 only */
+
+	/* insert new ones above this line */
+	AFTER_LAST_ERR_CODE
+};
+
+#define DRBD_PROT_A   1
+#define DRBD_PROT_B   2
+#define DRBD_PROT_C   3
+
+enum drbd_role {
+	R_UNKNOWN = 0,
+	R_PRIMARY = 1,     /* role */
+	R_SECONDARY = 2,   /* role */
+	R_MASK = 3,
+};
+
+/* The order of these constants is important.
+ * The lower ones (<C_WF_REPORT_PARAMS) indicate
+ * that there is no socket!
+ * >=C_WF_REPORT_PARAMS ==> There is a socket
+ */
+enum drbd_conns {
+	C_STANDALONE,
+	C_DISCONNECTING,  /* Temporal state on the way to StandAlone. */
+	C_UNCONNECTED,    /* >= C_UNCONNECTED -> inc_net() succeeds */
+
+	/* These temporal states are all used on the way
+	 * from >= C_CONNECTED to Unconnected.
+	 * The 'disconnect reason' states
+	 * I do not allow to change beween them. */
+	C_TIMEOUT,
+	C_BROKEN_PIPE,
+	C_NETWORK_FAILURE,
+	C_PROTOCOL_ERROR,
+	C_TEAR_DOWN,
+
+	C_WF_CONNECTION,
+	C_WF_REPORT_PARAMS, /* we have a socket */
+	C_CONNECTED,      /* we have introduced each other */
+	C_STARTING_SYNC_S,  /* starting full sync by admin request. */
+	C_STARTING_SYNC_T,  /* stariing full sync by admin request. */
+	C_WF_BITMAP_S,
+	C_WF_BITMAP_T,
+	C_WF_SYNC_UUID,
+
+	/* All SyncStates are tested with this comparison
+	 * xx >= C_SYNC_SOURCE && xx <= C_PAUSED_SYNC_T */
+	C_SYNC_SOURCE,
+	C_SYNC_TARGET,
+	C_VERIFY_S,
+	C_VERIFY_T,
+	C_PAUSED_SYNC_S,
+	C_PAUSED_SYNC_T,
+	C_MASK = 31
+};
+
+enum drbd_disk_state {
+	D_DISKLESS,
+	D_ATTACHING,      /* In the process of reading the meta-data */
+	D_FAILED,         /* Becomes D_DISKLESS as soon as we told it the peer */
+			/* when >= D_FAILED it is legal to access mdev->bc */
+	D_NEGOTIATING,    /* Late attaching state, we need to talk to the peer */
+	D_INCONSISTENT,
+	D_OUTDATED,
+	D_UNKNOWN,       /* Only used for the peer, never for myself */
+	D_CONSISTENT,     /* Might be D_OUTDATED, might be D_UP_TO_DATE ... */
+	D_UP_TO_DATE,       /* Only this disk state allows applications' IO ! */
+	D_MASK = 15
+};
+
+union drbd_state {
+/* According to gcc's docs is the ...
+ * The order of allocation of bit-fields within a unit (C90 6.5.2.1, C99 6.7.2.1).
+ * Determined by ABI.
+ * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com>
+ * even though we transmit as "cpu_to_be32(state)",
+ * the offsets of the bitfields still need to be swapped
+ * on different endianess.
+ */
+	struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned susp:1 ;   /* 2/2	 IO suspended  no/yes */
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned peer_isp:1 ;
+		unsigned user_isp:1 ;
+		unsigned _pad:11;   /* 0	 unused */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+		unsigned _pad:11;   /* 0	 unused */
+		unsigned user_isp:1 ;
+		unsigned peer_isp:1 ;
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned susp:1 ;   /* 2/2	 IO suspended  no/yes */
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+#else
+# error "this endianess is not supported"
+#endif
+	};
+	unsigned int i;
+};
+
+enum drbd_state_ret_codes {
+	SS_CW_NO_NEED = 4,
+	SS_CW_SUCCESS = 3,
+	SS_NOTHING_TO_DO = 2,
+	SS_SUCCESS = 1,
+	SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */
+	SS_TWO_PRIMARIES = -1,
+	SS_NO_UP_TO_DATE_DISK = -2,
+	SS_NO_LOCAL_DISK = -4,
+	SS_NO_REMOTE_DISK = -5,
+	SS_CONNECTED_OUTDATES = -6,
+	SS_PRIMARY_NOP = -7,
+	SS_RESYNC_RUNNING = -8,
+	SS_ALREADY_STANDALONE = -9,
+	SS_CW_FAILED_BY_PEER = -10,
+	SS_IS_DISKLESS = -11,
+	SS_DEVICE_IN_USE = -12,
+	SS_NO_NET_CONFIG = -13,
+	SS_NO_VERIFY_ALG = -14,       /* drbd-8.2 only */
+	SS_NEED_CONNECTION = -15,    /* drbd-8.2 only */
+	SS_LOWER_THAN_OUTDATED = -16,
+	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */
+	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
+	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
+	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */
+};
+
+/* from drbd_strings.c */
+extern const char *drbd_conn_str(enum drbd_conns);
+extern const char *drbd_role_str(enum drbd_role);
+extern const char *drbd_disk_str(enum drbd_disk_state);
+extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes);
+
+#define SHARED_SECRET_MAX 64
+
+#define MDF_CONSISTENT		(1 << 0)
+#define MDF_PRIMARY_IND		(1 << 1)
+#define MDF_CONNECTED_IND	(1 << 2)
+#define MDF_FULL_SYNC		(1 << 3)
+#define MDF_WAS_UP_TO_DATE	(1 << 4)
+#define MDF_PEER_OUT_DATED	(1 << 5)
+#define MDF_CRASHED_PRIMARY     (1 << 6)
+
+enum drbd_uuid_index {
+	UI_CURRENT,
+	UI_BITMAP,
+	UI_HISTORY_START,
+	UI_HISTORY_END,
+	UI_SIZE,      /* nl-packet: number of dirty bits */
+	UI_FLAGS,     /* nl-packet: flags */
+	UI_EXTENDED_SIZE   /* Everything. */
+};
+
+enum drbd_timeout_flag {
+	UT_DEFAULT      = 0,
+	UT_DEGRADED     = 1,
+	UT_PEER_OUTDATED = 2,
+};
+
+#define UUID_JUST_CREATED ((__u64)4)
+
+#define DRBD_MAGIC 0x83740267
+#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
+
+/* these are of type "int" */
+#define DRBD_MD_INDEX_INTERNAL -1
+#define DRBD_MD_INDEX_FLEX_EXT -2
+#define DRBD_MD_INDEX_FLEX_INT -3
+
+/* Start of the new netlink/connector stuff */
+
+#define DRBD_NL_CREATE_DEVICE 0x01
+#define DRBD_NL_SET_DEFAULTS  0x02
+
+/* The following line should be moved over to linux/connector.h
+ * when the time comes */
+#ifndef CN_IDX_DRBD
+# define CN_IDX_DRBD			0x4
+/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */
+#endif
+#define CN_VAL_DRBD			0x1
+
+/* For searching a vacant cn_idx value */
+#define CN_IDX_STEP			6977
+
+struct drbd_nl_cfg_req {
+	int packet_type;
+	unsigned int drbd_minor;
+	int flags;
+	unsigned short tag_list[];
+};
+
+struct drbd_nl_cfg_reply {
+	int packet_type;
+	unsigned int minor;
+	int ret_code; /* enum ret_code or set_st_err_t */
+	unsigned short tag_list[]; /* only used with get_* calls */
+};
+
+#endif
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
new file mode 100644
index 000000000000..9d067ce46960
--- /dev/null
+++ b/include/linux/drbd_limits.h
@@ -0,0 +1,137 @@
+/*
+  drbd_limits.h
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+*/
+
+/*
+ * Our current limitations.
+ * Some of them are hard limits,
+ * some of them are arbitrary range limits, that make it easier to provide
+ * feedback about nonsense settings for certain configurable values.
+ */
+
+#ifndef DRBD_LIMITS_H
+#define DRBD_LIMITS_H 1
+
+#define DEBUG_RANGE_CHECK 0
+
+#define DRBD_MINOR_COUNT_MIN 1
+#define DRBD_MINOR_COUNT_MAX 255
+
+#define DRBD_DIALOG_REFRESH_MIN 0
+#define DRBD_DIALOG_REFRESH_MAX 600
+
+/* valid port number */
+#define DRBD_PORT_MIN 1
+#define DRBD_PORT_MAX 0xffff
+
+/* startup { */
+  /* if you want more than 3.4 days, disable */
+#define DRBD_WFC_TIMEOUT_MIN 0
+#define DRBD_WFC_TIMEOUT_MAX 300000
+#define DRBD_WFC_TIMEOUT_DEF 0
+
+#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
+#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
+#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+
+#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
+#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+/* }*/
+
+/* net { */
+  /* timeout, unit centi seconds
+   * more than one minute timeout is not usefull */
+#define DRBD_TIMEOUT_MIN 1
+#define DRBD_TIMEOUT_MAX 600
+#define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
+
+  /* active connection retries when C_WF_CONNECTION */
+#define DRBD_CONNECT_INT_MIN 1
+#define DRBD_CONNECT_INT_MAX 120
+#define DRBD_CONNECT_INT_DEF 10   /* seconds */
+
+  /* keep-alive probes when idle */
+#define DRBD_PING_INT_MIN 1
+#define DRBD_PING_INT_MAX 120
+#define DRBD_PING_INT_DEF 10
+
+ /* timeout for the ping packets.*/
+#define DRBD_PING_TIMEO_MIN  1
+#define DRBD_PING_TIMEO_MAX  100
+#define DRBD_PING_TIMEO_DEF  5
+
+  /* max number of write requests between write barriers */
+#define DRBD_MAX_EPOCH_SIZE_MIN 1
+#define DRBD_MAX_EPOCH_SIZE_MAX 20000
+#define DRBD_MAX_EPOCH_SIZE_DEF 2048
+
+  /* I don't think that a tcp send buffer of more than 10M is usefull */
+#define DRBD_SNDBUF_SIZE_MIN  0
+#define DRBD_SNDBUF_SIZE_MAX  (10<<20)
+#define DRBD_SNDBUF_SIZE_DEF  (2*65535)
+
+#define DRBD_RCVBUF_SIZE_MIN  0
+#define DRBD_RCVBUF_SIZE_MAX  (10<<20)
+#define DRBD_RCVBUF_SIZE_DEF  (2*65535)
+
+  /* @4k PageSize -> 128kB - 512MB */
+#define DRBD_MAX_BUFFERS_MIN  32
+#define DRBD_MAX_BUFFERS_MAX  131072
+#define DRBD_MAX_BUFFERS_DEF  2048
+
+  /* @4k PageSize -> 4kB - 512MB */
+#define DRBD_UNPLUG_WATERMARK_MIN  1
+#define DRBD_UNPLUG_WATERMARK_MAX  131072
+#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+
+  /* 0 is disabled.
+   * 200 should be more than enough even for very short timeouts */
+#define DRBD_KO_COUNT_MIN  0
+#define DRBD_KO_COUNT_MAX  200
+#define DRBD_KO_COUNT_DEF  0
+/* } */
+
+/* syncer { */
+  /* FIXME allow rate to be zero? */
+#define DRBD_RATE_MIN 1
+/* channel bonding 10 GbE, or other hardware */
+#define DRBD_RATE_MAX (4 << 20)
+#define DRBD_RATE_DEF 250  /* kb/second */
+
+  /* less than 7 would hit performance unneccessarily.
+   * 3833 is the largest prime that still does fit
+   * into 64 sectors of activity log */
+#define DRBD_AL_EXTENTS_MIN  7
+#define DRBD_AL_EXTENTS_MAX  3833
+#define DRBD_AL_EXTENTS_DEF  127
+
+#define DRBD_AFTER_MIN  -1
+#define DRBD_AFTER_MAX  255
+#define DRBD_AFTER_DEF  -1
+
+/* } */
+
+/* drbdsetup XY resize -d Z
+ * you are free to reduce the device size to nothing, if you want to.
+ * the upper limit with 64bit kernel, enough ram and flexible meta data
+ * is 16 TB, currently. */
+/* DRBD_MAX_SECTORS */
+#define DRBD_DISK_SIZE_SECT_MIN  0
+#define DRBD_DISK_SIZE_SECT_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
+
+#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_FENCING_DEF FP_DONT_CARE
+#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
+#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
+#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
+#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
+
+#define DRBD_MAX_BIO_BVECS_MIN 0
+#define DRBD_MAX_BIO_BVECS_MAX 128
+#define DRBD_MAX_BIO_BVECS_DEF 0
+
+#undef RANGE
+#endif
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
new file mode 100644
index 000000000000..db5721ad50d1
--- /dev/null
+++ b/include/linux/drbd_nl.h
@@ -0,0 +1,137 @@
+/*
+   PAKET( name,
+	  TYPE ( pn, pr, member )
+	  ...
+   )
+
+   You may never reissue one of the pn arguments
+*/
+
+#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
+#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
+#endif
+
+NL_PACKET(primary, 1,
+       NL_BIT(		1,	T_MAY_IGNORE,	overwrite_peer)
+)
+
+NL_PACKET(secondary, 2, )
+
+NL_PACKET(disk_conf, 3,
+	NL_INT64(	2,	T_MAY_IGNORE,	disk_size)
+	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128)
+	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128)
+	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx)
+	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error)
+	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing)
+	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv)
+	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush)
+	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush)
+	  /*  55 max_bio_size was available in 8.2.6rc2 */
+	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)
+	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)
+	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain)
+)
+
+NL_PACKET(detach, 4, )
+
+NL_PACKET(net_conf, 5,
+	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)
+	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128)
+	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX)
+	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX)
+	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout)
+	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol)
+	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int)
+	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int)
+	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size)
+	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers)
+	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark)
+	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size)
+	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count)
+	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p)
+	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p)
+	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p)
+	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
+	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
+	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
+	  /* 59 addr_family was available in GIT, never released */
+	NL_BIT(		60,	T_MANDATORY,	mind_af)
+	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
+	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries)
+	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp)
+	NL_BIT(		61,	T_MAY_IGNORE,	no_cork)
+	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size)
+)
+
+NL_PACKET(disconnect, 6, )
+
+NL_PACKET(resize, 7,
+	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
+)
+
+NL_PACKET(syncer_conf, 8,
+	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
+	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
+	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
+	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
+	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
+	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
+	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
+)
+
+NL_PACKET(invalidate, 9, )
+NL_PACKET(invalidate_peer, 10, )
+NL_PACKET(pause_sync, 11, )
+NL_PACKET(resume_sync, 12, )
+NL_PACKET(suspend_io, 13, )
+NL_PACKET(resume_io, 14, )
+NL_PACKET(outdate, 15, )
+NL_PACKET(get_config, 16, )
+NL_PACKET(get_state, 17,
+	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i)
+)
+
+NL_PACKET(get_uuids, 18,
+	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64)))
+	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags)
+)
+
+NL_PACKET(get_timeout_flag, 19,
+	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded)
+)
+
+NL_PACKET(call_helper, 20,
+	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32)
+)
+
+/* Tag nr 42 already allocated in drbd-8.1 development. */
+
+NL_PACKET(sync_progress, 23,
+	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress)
+)
+
+NL_PACKET(dump_ee, 24,
+	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32)
+	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX)
+	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX)
+	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector)
+	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id)
+	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10)
+)
+
+NL_PACKET(start_ov, 25,
+	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
+)
+
+NL_PACKET(new_c_uuid, 26,
+       NL_BIT(		63,	T_MANDATORY,	clear_bm)
+)
+
+#undef NL_PACKET
+#undef NL_INTEGER
+#undef NL_INT64
+#undef NL_BIT
+#undef NL_STRING
+
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
new file mode 100644
index 000000000000..fcdff8410e99
--- /dev/null
+++ b/include/linux/drbd_tag_magic.h
@@ -0,0 +1,83 @@
+#ifndef DRBD_TAG_MAGIC_H
+#define DRBD_TAG_MAGIC_H
+
+#define TT_END     0
+#define TT_REMOVED 0xE000
+
+/* declare packet_type enums */
+enum packet_types {
+#define NL_PACKET(name, number, fields) P_ ## name = number,
+#define NL_INTEGER(pn, pr, member)
+#define NL_INT64(pn, pr, member)
+#define NL_BIT(pn, pr, member)
+#define NL_STRING(pn, pr, member, len)
+#include "drbd_nl.h"
+	P_nl_after_last_packet,
+};
+
+/* These struct are used to deduce the size of the tag lists: */
+#define NL_PACKET(name, number, fields)	\
+	struct name ## _tag_len_struct { fields };
+#define NL_INTEGER(pn, pr, member)		\
+	int member; int tag_and_len ## member;
+#define NL_INT64(pn, pr, member)		\
+	__u64 member; int tag_and_len ## member;
+#define NL_BIT(pn, pr, member)		\
+	unsigned char member:1; int tag_and_len ## member;
+#define NL_STRING(pn, pr, member, len)	\
+	unsigned char member[len]; int member ## _len; \
+	int tag_and_len ## member;
+#include "linux/drbd_nl.h"
+
+/* declate tag-list-sizes */
+static const int tag_list_sizes[] = {
+#define NL_PACKET(name, number, fields) 2 fields ,
+#define NL_INTEGER(pn, pr, member)      + 4 + 4
+#define NL_INT64(pn, pr, member)        + 4 + 8
+#define NL_BIT(pn, pr, member)          + 4 + 1
+#define NL_STRING(pn, pr, member, len)  + 4 + (len)
+#include "drbd_nl.h"
+};
+
+/* The two highest bits are used for the tag type */
+#define TT_MASK      0xC000
+#define TT_INTEGER   0x0000
+#define TT_INT64     0x4000
+#define TT_BIT       0x8000
+#define TT_STRING    0xC000
+/* The next bit indicates if processing of the tag is mandatory */
+#define T_MANDATORY  0x2000
+#define T_MAY_IGNORE 0x0000
+#define TN_MASK      0x1fff
+/* The remaining 13 bits are used to enumerate the tags */
+
+#define tag_type(T)   ((T) & TT_MASK)
+#define tag_number(T) ((T) & TN_MASK)
+
+/* declare tag enums */
+#define NL_PACKET(name, number, fields) fields
+enum drbd_tags {
+#define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr ,
+#define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
+#define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
+#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
+#include "drbd_nl.h"
+};
+
+struct tag {
+	const char *name;
+	int type_n_flags;
+	int max_len;
+};
+
+/* declare tag names */
+#define NL_PACKET(name, number, fields) fields
+static const struct tag tag_descriptions[] = {
+#define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   },
+#define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
+#define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
+#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
+#include "drbd_nl.h"
+};
+
+#endif
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
new file mode 100644
index 000000000000..3a2b2d9b0472
--- /dev/null
+++ b/include/linux/lru_cache.h
@@ -0,0 +1,294 @@
+/*
+   lru_cache.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#ifndef LRU_CACHE_H
+#define LRU_CACHE_H
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/string.h> /* for memset */
+#include <linux/seq_file.h>
+
+/*
+This header file (and its .c file; kernel-doc of functions see there)
+  define a helper framework to easily keep track of index:label associations,
+  and changes to an "active set" of objects, as well as pending transactions,
+  to persistently record those changes.
+
+  We use an LRU policy if it is necessary to "cool down" a region currently in
+  the active set before we can "heat" a previously unused region.
+
+  Because of this later property, it is called "lru_cache".
+  As it actually Tracks Objects in an Active SeT, we could also call it
+  toast (incidentally that is what may happen to the data on the
+  backend storage uppon next resync, if we don't get it right).
+
+What for?
+
+We replicate IO (more or less synchronously) to local and remote disk.
+
+For crash recovery after replication node failure,
+  we need to resync all regions that have been target of in-flight WRITE IO
+  (in use, or "hot", regions), as we don't know wether or not those WRITEs have
+  made it to stable storage.
+
+  To avoid a "full resync", we need to persistently track these regions.
+
+  This is known as "write intent log", and can be implemented as on-disk
+  (coarse or fine grained) bitmap, or other meta data.
+
+  To avoid the overhead of frequent extra writes to this meta data area,
+  usually the condition is softened to regions that _may_ have been target of
+  in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent
+  bitmap, trading frequency of meta data transactions against amount of
+  (possibly unneccessary) resync traffic.
+
+  If we set a hard limit on the area that may be "hot" at any given time, we
+  limit the amount of resync traffic needed for crash recovery.
+
+For recovery after replication link failure,
+  we need to resync all blocks that have been changed on the other replica
+  in the mean time, or, if both replica have been changed independently [*],
+  all blocks that have been changed on either replica in the mean time.
+  [*] usually as a result of a cluster split-brain and insufficient protection.
+      but there are valid use cases to do this on purpose.
+
+  Tracking those blocks can be implemented as "dirty bitmap".
+  Having it fine-grained reduces the amount of resync traffic.
+  It should also be persistent, to allow for reboots (or crashes)
+  while the replication link is down.
+
+There are various possible implementations for persistently storing
+write intent log information, three of which are mentioned here.
+
+"Chunk dirtying"
+  The on-disk "dirty bitmap" may be re-used as "write-intent" bitmap as well.
+  To reduce the frequency of bitmap updates for write-intent log purposes,
+  one could dirty "chunks" (of some size) at a time of the (fine grained)
+  on-disk bitmap, while keeping the in-memory "dirty" bitmap as clean as
+  possible, flushing it to disk again when a previously "hot" (and on-disk
+  dirtied as full chunk) area "cools down" again (no IO in flight anymore,
+  and none expected in the near future either).
+
+"Explicit (coarse) write intent bitmap"
+  An other implementation could chose a (probably coarse) explicit bitmap,
+  for write-intent log purposes, additionally to the fine grained dirty bitmap.
+
+"Activity log"
+  Yet an other implementation may keep track of the hot regions, by starting
+  with an empty set, and writing down a journal of region numbers that have
+  become "hot", or have "cooled down" again.
+
+  To be able to use a ring buffer for this journal of changes to the active
+  set, we not only record the actual changes to that set, but also record the
+  not changing members of the set in a round robin fashion. To do so, we use a
+  fixed (but configurable) number of slots which we can identify by index, and
+  associate region numbers (labels) with these indices.
+  For each transaction recording a change to the active set, we record the
+  change itself (index: -old_label, +new_label), and which index is associated
+  with which label (index: current_label) within a certain sliding window that
+  is moved further over the available indices with each such transaction.
+
+  Thus, for crash recovery, if the ringbuffer is sufficiently large, we can
+  accurately reconstruct the active set.
+
+  Sufficiently large depends only on maximum number of active objects, and the
+  size of the sliding window recording "index: current_label" associations within
+  each transaction.
+
+  This is what we call the "activity log".
+
+  Currently we need one activity log transaction per single label change, which
+  does not give much benefit over the "dirty chunks of bitmap" approach, other
+  than potentially less seeks.
+
+  We plan to change the transaction format to support multiple changes per
+  transaction, which then would reduce several (disjoint, "random") updates to
+  the bitmap into one transaction to the activity log ring buffer.
+*/
+
+/* this defines an element in a tracked set
+ * .colision is for hash table lookup.
+ * When we process a new IO request, we know its sector, thus can deduce the
+ * region number (label) easily.  To do the label -> object lookup without a
+ * full list walk, we use a simple hash table.
+ *
+ * .list is on one of three lists:
+ *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
+ *     lru: unused but ready to be reused or recycled
+ *          (ts_refcnt == 0, lc_number != LC_FREE),
+ *    free: unused but ready to be recycled
+ *          (ts_refcnt == 0, lc_number == LC_FREE),
+ *
+ * an element is said to be "in the active set",
+ * if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
+ *
+ * DRBD currently (May 2009) only uses 61 elements on the resync lru_cache
+ * (total memory usage 2 pages), and up to 3833 elements on the act_log
+ * lru_cache, totalling ~215 kB for 64bit architechture, ~53 pages.
+ *
+ * We usually do not actually free these objects again, but only "recycle"
+ * them, as the change "index: -old_label, +LC_FREE" would need a transaction
+ * as well.  Which also means that using a kmem_cache to allocate the objects
+ * from wastes some resources.
+ * But it avoids high order page allocations in kmalloc.
+ */
+struct lc_element {
+	struct hlist_node colision;
+	struct list_head list;		 /* LRU list or free list */
+	unsigned refcnt;
+	/* back "pointer" into ts_cache->element[index],
+	 * for paranoia, and for "ts_element_to_index" */
+	unsigned lc_index;
+	/* if we want to track a larger set of objects,
+	 * it needs to become arch independend u64 */
+	unsigned lc_number;
+
+	/* special label when on free list */
+#define LC_FREE (~0U)
+};
+
+struct lru_cache {
+	/* the least recently used item is kept at lru->prev */
+	struct list_head lru;
+	struct list_head free;
+	struct list_head in_use;
+
+	/* the pre-created kmem cache to allocate the objects from */
+	struct kmem_cache *lc_cache;
+
+	/* size of tracked objects, used to memset(,0,) them in lc_reset */
+	size_t element_size;
+	/* offset of struct lc_element member in the tracked object */
+	size_t element_off;
+
+	/* number of elements (indices) */
+	unsigned int  nr_elements;
+	/* Arbitrary limit on maximum tracked objects. Practical limit is much
+	 * lower due to allocation failures, probably. For typical use cases,
+	 * nr_elements should be a few thousand at most.
+	 * This also limits the maximum value of ts_element.ts_index, allowing the
+	 * 8 high bits of .ts_index to be overloaded with flags in the future. */
+#define LC_MAX_ACTIVE	(1<<24)
+
+	/* statistics */
+	unsigned used; /* number of lelements currently on in_use list */
+	unsigned long hits, misses, starving, dirty, changed;
+
+	/* see below: flag-bits for lru_cache */
+	unsigned long flags;
+
+	/* when changing the label of an index element */
+	unsigned int  new_number;
+
+	/* for paranoia when changing the label of an index element */
+	struct lc_element *changing_element;
+
+	void  *lc_private;
+	const char *name;
+
+	/* nr_elements there */
+	struct hlist_head *lc_slot;
+	struct lc_element **lc_element;
+};
+
+
+/* flag-bits for lru_cache */
+enum {
+	/* debugging aid, to catch concurrent access early.
+	 * user needs to guarantee exclusive access by proper locking! */
+	__LC_PARANOIA,
+	/* if we need to change the set, but currently there is a changing
+	 * transaction pending, we are "dirty", and must deferr further
+	 * changing requests */
+	__LC_DIRTY,
+	/* if we need to change the set, but currently there is no free nor
+	 * unused element available, we are "starving", and must not give out
+	 * further references, to guarantee that eventually some refcnt will
+	 * drop to zero and we will be able to make progress again, changing
+	 * the set, writing the transaction.
+	 * if the statistics say we are frequently starving,
+	 * nr_elements is too small. */
+	__LC_STARVING,
+};
+#define LC_PARANOIA (1<<__LC_PARANOIA)
+#define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_STARVING (1<<__LC_STARVING)
+
+extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned e_count, size_t e_size, size_t e_off);
+extern void lc_reset(struct lru_cache *lc);
+extern void lc_destroy(struct lru_cache *lc);
+extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
+extern void lc_del(struct lru_cache *lc, struct lc_element *element);
+
+extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
+extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
+extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+
+struct seq_file;
+extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
+
+extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+				void (*detail) (struct seq_file *, struct lc_element *));
+
+/**
+ * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change.  Returns true if we aquired the lock.
+ */
+static inline int lc_try_lock(struct lru_cache *lc)
+{
+	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+}
+
+/**
+ * lc_unlock - unlock @lc, allow lc_get() to change the set again
+ * @lc: the lru cache to operate on
+ */
+static inline void lc_unlock(struct lru_cache *lc)
+{
+	clear_bit(__LC_DIRTY, &lc->flags);
+	smp_mb__after_clear_bit();
+}
+
+static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = lc_find(lc, enr);
+	return e && e->refcnt;
+}
+
+#define lc_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i);
+extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index bb1326d3839c..1cfe51628e1b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -200,4 +200,7 @@ config NLATTR
 config GENERIC_ATOMIC64
        bool
 
+config LRU_CACHE
+	tristate
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 2e78277eff9d..347ad8db29d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -91,6 +91,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
 
 obj-$(CONFIG_NLATTR) += nlattr.o
 
+obj-$(CONFIG_LRU_CACHE) += lru_cache.o
+
 obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
new file mode 100644
index 000000000000..270de9d31b8c
--- /dev/null
+++ b/lib/lru_cache.c
@@ -0,0 +1,560 @@
+/*
+   lru_cache.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/string.h> /* for memset */
+#include <linux/seq_file.h> /* for seq_printf */
+#include <linux/lru_cache.h>
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+	      "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
+MODULE_LICENSE("GPL");
+
+/* this is developers aid only.
+ * it catches concurrent access (lack of locking on the users part) */
+#define PARANOIA_ENTRY() do {		\
+	BUG_ON(!lc);			\
+	BUG_ON(!lc->nr_elements);	\
+	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
+} while (0)
+
+#define RETURN(x...)     do { \
+	clear_bit(__LC_PARANOIA, &lc->flags); \
+	smp_mb__after_clear_bit(); return x ; } while (0)
+
+/* BUG() if e is not one of the elements tracked by lc */
+#define PARANOIA_LC_ELEMENT(lc, e) do {	\
+	struct lru_cache *lc_ = (lc);	\
+	struct lc_element *e_ = (e);	\
+	unsigned i = e_->lc_index;	\
+	BUG_ON(i >= lc_->nr_elements);	\
+	BUG_ON(lc_->lc_element[i] != e_); } while (0)
+
+/**
+ * lc_create - prepares to track objects in an active set
+ * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @e_count: number of elements allowed to be active simultaneously
+ * @e_size: size of the tracked objects
+ * @e_off: offset to the &struct lc_element member in a tracked object
+ *
+ * Returns a pointer to a newly initialized struct lru_cache on success,
+ * or NULL on (allocation) failure.
+ */
+struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned e_count, size_t e_size, size_t e_off)
+{
+	struct hlist_head *slot = NULL;
+	struct lc_element **element = NULL;
+	struct lru_cache *lc;
+	struct lc_element *e;
+	unsigned cache_obj_size = kmem_cache_size(cache);
+	unsigned i;
+
+	WARN_ON(cache_obj_size < e_size);
+	if (cache_obj_size < e_size)
+		return NULL;
+
+	/* e_count too big; would probably fail the allocation below anyways.
+	 * for typical use cases, e_count should be few thousand at most. */
+	if (e_count > LC_MAX_ACTIVE)
+		return NULL;
+
+	slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL);
+	if (!slot)
+		goto out_fail;
+	element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
+	if (!element)
+		goto out_fail;
+
+	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc)
+		goto out_fail;
+
+	INIT_LIST_HEAD(&lc->in_use);
+	INIT_LIST_HEAD(&lc->lru);
+	INIT_LIST_HEAD(&lc->free);
+
+	lc->name = name;
+	lc->element_size = e_size;
+	lc->element_off = e_off;
+	lc->nr_elements = e_count;
+	lc->new_number = LC_FREE;
+	lc->lc_cache = cache;
+	lc->lc_element = element;
+	lc->lc_slot = slot;
+
+	/* preallocate all objects */
+	for (i = 0; i < e_count; i++) {
+		void *p = kmem_cache_alloc(cache, GFP_KERNEL);
+		if (!p)
+			break;
+		memset(p, 0, lc->element_size);
+		e = p + e_off;
+		e->lc_index = i;
+		e->lc_number = LC_FREE;
+		list_add(&e->list, &lc->free);
+		element[i] = e;
+	}
+	if (i == e_count)
+		return lc;
+
+	/* else: could not allocate all elements, give up */
+	for (i--; i; i--) {
+		void *p = element[i];
+		kmem_cache_free(cache, p - e_off);
+	}
+	kfree(lc);
+out_fail:
+	kfree(element);
+	kfree(slot);
+	return NULL;
+}
+
+void lc_free_by_index(struct lru_cache *lc, unsigned i)
+{
+	void *p = lc->lc_element[i];
+	WARN_ON(!p);
+	if (p) {
+		p -= lc->element_off;
+		kmem_cache_free(lc->lc_cache, p);
+	}
+}
+
+/**
+ * lc_destroy - frees memory allocated by lc_create()
+ * @lc: the lru cache to destroy
+ */
+void lc_destroy(struct lru_cache *lc)
+{
+	unsigned i;
+	if (!lc)
+		return;
+	for (i = 0; i < lc->nr_elements; i++)
+		lc_free_by_index(lc, i);
+	kfree(lc->lc_element);
+	kfree(lc->lc_slot);
+	kfree(lc);
+}
+
+/**
+ * lc_reset - does a full reset for @lc and the hash table slots.
+ * @lc: the lru cache to operate on
+ *
+ * It is roughly the equivalent of re-allocating a fresh lru_cache object,
+ * basically a short cut to lc_destroy(lc); lc = lc_create(...);
+ */
+void lc_reset(struct lru_cache *lc)
+{
+	unsigned i;
+
+	INIT_LIST_HEAD(&lc->in_use);
+	INIT_LIST_HEAD(&lc->lru);
+	INIT_LIST_HEAD(&lc->free);
+	lc->used = 0;
+	lc->hits = 0;
+	lc->misses = 0;
+	lc->starving = 0;
+	lc->dirty = 0;
+	lc->changed = 0;
+	lc->flags = 0;
+	lc->changing_element = NULL;
+	lc->new_number = LC_FREE;
+	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
+
+	for (i = 0; i < lc->nr_elements; i++) {
+		struct lc_element *e = lc->lc_element[i];
+		void *p = e;
+		p -= lc->element_off;
+		memset(p, 0, lc->element_size);
+		/* re-init it */
+		e->lc_index = i;
+		e->lc_number = LC_FREE;
+		list_add(&e->list, &lc->free);
+	}
+}
+
+/**
+ * lc_seq_printf_stats - print stats about @lc into @seq
+ * @seq: the seq_file to print into
+ * @lc: the lru cache to print statistics of
+ */
+size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+{
+	/* NOTE:
+	 * total calls to lc_get are
+	 * (starving + hits + misses)
+	 * misses include "dirty" count (update from an other thread in
+	 * progress) and "changed", when this in fact lead to an successful
+	 * update of the cache.
+	 */
+	return seq_printf(seq, "\t%s: used:%u/%u "
+		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		lc->name, lc->used, lc->nr_elements,
+		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+}
+
+static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
+{
+	return  lc->lc_slot + (enr % lc->nr_elements);
+}
+
+
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+{
+	struct hlist_node *n;
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
+		if (e->lc_number == enr)
+			return e;
+	}
+	return NULL;
+}
+
+/* returned element will be "recycled" immediately */
+static struct lc_element *lc_evict(struct lru_cache *lc)
+{
+	struct list_head  *n;
+	struct lc_element *e;
+
+	if (list_empty(&lc->lru))
+		return NULL;
+
+	n = lc->lru.prev;
+	e = list_entry(n, struct lc_element, list);
+
+	PARANOIA_LC_ELEMENT(lc, e);
+
+	list_del(&e->list);
+	hlist_del(&e->colision);
+	return e;
+}
+
+/**
+ * lc_del - removes an element from the cache
+ * @lc: The lru_cache object
+ * @e: The element to remove
+ *
+ * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
+ * sets @e->enr to %LC_FREE.
+ */
+void lc_del(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	PARANOIA_LC_ELEMENT(lc, e);
+	BUG_ON(e->refcnt);
+
+	e->lc_number = LC_FREE;
+	hlist_del_init(&e->colision);
+	list_move(&e->list, &lc->free);
+	RETURN();
+}
+
+static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+{
+	struct list_head *n;
+
+	if (list_empty(&lc->free))
+		return lc_evict(lc);
+
+	n = lc->free.next;
+	list_del(n);
+	return list_entry(n, struct lc_element, list);
+}
+
+static int lc_unused_element_available(struct lru_cache *lc)
+{
+	if (!list_empty(&lc->free))
+		return 1; /* something on the free list */
+	if (!list_empty(&lc->lru))
+		return 1;  /* something to evict */
+
+	return 0;
+}
+
+
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that an other element becomes evicted from
+ * the cache. In either case, the user is notified so he is able to e.g. keep
+ * a persistent log of the cache changes, and therefore the objects in use.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ *     Or no unused or free element could be recycled (@lc will be marked as
+ *     %LC_STARVING, blocking further lc_get() operations).
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away
+ *
+ *  pointer to an UNUSED element with some different element number,
+ *          where that different number may also be %LC_FREE.
+ *
+ *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
+ *          and the returned element pointer is removed from the lru list and
+ *          hash collision chains.  The user now should do whatever housekeeping
+ *          is necessary.
+ *          Then he must call lc_changed(lc,element_pointer), to finish
+ *          the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
+ *       any cache set change.
+ */
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+		RETURN(e);
+	}
+
+	++lc->misses;
+
+	/* In case there is nothing available and we can not kick out
+	 * the LRU element, we have to wait ...
+	 */
+	if (!lc_unused_element_available(lc)) {
+		__set_bit(__LC_STARVING, &lc->flags);
+		RETURN(NULL);
+	}
+
+	/* it was not present in the active set.
+	 * we are going to recycle an unused (or even "free") element.
+	 * user may need to commit a transaction to record that change.
+	 * we serialize on flags & TF_DIRTY */
+	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
+		++lc->dirty;
+		RETURN(NULL);
+	}
+
+	e = lc_get_unused_element(lc);
+	BUG_ON(!e);
+
+	clear_bit(__LC_STARVING, &lc->flags);
+	BUG_ON(++e->refcnt != 1);
+	lc->used++;
+
+	lc->changing_element = e;
+	lc->new_number = enr;
+
+	RETURN(e);
+}
+
+/* similar to lc_get,
+ * but only gets a new reference on an existing element.
+ * you either get the requested element, or NULL.
+ * will be consolidated into one function.
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+	}
+	RETURN(e);
+}
+
+/**
+ * lc_changed - tell @lc that the change has been recorded
+ * @lc: the lru cache to operate on
+ * @e: the element pending label change
+ */
+void lc_changed(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	BUG_ON(e != lc->changing_element);
+	PARANOIA_LC_ELEMENT(lc, e);
+	++lc->changed;
+	e->lc_number = lc->new_number;
+	list_add(&e->list, &lc->in_use);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
+	lc->changing_element = NULL;
+	lc->new_number = LC_FREE;
+	clear_bit(__LC_DIRTY, &lc->flags);
+	smp_mb__after_clear_bit();
+	RETURN();
+}
+
+
+/**
+ * lc_put - give up refcnt of @e
+ * @lc: the lru cache to operate on
+ * @e: the element to put
+ *
+ * If refcnt reaches zero, the element is moved to the lru list,
+ * and a %LC_STARVING (if set) is cleared.
+ * Returns the new (post-decrement) refcnt.
+ */
+unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	PARANOIA_LC_ELEMENT(lc, e);
+	BUG_ON(e->refcnt == 0);
+	BUG_ON(e == lc->changing_element);
+	if (--e->refcnt == 0) {
+		/* move it to the front of LRU. */
+		list_move(&e->list, &lc->lru);
+		lc->used--;
+		clear_bit(__LC_STARVING, &lc->flags);
+		smp_mb__after_clear_bit();
+	}
+	RETURN(e->refcnt);
+}
+
+/**
+ * lc_element_by_index
+ * @lc: the lru cache to operate on
+ * @i: the index of the element to return
+ */
+struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
+{
+	BUG_ON(i >= lc->nr_elements);
+	BUG_ON(lc->lc_element[i] == NULL);
+	BUG_ON(lc->lc_element[i]->lc_index != i);
+	return lc->lc_element[i];
+}
+
+/**
+ * lc_index_of
+ * @lc: the lru cache to operate on
+ * @e: the element to query for its index position in lc->element
+ */
+unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_LC_ELEMENT(lc, e);
+	return e->lc_index;
+}
+
+/**
+ * lc_set - associate index with label
+ * @lc: the lru cache to operate on
+ * @enr: the label to set
+ * @index: the element index to associate label with.
+ *
+ * Used to initialize the active set to some previously recorded state.
+ */
+void lc_set(struct lru_cache *lc, unsigned int enr, int index)
+{
+	struct lc_element *e;
+
+	if (index < 0 || index >= lc->nr_elements)
+		return;
+
+	e = lc_element_by_index(lc, index);
+	e->lc_number = enr;
+
+	hlist_del_init(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+}
+
+/**
+ * lc_dump - Dump a complete LRU cache to seq in textual form.
+ * @lc: the lru cache to operate on
+ * @seq: the &struct seq_file pointer to seq_printf into
+ * @utext: user supplied "heading" or other info
+ * @detail: function pointer the user may provide to dump further details
+ * of the object the lc_element is embedded in.
+ */
+void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+	     void (*detail) (struct seq_file *, struct lc_element *))
+{
+	unsigned int nr_elements = lc->nr_elements;
+	struct lc_element *e;
+	int i;
+
+	seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+	for (i = 0; i < nr_elements; i++) {
+		e = lc_element_by_index(lc, i);
+		if (e->lc_number == LC_FREE) {
+			seq_printf(seq, "\t%2d: FREE\n", i);
+		} else {
+			seq_printf(seq, "\t%2d: %4u %4u    ", i,
+				   e->lc_number, e->refcnt);
+			detail(seq, e);
+		}
+	}
+}
+
+EXPORT_SYMBOL(lc_create);
+EXPORT_SYMBOL(lc_reset);
+EXPORT_SYMBOL(lc_destroy);
+EXPORT_SYMBOL(lc_set);
+EXPORT_SYMBOL(lc_del);
+EXPORT_SYMBOL(lc_try_get);
+EXPORT_SYMBOL(lc_find);
+EXPORT_SYMBOL(lc_get);
+EXPORT_SYMBOL(lc_put);
+EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_element_by_index);
+EXPORT_SYMBOL(lc_index_of);
+EXPORT_SYMBOL(lc_seq_printf_stats);
+EXPORT_SYMBOL(lc_seq_dump_details);
-- 
cgit v1.2.3


From 23fb064bb96f001ecb8682129f7ee1bc1ca691bc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 21 Jul 2009 21:18:35 +0900
Subject: percpu: kill legacy percpu allocator

With ia64 converted, there's no arch left which still uses legacy
percpu allocator.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Delightedly-acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
---
 include/linux/percpu.h |  24 -------
 kernel/module.c        | 150 -----------------------------------------
 mm/Makefile            |   4 --
 mm/allocpercpu.c       | 177 -------------------------------------------------
 mm/percpu.c            |   2 -
 5 files changed, 357 deletions(-)
 delete mode 100644 mm/allocpercpu.c

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 878836ca999c..5baf5b8788fb 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -34,8 +34,6 @@
 
 #ifdef CONFIG_SMP
 
-#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
-
 /* minimum unit size, also is the maximum supported allocation size */
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
 
@@ -130,28 +128,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
 
 extern void *__alloc_reserved_percpu(size_t size, size_t align);
-
-#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
-struct percpu_data {
-	void *ptrs[1];
-};
-
-/* pointer disguising messes up the kmemleak objects tracking */
-#ifndef CONFIG_DEBUG_KMEMLEAK
-#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
-#else
-#define __percpu_disguise(pdata) (struct percpu_data *)(pdata)
-#endif
-
-#define per_cpu_ptr(ptr, cpu)						\
-({									\
-        struct percpu_data *__p = __percpu_disguise(ptr);		\
-        (__typeof__(ptr))__p->ptrs[(cpu)];				\
-})
-
-#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
 extern void *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void *__pdata);
 
diff --git a/kernel/module.c b/kernel/module.c
index 8b7d8805819d..64787cddeb5e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
 
-#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
-
 static void *percpu_modalloc(unsigned long size, unsigned long align,
 			     const char *name)
 {
@@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme)
 	free_percpu(freeme);
 }
 
-#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
-/* Number of blocks used and allocated. */
-static unsigned int pcpu_num_used, pcpu_num_allocated;
-/* Size of each block.  -ve means used. */
-static int *pcpu_size;
-
-static int split_block(unsigned int i, unsigned short size)
-{
-	/* Reallocation required? */
-	if (pcpu_num_used + 1 > pcpu_num_allocated) {
-		int *new;
-
-		new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
-			       GFP_KERNEL);
-		if (!new)
-			return 0;
-
-		pcpu_num_allocated *= 2;
-		pcpu_size = new;
-	}
-
-	/* Insert a new subblock */
-	memmove(&pcpu_size[i+1], &pcpu_size[i],
-		sizeof(pcpu_size[0]) * (pcpu_num_used - i));
-	pcpu_num_used++;
-
-	pcpu_size[i+1] -= size;
-	pcpu_size[i] = size;
-	return 1;
-}
-
-static inline unsigned int block_size(int val)
-{
-	if (val < 0)
-		return -val;
-	return val;
-}
-
-static void *percpu_modalloc(unsigned long size, unsigned long align,
-			     const char *name)
-{
-	unsigned long extra;
-	unsigned int i;
-	void *ptr;
-	int cpu;
-
-	if (align > PAGE_SIZE) {
-		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
-		       name, align, PAGE_SIZE);
-		align = PAGE_SIZE;
-	}
-
-	ptr = __per_cpu_start;
-	for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		/* Extra for alignment requirement. */
-		extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
-		BUG_ON(i == 0 && extra != 0);
-
-		if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
-			continue;
-
-		/* Transfer extra to previous block. */
-		if (pcpu_size[i-1] < 0)
-			pcpu_size[i-1] -= extra;
-		else
-			pcpu_size[i-1] += extra;
-		pcpu_size[i] -= extra;
-		ptr += extra;
-
-		/* Split block if warranted */
-		if (pcpu_size[i] - size > sizeof(unsigned long))
-			if (!split_block(i, size))
-				return NULL;
-
-		/* add the per-cpu scanning areas */
-		for_each_possible_cpu(cpu)
-			kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
-				       GFP_KERNEL);
-
-		/* Mark allocated */
-		pcpu_size[i] = -pcpu_size[i];
-		return ptr;
-	}
-
-	printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-	       size);
-	return NULL;
-}
-
-static void percpu_modfree(void *freeme)
-{
-	unsigned int i;
-	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
-	int cpu;
-
-	/* First entry is core kernel percpu data. */
-	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		if (ptr == freeme) {
-			pcpu_size[i] = -pcpu_size[i];
-			goto free;
-		}
-	}
-	BUG();
-
- free:
-	/* remove the per-cpu scanning areas */
-	for_each_possible_cpu(cpu)
-		kmemleak_free(freeme + per_cpu_offset(cpu));
-
-	/* Merge with previous? */
-	if (pcpu_size[i-1] >= 0) {
-		pcpu_size[i-1] += pcpu_size[i];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i], &pcpu_size[i+1],
-			(pcpu_num_used - i) * sizeof(pcpu_size[0]));
-		i--;
-	}
-	/* Merge with next? */
-	if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
-		pcpu_size[i] += pcpu_size[i+1];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i+1], &pcpu_size[i+2],
-			(pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
-	}
-}
-
-static int percpu_modinit(void)
-{
-	pcpu_num_used = 2;
-	pcpu_num_allocated = 2;
-	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
-			    GFP_KERNEL);
-	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
-	/* Free room. */
-	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
-	if (pcpu_size[1] < 0) {
-		printk(KERN_ERR "No per-cpu room for modules.\n");
-		pcpu_num_used = 1;
-	}
-
-	return 0;
-}
-__initcall(percpu_modinit);
-
-#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
 static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 				 Elf_Shdr *sechdrs,
 				 const char *secstrings)
diff --git a/mm/Makefile b/mm/Makefile
index ebf849042ed3..82131d0f8d85 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -34,11 +34,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
 obj-$(CONFIG_SMP) += percpu.o
-else
-obj-$(CONFIG_SMP) += allocpercpu.o
-endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
deleted file mode 100644
index df34ceae0c67..000000000000
--- a/mm/allocpercpu.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * linux/mm/allocpercpu.c
- *
- * Separated from slab.c August 11, 2006 Christoph Lameter
- */
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/bootmem.h>
-#include <asm/sections.h>
-
-#ifndef cache_line_size
-#define cache_line_size()	L1_CACHE_BYTES
-#endif
-
-/**
- * percpu_depopulate - depopulate per-cpu data for given cpu
- * @__pdata: per-cpu data to depopulate
- * @cpu: depopulate per-cpu data for this cpu
- *
- * Depopulating per-cpu data for a cpu going offline would be a typical
- * use case. You need to register a cpu hotplug handler for that purpose.
- */
-static void percpu_depopulate(void *__pdata, int cpu)
-{
-	struct percpu_data *pdata = __percpu_disguise(__pdata);
-
-	kfree(pdata->ptrs[cpu]);
-	pdata->ptrs[cpu] = NULL;
-}
-
-/**
- * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
- * @__pdata: per-cpu data to depopulate
- * @mask: depopulate per-cpu data for cpu's selected through mask bits
- */
-static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask)
-{
-	int cpu;
-	for_each_cpu_mask_nr(cpu, *mask)
-		percpu_depopulate(__pdata, cpu);
-}
-
-#define percpu_depopulate_mask(__pdata, mask) \
-	__percpu_depopulate_mask((__pdata), &(mask))
-
-/**
- * percpu_populate - populate per-cpu data for given cpu
- * @__pdata: per-cpu data to populate further
- * @size: size of per-cpu object
- * @gfp: may sleep or not etc.
- * @cpu: populate per-data for this cpu
- *
- * Populating per-cpu data for a cpu coming online would be a typical
- * use case. You need to register a cpu hotplug handler for that purpose.
- * Per-cpu object is populated with zeroed buffer.
- */
-static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
-{
-	struct percpu_data *pdata = __percpu_disguise(__pdata);
-	int node = cpu_to_node(cpu);
-
-	/*
-	 * We should make sure each CPU gets private memory.
-	 */
-	size = roundup(size, cache_line_size());
-
-	BUG_ON(pdata->ptrs[cpu]);
-	if (node_online(node))
-		pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
-	else
-		pdata->ptrs[cpu] = kzalloc(size, gfp);
-	return pdata->ptrs[cpu];
-}
-
-/**
- * percpu_populate_mask - populate per-cpu data for more cpu's
- * @__pdata: per-cpu data to populate further
- * @size: size of per-cpu object
- * @gfp: may sleep or not etc.
- * @mask: populate per-cpu data for cpu's selected through mask bits
- *
- * Per-cpu objects are populated with zeroed buffers.
- */
-static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
-				  cpumask_t *mask)
-{
-	cpumask_t populated;
-	int cpu;
-
-	cpus_clear(populated);
-	for_each_cpu_mask_nr(cpu, *mask)
-		if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
-			__percpu_depopulate_mask(__pdata, &populated);
-			return -ENOMEM;
-		} else
-			cpu_set(cpu, populated);
-	return 0;
-}
-
-#define percpu_populate_mask(__pdata, size, gfp, mask) \
-	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
-
-/**
- * alloc_percpu - initial setup of per-cpu data
- * @size: size of per-cpu object
- * @align: alignment
- *
- * Allocate dynamic percpu area.  Percpu objects are populated with
- * zeroed buffers.
- */
-void *__alloc_percpu(size_t size, size_t align)
-{
-	/*
-	 * We allocate whole cache lines to avoid false sharing
-	 */
-	size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
-	void *pdata = kzalloc(sz, GFP_KERNEL);
-	void *__pdata = __percpu_disguise(pdata);
-
-	/*
-	 * Can't easily make larger alignment work with kmalloc.  WARN
-	 * on it.  Larger alignment should only be used for module
-	 * percpu sections on SMP for which this path isn't used.
-	 */
-	WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-
-	if (unlikely(!pdata))
-		return NULL;
-	if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL,
-					   &cpu_possible_map)))
-		return __pdata;
-	kfree(pdata);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-/**
- * free_percpu - final cleanup of per-cpu data
- * @__pdata: object to clean up
- *
- * We simply clean up any per-cpu object left. No need for the client to
- * track and specify through a bis mask which per-cpu objects are to free.
- */
-void free_percpu(void *__pdata)
-{
-	if (unlikely(!__pdata))
-		return;
-	__percpu_depopulate_mask(__pdata, cpu_possible_mask);
-	kfree(__percpu_disguise(__pdata));
-}
-EXPORT_SYMBOL_GPL(free_percpu);
-
-/*
- * Generic percpu area setup.
- */
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
-void __init setup_per_cpu_areas(void)
-{
-	unsigned long size, i;
-	char *ptr;
-	unsigned long nr_possible_cpus = num_possible_cpus();
-
-	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
-	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
-
-	for_each_possible_cpu(i) {
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-		ptr += size;
-	}
-}
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
diff --git a/mm/percpu.c b/mm/percpu.c
index 4a048abad043..e4e08b87b77e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -46,8 +46,6 @@
  *
  * To use this allocator, arch code should do the followings.
  *
- * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA
- *
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back if they need to be
  *   different from the default
-- 
cgit v1.2.3


From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 2 Oct 2009 07:50:50 -0700
Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For automated testing it is useful to have the option to turn
the warnings on copy_from_user() etc checks into errors:

 In function ‘copy_from_user’,
     inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080,
     inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503:
   linux/arch/x86/include/asm/uaccess_32.h:213:
  error: call to ‘copy_from_user_overflow’ declared with attribute error:
  copy_from_user buffer size is not provably correct

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20091002075050.4e9f7641@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug            | 14 ++++++++++++++
 arch/x86/include/asm/uaccess_32.h |  4 +++-
 include/linux/compiler-gcc4.h     |  1 +
 include/linux/compiler.h          |  3 +++
 4 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d105f29bb6bb..1bd2e36f1538 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -287,4 +287,18 @@ config OPTIMIZE_INLINING
 
 	  If unsure, say N.
 
+config DEBUG_STRICT_USER_COPY_CHECKS
+	bool "Strict copy size checks"
+	depends on DEBUG_KERNEL
+	---help---
+	  Enabling this option turns a certain set of sanity checks for user
+	  copy operations into compile time failures.
+
+	  The copy_from_user() etc checks are there to help test if there
+	  are sufficient security checks on the length argument of
+	  the copy operation, by having gcc prove that the argument is
+	  within bounds.
+
+	  If unsure, or if you run an older (pre 4.4) gcc, say N.
+
 endmenu
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 952f9e793c3e..0c9825e97f36 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to,
 
 
 extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+	__compiletime_error("copy_from_user() buffer size is not provably correct")
+#else
 	__compiletime_warning("copy_from_user() buffer size is not provably correct")
 #endif
 ;
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index f1709c1f9eae..77542c57e20a 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -41,4 +41,5 @@
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #if __GNUC_MINOR__ >= 4
 #define __compiletime_warning(message) __attribute__((warning(message)))
+#define __compiletime_error(message) __attribute__((error(message)))
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 950356311f12..88fd4b673cb4 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_warning
 # define __compiletime_warning(message)
 #endif
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
 
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
-- 
cgit v1.2.3


From 7340a0b15280c9d902c7dd0608b8e751b5a7c403 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Sat, 3 Oct 2009 19:48:22 +0900
Subject: this_cpu: Introduce this_cpu_ptr() and generic this_cpu_* operations

This patch introduces two things: First this_cpu_ptr and then per cpu
atomic operations.

this_cpu_ptr
------------

A common operation when dealing with cpu data is to get the instance of the
cpu data associated with the currently executing processor. This can be
optimized by

this_cpu_ptr(xx) = per_cpu_ptr(xx, smp_processor_id).

The problem with per_cpu_ptr(x, smp_processor_id) is that it requires
an array lookup to find the offset for the cpu. Processors typically
have the offset for the current cpu area in some kind of (arch dependent)
efficiently accessible register or memory location.

We can use that instead of doing the array lookup to speed up the
determination of the address of the percpu variable. This is particularly
significant because these lookups occur in performance critical paths
of the core kernel. this_cpu_ptr() can avoid memory accesses and

this_cpu_ptr comes in two flavors. The preemption context matters since we
are referring the the currently executing processor. In many cases we must
insure that the processor does not change while a code segment is executed.

__this_cpu_ptr 	-> Do not check for preemption context
this_cpu_ptr	-> Check preemption context

The parameter to these operations is a per cpu pointer. This can be the
address of a statically defined per cpu variable (&per_cpu_var(xxx)) or
the address of a per cpu variable allocated with the per cpu allocator.

per cpu atomic operations: this_cpu_*(var, val)
-----------------------------------------------
this_cpu_* operations (like this_cpu_add(struct->y, value) operate on
abitrary scalars that are members of structures allocated with the new
per cpu allocator. They can also operate on static per_cpu variables
if they are passed to per_cpu_var() (See patch to use this_cpu_*
operations for vm statistics).

These operations are guaranteed to be atomic vs preemption when modifying
the scalar. The calculation of the per cpu offset is also guaranteed to
be atomic at the same time. This means that a this_cpu_* operation can be
safely used to modify a per cpu variable in a context where interrupts are
enabled and preemption is allowed. Many architectures can perform such
a per cpu atomic operation with a single instruction.

Note that the atomicity here is different from regular atomic operations.
Atomicity is only guaranteed for data accessed from the currently executing
processor. Modifications from other processors are still possible. There
must be other guarantees that the per cpu data is not modified from another
processor when using these instruction. The per cpu atomicity is created
by the fact that the processor either executes and instruction or not.
Embedded in the instruction is the relocation of the per cpu address to
the are reserved for the current processor and the RMW action. Therefore
interrupts or preemption cannot occur in the mids of this processing.

Generic fallback functions are used if an arch does not define optimized
this_cpu operations. The functions come also come in the two flavors used
for this_cpu_ptr().

The firstparameter is a scalar that is a member of a structure allocated
through allocpercpu or a per cpu variable (use per_cpu_var(xxx)). The
operations are similar to what percpu_add() and friends do.

this_cpu_read(scalar)
this_cpu_write(scalar, value)
this_cpu_add(scale, value)
this_cpu_sub(scalar, value)
this_cpu_inc(scalar)
this_cpu_dec(scalar)
this_cpu_and(scalar, value)
this_cpu_or(scalar, value)
this_cpu_xor(scalar, value)

Arch code can override the generic functions and provide optimized atomic
per cpu operations. These atomic operations must provide both the relocation
(x86 does it through a segment override) and the operation on the data in a
single instruction. Otherwise preempt needs to be disabled and there is no
gain from providing arch implementations.

A third variant is provided prefixed by irqsafe_. These variants are safe
against hardware interrupts on the *same* processor (all per cpu atomic
primitives are *always* *only* providing safety for code running on the
*same* processor!). The increment needs to be implemented by the hardware
in such a way that it is a single RMW instruction that is either processed
before or after an interrupt.

cc: David Howells <dhowells@redhat.com>
cc: Ingo Molnar <mingo@elte.hu>
cc: Rusty Russell <rusty@rustcorp.com.au>
cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/percpu.h |   5 +
 include/linux/percpu.h       | 400 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 405 insertions(+)

(limited to 'include/linux')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 90079c373f1c..8087b90d4673 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -56,6 +56,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #define __raw_get_cpu_var(var) \
 	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
 
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
+
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
@@ -66,6 +69,8 @@ extern void setup_per_cpu_areas(void);
 #define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
 #define __get_cpu_var(var)			per_cpu_var(var)
 #define __raw_get_cpu_var(var)			per_cpu_var(var)
+#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
+#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
 
 #endif	/* SMP */
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5baf5b8788fb..3d9ba92b104f 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -219,4 +219,404 @@ do {									\
 # define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=)
 #endif
 
+/*
+ * Branching function to split up a function into a set of functions that
+ * are called for different scalar sizes of the objects handled.
+ */
+
+extern void __bad_size_call_parameter(void);
+
+#define __size_call_return(stem, variable)				\
+({	typeof(variable) ret__;						\
+	switch(sizeof(variable)) {					\
+	case 1: ret__ = stem##1(variable);break;			\
+	case 2: ret__ = stem##2(variable);break;			\
+	case 4: ret__ = stem##4(variable);break;			\
+	case 8: ret__ = stem##8(variable);break;			\
+	default:							\
+		__bad_size_call_parameter();break;			\
+	}								\
+	ret__;								\
+})
+
+#define __size_call(stem, variable, ...)				\
+do {									\
+	switch(sizeof(variable)) {					\
+		case 1: stem##1(variable, __VA_ARGS__);break;		\
+		case 2: stem##2(variable, __VA_ARGS__);break;		\
+		case 4: stem##4(variable, __VA_ARGS__);break;		\
+		case 8: stem##8(variable, __VA_ARGS__);break;		\
+		default: 						\
+			__bad_size_call_parameter();break;		\
+	}								\
+} while (0)
+
+/*
+ * Optimized manipulation for memory allocated through the per cpu
+ * allocator or for addresses of per cpu variables (can be determined
+ * using per_cpu_var(xx).
+ *
+ * These operation guarantee exclusivity of access for other operations
+ * on the *same* processor. The assumption is that per cpu data is only
+ * accessed by a single processor instance (the current one).
+ *
+ * The first group is used for accesses that must be done in a
+ * preemption safe way since we know that the context is not preempt
+ * safe. Interrupts may occur. If the interrupt modifies the variable
+ * too then RMW actions will not be reliable.
+ *
+ * The arch code can provide optimized functions in two ways:
+ *
+ * 1. Override the function completely. F.e. define this_cpu_add().
+ *    The arch must then ensure that the various scalar format passed
+ *    are handled correctly.
+ *
+ * 2. Provide functions for certain scalar sizes. F.e. provide
+ *    this_cpu_add_2() to provide per cpu atomic operations for 2 byte
+ *    sized RMW actions. If arch code does not provide operations for
+ *    a scalar size then the fallback in the generic code will be
+ *    used.
+ */
+
+#define _this_cpu_generic_read(pcp)					\
+({	typeof(pcp) ret__;						\
+	preempt_disable();						\
+	ret__ = *this_cpu_ptr(&(pcp));					\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#ifndef this_cpu_read
+# ifndef this_cpu_read_1
+#  define this_cpu_read_1(pcp)	_this_cpu_generic_read(pcp)
+# endif
+# ifndef this_cpu_read_2
+#  define this_cpu_read_2(pcp)	_this_cpu_generic_read(pcp)
+# endif
+# ifndef this_cpu_read_4
+#  define this_cpu_read_4(pcp)	_this_cpu_generic_read(pcp)
+# endif
+# ifndef this_cpu_read_8
+#  define this_cpu_read_8(pcp)	_this_cpu_generic_read(pcp)
+# endif
+# define this_cpu_read(pcp)	__size_call_return(this_cpu_read_, (pcp))
+#endif
+
+#define _this_cpu_generic_to_op(pcp, val, op)				\
+do {									\
+	preempt_disable();						\
+	*__this_cpu_ptr(&pcp) op val;					\
+	preempt_enable();						\
+} while (0)
+
+#ifndef this_cpu_write
+# ifndef this_cpu_write_1
+#  define this_cpu_write_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef this_cpu_write_2
+#  define this_cpu_write_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef this_cpu_write_4
+#  define this_cpu_write_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef this_cpu_write_8
+#  define this_cpu_write_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# define this_cpu_write(pcp, val)	__size_call(this_cpu_write_, (pcp), (val))
+#endif
+
+#ifndef this_cpu_add
+# ifndef this_cpu_add_1
+#  define this_cpu_add_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef this_cpu_add_2
+#  define this_cpu_add_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef this_cpu_add_4
+#  define this_cpu_add_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef this_cpu_add_8
+#  define this_cpu_add_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# define this_cpu_add(pcp, val)		__size_call(this_cpu_add_, (pcp), (val))
+#endif
+
+#ifndef this_cpu_sub
+# define this_cpu_sub(pcp, val)		this_cpu_add((pcp), -(val))
+#endif
+
+#ifndef this_cpu_inc
+# define this_cpu_inc(pcp)		this_cpu_add((pcp), 1)
+#endif
+
+#ifndef this_cpu_dec
+# define this_cpu_dec(pcp)		this_cpu_sub((pcp), 1)
+#endif
+
+#ifndef this_cpu_and
+# ifndef this_cpu_and_1
+#  define this_cpu_and_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef this_cpu_and_2
+#  define this_cpu_and_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef this_cpu_and_4
+#  define this_cpu_and_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef this_cpu_and_8
+#  define this_cpu_and_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# define this_cpu_and(pcp, val)		__size_call(this_cpu_and_, (pcp), (val))
+#endif
+
+#ifndef this_cpu_or
+# ifndef this_cpu_or_1
+#  define this_cpu_or_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef this_cpu_or_2
+#  define this_cpu_or_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef this_cpu_or_4
+#  define this_cpu_or_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef this_cpu_or_8
+#  define this_cpu_or_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# define this_cpu_or(pcp, val)		__size_call(this_cpu_or_, (pcp), (val))
+#endif
+
+#ifndef this_cpu_xor
+# ifndef this_cpu_xor_1
+#  define this_cpu_xor_1(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef this_cpu_xor_2
+#  define this_cpu_xor_2(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef this_cpu_xor_4
+#  define this_cpu_xor_4(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef this_cpu_xor_8
+#  define this_cpu_xor_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# define this_cpu_xor(pcp, val)		__size_call(this_cpu_or_, (pcp), (val))
+#endif
+
+/*
+ * Generic percpu operations that do not require preemption handling.
+ * Either we do not care about races or the caller has the
+ * responsibility of handling preemptions issues. Arch code can still
+ * override these instructions since the arch per cpu code may be more
+ * efficient and may actually get race freeness for free (that is the
+ * case for x86 for example).
+ *
+ * If there is no other protection through preempt disable and/or
+ * disabling interupts then one of these RMW operations can show unexpected
+ * behavior because the execution thread was rescheduled on another processor
+ * or an interrupt occurred and the same percpu variable was modified from
+ * the interrupt context.
+ */
+#ifndef __this_cpu_read
+# ifndef __this_cpu_read_1
+#  define __this_cpu_read_1(pcp)	(*__this_cpu_ptr(&(pcp)))
+# endif
+# ifndef __this_cpu_read_2
+#  define __this_cpu_read_2(pcp)	(*__this_cpu_ptr(&(pcp)))
+# endif
+# ifndef __this_cpu_read_4
+#  define __this_cpu_read_4(pcp)	(*__this_cpu_ptr(&(pcp)))
+# endif
+# ifndef __this_cpu_read_8
+#  define __this_cpu_read_8(pcp)	(*__this_cpu_ptr(&(pcp)))
+# endif
+# define __this_cpu_read(pcp)	__size_call_return(__this_cpu_read_, (pcp))
+#endif
+
+#define __this_cpu_generic_to_op(pcp, val, op)				\
+do {									\
+	*__this_cpu_ptr(&(pcp)) op val;					\
+} while (0)
+
+#ifndef __this_cpu_write
+# ifndef __this_cpu_write_1
+#  define __this_cpu_write_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef __this_cpu_write_2
+#  define __this_cpu_write_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef __this_cpu_write_4
+#  define __this_cpu_write_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef __this_cpu_write_8
+#  define __this_cpu_write_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), =)
+# endif
+# define __this_cpu_write(pcp, val)	__size_call(__this_cpu_write_, (pcp), (val))
+#endif
+
+#ifndef __this_cpu_add
+# ifndef __this_cpu_add_1
+#  define __this_cpu_add_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef __this_cpu_add_2
+#  define __this_cpu_add_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef __this_cpu_add_4
+#  define __this_cpu_add_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef __this_cpu_add_8
+#  define __this_cpu_add_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# define __this_cpu_add(pcp, val)	__size_call(__this_cpu_add_, (pcp), (val))
+#endif
+
+#ifndef __this_cpu_sub
+# define __this_cpu_sub(pcp, val)	__this_cpu_add((pcp), -(val))
+#endif
+
+#ifndef __this_cpu_inc
+# define __this_cpu_inc(pcp)		__this_cpu_add((pcp), 1)
+#endif
+
+#ifndef __this_cpu_dec
+# define __this_cpu_dec(pcp)		__this_cpu_sub((pcp), 1)
+#endif
+
+#ifndef __this_cpu_and
+# ifndef __this_cpu_and_1
+#  define __this_cpu_and_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef __this_cpu_and_2
+#  define __this_cpu_and_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef __this_cpu_and_4
+#  define __this_cpu_and_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef __this_cpu_and_8
+#  define __this_cpu_and_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# define __this_cpu_and(pcp, val)	__size_call(__this_cpu_and_, (pcp), (val))
+#endif
+
+#ifndef __this_cpu_or
+# ifndef __this_cpu_or_1
+#  define __this_cpu_or_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef __this_cpu_or_2
+#  define __this_cpu_or_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef __this_cpu_or_4
+#  define __this_cpu_or_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef __this_cpu_or_8
+#  define __this_cpu_or_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# define __this_cpu_or(pcp, val)	__size_call(__this_cpu_or_, (pcp), (val))
+#endif
+
+#ifndef __this_cpu_xor
+# ifndef __this_cpu_xor_1
+#  define __this_cpu_xor_1(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef __this_cpu_xor_2
+#  define __this_cpu_xor_2(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef __this_cpu_xor_4
+#  define __this_cpu_xor_4(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef __this_cpu_xor_8
+#  define __this_cpu_xor_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# define __this_cpu_xor(pcp, val)	__size_call(__this_cpu_xor_, (pcp), (val))
+#endif
+
+/*
+ * IRQ safe versions of the per cpu RMW operations. Note that these operations
+ * are *not* safe against modification of the same variable from another
+ * processors (which one gets when using regular atomic operations)
+ . They are guaranteed to be atomic vs. local interrupts and
+ * preemption only.
+ */
+#define irqsafe_cpu_generic_to_op(pcp, val, op)				\
+do {									\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
+	*__this_cpu_ptr(&(pcp)) op val;					\
+	local_irq_restore(flags);					\
+} while (0)
+
+#ifndef irqsafe_cpu_add
+# ifndef irqsafe_cpu_add_1
+#  define irqsafe_cpu_add_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef irqsafe_cpu_add_2
+#  define irqsafe_cpu_add_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef irqsafe_cpu_add_4
+#  define irqsafe_cpu_add_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef irqsafe_cpu_add_8
+#  define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# define irqsafe_cpu_add(pcp, val) __size_call(irqsafe_cpu_add_, (pcp), (val))
+#endif
+
+#ifndef irqsafe_cpu_sub
+# define irqsafe_cpu_sub(pcp, val)	irqsafe_cpu_add((pcp), -(val))
+#endif
+
+#ifndef irqsafe_cpu_inc
+# define irqsafe_cpu_inc(pcp)	irqsafe_cpu_add((pcp), 1)
+#endif
+
+#ifndef irqsafe_cpu_dec
+# define irqsafe_cpu_dec(pcp)	irqsafe_cpu_sub((pcp), 1)
+#endif
+
+#ifndef irqsafe_cpu_and
+# ifndef irqsafe_cpu_and_1
+#  define irqsafe_cpu_and_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef irqsafe_cpu_and_2
+#  define irqsafe_cpu_and_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef irqsafe_cpu_and_4
+#  define irqsafe_cpu_and_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef irqsafe_cpu_and_8
+#  define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# define irqsafe_cpu_and(pcp, val) __size_call(irqsafe_cpu_and_, (val))
+#endif
+
+#ifndef irqsafe_cpu_or
+# ifndef irqsafe_cpu_or_1
+#  define irqsafe_cpu_or_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef irqsafe_cpu_or_2
+#  define irqsafe_cpu_or_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef irqsafe_cpu_or_4
+#  define irqsafe_cpu_or_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef irqsafe_cpu_or_8
+#  define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# define irqsafe_cpu_or(pcp, val) __size_call(irqsafe_cpu_or_, (val))
+#endif
+
+#ifndef irqsafe_cpu_xor
+# ifndef irqsafe_cpu_xor_1
+#  define irqsafe_cpu_xor_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef irqsafe_cpu_xor_2
+#  define irqsafe_cpu_xor_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef irqsafe_cpu_xor_4
+#  define irqsafe_cpu_xor_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# ifndef irqsafe_cpu_xor_8
+#  define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+# endif
+# define irqsafe_cpu_xor(pcp, val) __size_call(irqsafe_cpu_xor_, (val))
+#endif
+
 #endif /* __LINUX_PERCPU_H */
-- 
cgit v1.2.3


From 4dac3e98840f11bb2d8d52fd375150c7c1912117 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Sat, 3 Oct 2009 19:48:23 +0900
Subject: this_cpu: Use this_cpu ops for VM statistics

Using per cpu atomics for the vm statistics reduces their overhead.
And in the case of x86 we are guaranteed that they will never race even
in the lax form used for vm statistics.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/vmstat.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d0f222388a8..d85889710f9b 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -76,24 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
 
 static inline void __count_vm_event(enum vm_event_item item)
 {
-	__get_cpu_var(vm_event_states).event[item]++;
+	__this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
 }
 
 static inline void count_vm_event(enum vm_event_item item)
 {
-	get_cpu_var(vm_event_states).event[item]++;
-	put_cpu();
+	this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
 }
 
 static inline void __count_vm_events(enum vm_event_item item, long delta)
 {
-	__get_cpu_var(vm_event_states).event[item] += delta;
+	__this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
 }
 
 static inline void count_vm_events(enum vm_event_item item, long delta)
 {
-	get_cpu_var(vm_event_states).event[item] += delta;
-	put_cpu();
+	this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
 }
 
 extern void all_vm_events(unsigned long *);
-- 
cgit v1.2.3


From 1087e9b4ff708976499b4de541d9e1d57d49b60a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sun, 4 Oct 2009 02:20:11 +0200
Subject: HWPOISON: Clean up PR_MCE_KILL interface

While writing the manpage I noticed some shortcomings in the
current interface.

- Define symbolic names for all the different values
- Boundary check the kill mode values
- For symmetry add a get interface too. This allows library
code to get/set the current state.
- For consistency define a PR_MCE_KILL_DEFAULT value

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/prctl.h | 12 ++++++++++++
 kernel/sys.c          | 23 ++++++++++++++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 931150566ade..a3baeb2c2161 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -88,6 +88,18 @@
 #define PR_TASK_PERF_EVENTS_DISABLE		31
 #define PR_TASK_PERF_EVENTS_ENABLE		32
 
+/*
+ * Set early/late kill mode for hwpoison memory corruption.
+ * This influences when the process gets killed on a memory corruption.
+ */
 #define PR_MCE_KILL	33
+# define PR_MCE_KILL_CLEAR   0
+# define PR_MCE_KILL_SET     1
+
+# define PR_MCE_KILL_LATE    0
+# define PR_MCE_KILL_EARLY   1
+# define PR_MCE_KILL_DEFAULT 2
+
+#define PR_MCE_KILL_GET 34
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 255475d163e0..f6afe07d6c0b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1546,24 +1546,37 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			if (arg4 | arg5)
 				return -EINVAL;
 			switch (arg2) {
-			case 0:
+			case PR_MCE_KILL_CLEAR:
 				if (arg3 != 0)
 					return -EINVAL;
 				current->flags &= ~PF_MCE_PROCESS;
 				break;
-			case 1:
+			case PR_MCE_KILL_SET:
 				current->flags |= PF_MCE_PROCESS;
-				if (arg3 != 0)
+				if (arg3 == PR_MCE_KILL_EARLY)
 					current->flags |= PF_MCE_EARLY;
-				else
+				else if (arg3 == PR_MCE_KILL_LATE)
 					current->flags &= ~PF_MCE_EARLY;
+				else if (arg3 == PR_MCE_KILL_DEFAULT)
+					current->flags &=
+						~(PF_MCE_EARLY|PF_MCE_PROCESS);
+				else
+					return -EINVAL;
 				break;
 			default:
 				return -EINVAL;
 			}
 			error = 0;
 			break;
-
+		case PR_MCE_KILL_GET:
+			if (arg2 | arg3 | arg4 | arg5)
+				return -EINVAL;
+			if (current->flags & PF_MCE_PROCESS)
+				error = (current->flags & PF_MCE_EARLY) ?
+					PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
+			else
+				error = PR_MCE_KILL_DEFAULT;
+			break;
 		default:
 			error = -EINVAL;
 			break;
-- 
cgit v1.2.3


From ee34b32d8c2950f66038c8975747ef9aec855289 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 2 Oct 2009 11:01:21 -0700
Subject: dmar: support for parsing Remapping Hardware Static Affinity
 structure

Add support for parsing Remapping Hardware Static Affinity (RHSA) structure.
This enables identifying the association between remapping hardware units and
the corresponding proximity domain. This enables to allocate transalation
structures closer to the remapping hardware unit.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/dmar.c          | 24 +++++++++++++++++++++++-
 include/linux/intel-iommu.h |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 22b02c6df854..577956566a0b 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -348,6 +348,26 @@ found:
 }
 #endif
 
+static int __init
+dmar_parse_one_rhsa(struct acpi_dmar_header *header)
+{
+	struct acpi_dmar_rhsa *rhsa;
+	struct dmar_drhd_unit *drhd;
+
+	rhsa = (struct acpi_dmar_rhsa *)header;
+	for_each_drhd_unit(drhd)
+		if (drhd->reg_base_addr == rhsa->base_address) {
+			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
+
+			if (!node_online(node))
+				node = -1;
+			drhd->iommu->node = node;
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 {
@@ -467,7 +487,7 @@ parse_dmar_table(void)
 #endif
 			break;
 		case ACPI_DMAR_HARDWARE_AFFINITY:
-			/* We don't do anything with RHSA (yet?) */
+			ret = dmar_parse_one_rhsa(entry_header);
 			break;
 		default:
 			printk(KERN_WARNING PREFIX
@@ -677,6 +697,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->agaw = agaw;
 	iommu->msagaw = msagaw;
 
+	iommu->node = -1;
+
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 		cap_max_fault_reg_offset(iommu->cap));
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4f0a72a9740c..9310c699a37d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -332,6 +332,7 @@ struct intel_iommu {
 #ifdef CONFIG_INTR_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
 #endif
+	int		node;
 };
 
 static inline void __iommu_flush_cache(
-- 
cgit v1.2.3


From a9828ec6bc0b7e19a65f7e13daa8bd35a926a753 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 1 Oct 2009 11:33:03 +0000
Subject: ethtool: Remove support for obsolete string query operations

The in-tree implementations have all been converted to
get_sset_count().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  4 ----
 net/core/ethtool.c      | 58 +++++++++----------------------------------------
 2 files changed, 10 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 15e4eb713694..aa0dcb3833d1 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -495,10 +495,6 @@ struct ethtool_ops {
 	u32     (*get_priv_flags)(struct net_device *);
 	int     (*set_priv_flags)(struct net_device *, u32);
 	int	(*get_sset_count)(struct net_device *, int);
-
-	/* the following hooks are obsolete */
-	int	(*self_test_count)(struct net_device *);/* use get_sset_count */
-	int	(*get_stats_count)(struct net_device *);/* use get_sset_count */
 	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int     (*flash_device)(struct net_device *, struct ethtool_flash *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4c12ddb5f5ee..e1951084b973 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -198,13 +198,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 		rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
 		if (rc >= 0)
 			info.n_priv_flags = rc;
-	} else {
-		/* code path for obsolete hooks */
-
-		if (ops->self_test_count)
-			info.testinfo_len = ops->self_test_count(dev);
-		if (ops->get_stats_count)
-			info.n_stats = ops->get_stats_count(dev);
 	}
 	if (ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
@@ -684,16 +677,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 	u64 *data;
 	int ret, test_len;
 
-	if (!ops->self_test)
-		return -EOPNOTSUPP;
-	if (!ops->get_sset_count && !ops->self_test_count)
+	if (!ops->self_test || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
-	if (ops->get_sset_count)
-		test_len = ops->get_sset_count(dev, ETH_SS_TEST);
-	else
-		/* code path for obsolete hook */
-		test_len = ops->self_test_count(dev);
+	test_len = ops->get_sset_count(dev, ETH_SS_TEST);
 	if (test_len < 0)
 		return test_len;
 	WARN_ON(test_len == 0);
@@ -728,36 +715,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 	u8 *data;
 	int ret;
 
-	if (!ops->get_strings)
+	if (!ops->get_strings || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
 		return -EFAULT;
 
-	if (ops->get_sset_count) {
-		ret = ops->get_sset_count(dev, gstrings.string_set);
-		if (ret < 0)
-			return ret;
-
-		gstrings.len = ret;
-	} else {
-		/* code path for obsolete hooks */
-
-		switch (gstrings.string_set) {
-		case ETH_SS_TEST:
-			if (!ops->self_test_count)
-				return -EOPNOTSUPP;
-			gstrings.len = ops->self_test_count(dev);
-			break;
-		case ETH_SS_STATS:
-			if (!ops->get_stats_count)
-				return -EOPNOTSUPP;
-			gstrings.len = ops->get_stats_count(dev);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
+	ret = ops->get_sset_count(dev, gstrings.string_set);
+	if (ret < 0)
+		return ret;
+
+	gstrings.len = ret;
 
 	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
 	if (!data)
@@ -798,16 +766,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 	u64 *data;
 	int ret, n_stats;
 
-	if (!ops->get_ethtool_stats)
-		return -EOPNOTSUPP;
-	if (!ops->get_sset_count && !ops->get_stats_count)
+	if (!ops->get_ethtool_stats || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
-	if (ops->get_sset_count)
-		n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
-	else
-		/* code path for obsolete hook */
-		n_stats = ops->get_stats_count(dev);
+	n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
 	if (n_stats < 0)
 		return n_stats;
 	WARN_ON(n_stats == 0);
-- 
cgit v1.2.3


From 977750076d98c7ff6cbda51858bb5a5894a9d9ab Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 2 Oct 2009 06:56:41 +0000
Subject: af_packet: add interframe drop cmsg (v6)

Add Ancilliary data to better represent loss information

I've had a few requests recently to provide more detail regarding frame loss
during an AF_PACKET packet capture session.  Specifically the requestors want to
see where in a packet sequence frames were lost, i.e. they want to see that 40
frames were lost between frames 302 and 303 in a packet capture file.  In order
to do this we need:

1) The kernel to export this data to user space
2) The applications to make use of it

This patch addresses item (1).  It does this by doing the following:

A) Anytime we drop a frame for which we would increment po->stats.tp_drops, we
also no increment a stats called po->stats.tp_gap.

B) Every time we successfully enqueue a frame to sk_receive_queue, we record the
value of po->stats.tp_gap in skb->mark.  skb->cb would nominally be the place to
record this, but since all the space there is used up, we're overloading
skb->mark.  Its safe to do since any enqueued packet is guaranteed to be
unshared at this point, and skb->mark isn't used for anything else in the rx
path to the application.  After we record tp_gap in the skb, we zero
po->stats.tp_gap.  This allows us to keep a counter of the number of frames lost
between any two enqueued packets

C) When the application goes to dequeue a frame from the packet socket, we look
at skb->mark for that frame.  If it is non-zero, we add a cmsg chunk to the
msghdr of level SOL_PACKET and type PACKET_GAPDATA.  Its a 32 bit integer that
represents the number of frames lost between this packet and the last previous
frame received.

Note there is a chance that if there is frame loss after a receive, and then the
socket is closed, some gap data might be lost.  This is covered by the use of
the PACKET_AUXDATA socket option, which gives total loss data.  With a bit of
math, the final gap can be determined that way.

I've tested this patch myself, and it works well.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

 include/linux/if_packet.h |    2 ++
 net/packet/af_packet.c    |   33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  2 ++
 net/packet/af_packet.c    | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index dea7d6b7cf98..e5d200f53fc3 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,11 +48,13 @@ struct sockaddr_ll
 #define PACKET_RESERVE			12
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
+#define PACKET_GAPDATA			15
 
 struct tpacket_stats
 {
 	unsigned int	tp_packets;
 	unsigned int	tp_drops;
+	unsigned int    tp_gap;
 };
 
 struct tpacket_auxdata
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d7ecca0a0c07..d398a9bf6903 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -523,6 +523,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 	return res;
 }
 
+/*
+ * If we've lost frames since the last time we queued one to the
+ * sk_receive_queue, we need to record it here.
+ * This must be called under the protection of the socket lock
+ * to prevent racing with other softirqs and user space
+ */
+static inline void record_packet_gap(struct sk_buff *skb,
+					struct packet_sock *po)
+{
+	/*
+	 * We overload the mark field here, since we're about
+	 * to enqueue to a receive queue and no body else will
+	 * use this field at this point
+	 */
+	skb->mark = po->stats.tp_gap;
+	po->stats.tp_gap = 0;
+	return;
+
+}
+
+static inline __u32 check_packet_gap(struct sk_buff *skb)
+{
+	return skb->mark;
+}
+
 /*
    This function makes lazy skb cloning in hope that most of packets
    are discarded by BPF.
@@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
+	record_packet_gap(skb, po);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
@@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 drop_n_acct:
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_drops++;
+	po->stats.tp_gap++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
@@ -811,6 +838,7 @@ drop:
 
 ring_is_full:
 	po->stats.tp_drops++;
+	po->stats.tp_gap++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 	sk->sk_data_ready(sk, 0);
@@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *skb;
 	int copied, err;
 	struct sockaddr_ll *sll;
+	__u32 gap;
 
 	err = -EINVAL;
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
 
+	gap = check_packet_gap(skb);
+	if (gap)
+		put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);
+
 	/*
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.
-- 
cgit v1.2.3


From e1e499eef2200c2a7120c9ebf297d48b195cf887 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 2 Oct 2009 05:15:25 +0000
Subject: usbnet: Use wwan%d interface name for mobile broadband devices

Add support for usbnet based devices like CDC-Ether to indicate that they
are actually mobile broadband devices. In that case use wwan%d as default
interface name.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 20 ++++++++++++++------
 drivers/net/usb/usbnet.c    |  3 +++
 include/linux/usb/usbnet.h  |  1 +
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 4a6aff579403..71e65fc10e6f 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -420,6 +420,14 @@ static const struct driver_info	cdc_info = {
 	.status =	cdc_status,
 };
 
+static const struct driver_info mbm_info = {
+	.description =	"Mobile Broadband Network Device",
+	.flags =	FLAG_WWAN,
+	.bind = 	cdc_bind,
+	.unbind =	usbnet_cdc_unbind,
+	.status =	cdc_status,
+};
+
 /*-------------------------------------------------------------------------*/
 
 
@@ -532,32 +540,32 @@ static const struct usb_device_id	products [] = {
 	/* Ericsson F3507g */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1900, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Ericsson F3507g ver. 2 */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1902, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Ericsson F3607gw */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1904, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Ericsson F3307 */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1906, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Toshiba F3507g */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Dell F3507g */
 	USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8147, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
-	.driver_info = (unsigned long) &cdc_info,
+	.driver_info = (unsigned long) &mbm_info,
 },
 	{ },		// END
 };
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ca5ca5ae061d..8124cf16259f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1295,6 +1295,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		/* WLAN devices should always be named "wlan%d" */
 		if ((dev->driver_info->flags & FLAG_WLAN) != 0)
 			strcpy(net->name, "wlan%d");
+		/* WWAN devices should always be named "wwan%d" */
+		if ((dev->driver_info->flags & FLAG_WWAN) != 0)
+			strcpy(net->name, "wwan%d");
 
 		/* maybe the remote can't receive an Ethernet MTU */
 		if (net->mtu > (dev->hard_mtu - net->hard_header_len))
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index f81473052059..86c31b753266 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -90,6 +90,7 @@ struct driver_info {
 #define FLAG_WLAN	0x0080		/* use "wlan%d" names */
 #define FLAG_AVOID_UNLINK_URBS 0x0100	/* don't unlink urbs at usbnet_stop() */
 #define FLAG_SEND_ZLP	0x0200		/* hw requires ZLPs are sent */
+#define FLAG_WWAN	0x0400		/* use "wwan%d" names */
 
 
 	/* init device ... can sleep, or cause probe() failure */
-- 
cgit v1.2.3


From 7ffbe3fdace0bdfcdab8dc6c77506feda0871f79 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 2 Oct 2009 05:15:27 +0000
Subject: net: introduce NETDEV_POST_INIT notifier

For various purposes including a wireless extensions
bugfix, we need to hook into the netdev creation before
before netdev_register_kobject(). This will also ease
doing the dev type assignment that Marcel was working
on for cfg80211 drivers w/o touching them all.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h | 1 +
 net/core/dev.c           | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 44428d247dbe..29714b8441b1 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -201,6 +201,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_PRE_UP		0x000D
 #define NETDEV_BONDING_OLDTYPE  0x000E
 #define NETDEV_BONDING_NEWTYPE  0x000F
+#define NETDEV_POST_INIT	0x0010
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index b8f74cfb1bfd..a74c8fd69556 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4836,6 +4836,12 @@ int register_netdevice(struct net_device *dev)
 		dev->features |= NETIF_F_GSO;
 
 	netdev_initialize_kobject(dev);
+
+	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto err_uninit;
+
 	ret = netdev_register_kobject(dev);
 	if (ret)
 		goto err_uninit;
-- 
cgit v1.2.3


From 23e018a1b083ecb4b8bb2fb43d58e7c19b5d7959 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 5 Oct 2009 08:52:35 +0200
Subject: block: get rid of kblock_schedule_delayed_work()

It was briefly introduced to allow CFQ to to delayed scheduling,
but we ended up removing that feature again. So lets kill the
function and export, and just switch CFQ back to the normal work
schedule since it is now passing in a '0' delay from all call
sites.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       |  8 --------
 block/cfq-iosched.c    | 24 +++++++++++-------------
 include/linux/blkdev.h |  4 ----
 3 files changed, 11 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 81f34311659a..73ecbed02605 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2492,14 +2492,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-int kblockd_schedule_delayed_work(struct request_queue *q,
-				  struct delayed_work *work,
-				  unsigned long delay)
-{
-	return queue_delayed_work(kblockd_workqueue, work, delay);
-}
-EXPORT_SYMBOL(kblockd_schedule_delayed_work);
-
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae14cbaf9d0e..690ebd96dc42 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -150,7 +150,7 @@ struct cfq_data {
 	 * idle window management
 	 */
 	struct timer_list idle_slice_timer;
-	struct delayed_work unplug_work;
+	struct work_struct unplug_work;
 
 	struct cfq_queue *active_queue;
 	struct cfq_io_context *active_cic;
@@ -268,13 +268,11 @@ static inline int cfq_bio_sync(struct bio *bio)
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
-static inline void cfq_schedule_dispatch(struct cfq_data *cfqd,
-					 unsigned long delay)
+static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work,
-						delay);
+		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
 	}
 }
 
@@ -1400,7 +1398,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
-		cfq_schedule_dispatch(cfqd, 0);
+		cfq_schedule_dispatch(cfqd);
 	}
 
 	kmem_cache_free(cfq_pool, cfqq);
@@ -1495,7 +1493,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	if (unlikely(cfqq == cfqd->active_queue)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
-		cfq_schedule_dispatch(cfqd, 0);
+		cfq_schedule_dispatch(cfqd);
 	}
 
 	cfq_put_queue(cfqq);
@@ -2213,7 +2211,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	}
 
 	if (!rq_in_driver(cfqd))
-		cfq_schedule_dispatch(cfqd, 0);
+		cfq_schedule_dispatch(cfqd);
 }
 
 /*
@@ -2343,7 +2341,7 @@ queue_fail:
 	if (cic)
 		put_io_context(cic->ioc);
 
-	cfq_schedule_dispatch(cfqd, 0);
+	cfq_schedule_dispatch(cfqd);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	cfq_log(cfqd, "set_request fail");
 	return 1;
@@ -2352,7 +2350,7 @@ queue_fail:
 static void cfq_kick_queue(struct work_struct *work)
 {
 	struct cfq_data *cfqd =
-		container_of(work, struct cfq_data, unplug_work.work);
+		container_of(work, struct cfq_data, unplug_work);
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
@@ -2406,7 +2404,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 expire:
 	cfq_slice_expired(cfqd, timed_out);
 out_kick:
-	cfq_schedule_dispatch(cfqd, 0);
+	cfq_schedule_dispatch(cfqd);
 out_cont:
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
@@ -2414,7 +2412,7 @@ out_cont:
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
 	del_timer_sync(&cfqd->idle_slice_timer);
-	cancel_delayed_work_sync(&cfqd->unplug_work);
+	cancel_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2496,7 +2494,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
 	cfqd->idle_slice_timer.data = (unsigned long) cfqd;
 
-	INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue);
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
 
 	cfqd->cfq_quantum = cfq_quantum;
 	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 25119041e034..221cecd86bd3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1172,11 +1172,7 @@ static inline void put_dev_sector(Sector p)
 }
 
 struct work_struct;
-struct delayed_work;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct request_queue *q,
-					struct delayed_work *work,
-				 	unsigned long delay);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
-- 
cgit v1.2.3


From a092ff0f90cae22b2ac8028ecd2c6f6c1a9e4601 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Fri, 2 Oct 2009 16:17:53 -0700
Subject: time: Implement logarithmic time accumulation

Accumulating one tick at a time works well unless we're using NOHZ.
Then it can be an issue, since we may have to run through the loop
a few thousand times, which can increase timer interrupt caused
latency.

The current solution was to accumulate in half-second intervals
with NOHZ. This kept the number of loops down, however it did
slightly change how we make NTP adjustments. While not an issue
with NTPd users, as NTPd makes adjustments over a longer period of
time, other adjtimex() users have noticed the half-second
granularity with which we can apply frequency changes to the clock.

For instance, if a application tries to apply a 100ppm frequency
correction for 20ms to correct a 2us offset, with NOHZ they either
get no correction, or a 50us correction.

Now, there will always be some granularity error for applying
frequency corrections. However with users sensitive to this error
have seen a 50-500x increase with NOHZ compared to running without
NOHZ.

So I figured I'd try another approach then just simply increasing
the interval. My approach is to consume the time interval
logarithmically. This reduces the number of times through the loop
needed keeping latency down, while still preserving the original
granularity error for adjtimex() changes.

Further, this change allows us to remove the xtime_cache code
(patch to follow), as xtime is always within one tick of the
current time, instead of the half-second updates it saw before.

An earlier version of this patch has been shipping to x86 users in
the RedHat MRG releases for awhile without issue, but I've reworked
this version to be even more careful about avoiding possible
overflows if the shift value gets too large.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: John Kacur <jkacur@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <1254525473.7741.88.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h     |  4 ---
 kernel/time/timekeeping.c | 85 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 60 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index e6967d10d9e5..0c0ef7d4db7c 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -261,11 +261,7 @@ static inline int ntp_synced(void)
 
 #define NTP_SCALE_SHIFT		32
 
-#ifdef CONFIG_NO_HZ
-#define NTP_INTERVAL_FREQ  (2)
-#else
 #define NTP_INTERVAL_FREQ  (HZ)
-#endif
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb0f46fa1ecd..5fdd78e0858a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -721,6 +721,51 @@ static void timekeeping_adjust(s64 offset)
 				timekeeper.ntp_error_shift;
 }
 
+
+/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ *
+ * This functions accumulates a shifted interval of cycles into
+ * into a shifted interval nanoseconds. Allows for O(log) accumulation
+ * loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+	u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+	/* If the offset is smaller then a shifted interval, do nothing */
+	if (offset < timekeeper.cycle_interval<<shift)
+		return offset;
+
+	/* Accumulate one shifted interval */
+	offset -= timekeeper.cycle_interval << shift;
+	timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+	timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+	while (timekeeper.xtime_nsec >= nsecps) {
+		timekeeper.xtime_nsec -= nsecps;
+		xtime.tv_sec++;
+		second_overflow();
+	}
+
+	/* Accumulate into raw time */
+	raw_time.tv_nsec += timekeeper.raw_interval << shift;;
+	while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+		raw_time.tv_nsec -= NSEC_PER_SEC;
+		raw_time.tv_sec++;
+	}
+
+	/* Accumulate error between NTP and clock interval */
+	timekeeper.ntp_error += tick_length << shift;
+	timekeeper.ntp_error -= timekeeper.xtime_interval <<
+				(timekeeper.ntp_error_shift + shift);
+
+	return offset;
+}
+
+
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
@@ -731,6 +776,7 @@ void update_wall_time(void)
 	struct clocksource *clock;
 	cycle_t offset;
 	u64 nsecs;
+	int shift = 0, maxshift;
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
@@ -744,33 +790,22 @@ void update_wall_time(void)
 #endif
 	timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
-	/* normally this loop will run just once, however in the
-	 * case of lost or late ticks, it will accumulate correctly.
+	/*
+	 * With NO_HZ we may have to accumulate many cycle_intervals
+	 * (think "ticks") worth of time at once. To do this efficiently,
+	 * we calculate the largest doubling multiple of cycle_intervals
+	 * that is smaller then the offset. We then accumulate that
+	 * chunk in one go, and then try to consume the next smaller
+	 * doubled multiple.
 	 */
+	shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+	shift = max(0, shift);
+	/* Bound shift to one less then what overflows tick_length */
+	maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+	shift = min(shift, maxshift);
 	while (offset >= timekeeper.cycle_interval) {
-		u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
-		/* accumulate one interval */
-		offset -= timekeeper.cycle_interval;
-		clock->cycle_last += timekeeper.cycle_interval;
-
-		timekeeper.xtime_nsec += timekeeper.xtime_interval;
-		if (timekeeper.xtime_nsec >= nsecps) {
-			timekeeper.xtime_nsec -= nsecps;
-			xtime.tv_sec++;
-			second_overflow();
-		}
-
-		raw_time.tv_nsec += timekeeper.raw_interval;
-		if (raw_time.tv_nsec >= NSEC_PER_SEC) {
-			raw_time.tv_nsec -= NSEC_PER_SEC;
-			raw_time.tv_sec++;
-		}
-
-		/* accumulate error between NTP and clock interval */
-		timekeeper.ntp_error += tick_length;
-		timekeeper.ntp_error -= timekeeper.xtime_interval <<
-					timekeeper.ntp_error_shift;
+		offset = logarithmic_accumulation(offset, shift);
+		shift--;
 	}
 
 	/* correct the clock when NTP error is too big */
-- 
cgit v1.2.3


From 9f5180e5c331d7b3ccc35e1a78072235d38f9f34 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 6 Oct 2009 09:30:14 +0200
Subject: drbd: Work on permission enforcement

Now we have the capabilities of the sending process available,
use them to enforce CAP_SYS_ADMIN.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/drbd/drbd_nl.c | 7 ++++++-
 include/linux/drbd.h         | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 73c55ccb629a..22538d9628f1 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2000,7 +2000,7 @@ static struct cn_handler_struct cnd_table[] = {
 	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
 };
 
-static void drbd_connector_callback(struct cn_msg *req)
+static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
 {
 	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
 	struct cn_handler_struct *cm;
@@ -2017,6 +2017,11 @@ static void drbd_connector_callback(struct cn_msg *req)
 		return;
 	}
 
+	if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
+		retcode = ERR_PERM;
+		goto fail;
+	}
+
 	mdev = ensure_mdev(nlp);
 	if (!mdev) {
 		retcode = ERR_MINOR_INVALID;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 69dc711f37b3..233db5c18b86 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -138,6 +138,7 @@ enum drbd_ret_codes {
 	ERR_VERIFY_RUNNING	= 149, /* DRBD 8.2 only */
 	ERR_DATA_NOT_CURRENT	= 150,
 	ERR_CONNECTED		= 151, /* DRBD 8.3 only */
+	ERR_PERM		= 152,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
-- 
cgit v1.2.3


From 316d315bffa4026f28085f6b24ebcebede370ac7 Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Tue, 6 Oct 2009 20:16:55 +0200
Subject: block: Seperate read and write statistics of in_flight requests v2

Commit a9327cac440be4d8333bba975cbbf76045096275 added seperate read
and write statistics of in_flight requests. And exported the number
of read and write requests in progress seperately through sysfs.

But  Corrado Zoccolo <czoccolo@gmail.com> reported getting strange
output from "iostat -kx 2". Global values for service time and
utilization were garbage. For interval values, utilization was always
100%, and service time is higher than normal.

So this was reverted by commit 0f78ab9899e9d6acb09d5465def618704255963b

The problem was in part_round_stats_single(), I missed the following:
        if (now == part->stamp)
                return;

-       if (part->in_flight) {
+       if (part_in_flight(part)) {
                __part_stat_add(cpu, part, time_in_queue,
                                part_in_flight(part) * (now - part->stamp));
                __part_stat_add(cpu, part, io_ticks, (now - part->stamp));

With this chunk included, the reported regression gets fixed.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

--
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c      |  8 ++++----
 block/blk-merge.c     |  2 +-
 block/genhd.c         |  4 +++-
 drivers/md/dm.c       | 16 ++++++++++------
 fs/partitions/check.c | 12 +++++++++++-
 include/linux/genhd.h | 21 ++++++++++++++-------
 6 files changed, 43 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 73ecbed02605..ac0fa10f8fa5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -70,7 +70,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		part_stat_inc(cpu, part, merges[rw]);
 	else {
 		part_round_stats(cpu, part);
-		part_inc_in_flight(part);
+		part_inc_in_flight(part, rw);
 	}
 
 	part_stat_unlock();
@@ -1030,9 +1030,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
 	if (now == part->stamp)
 		return;
 
-	if (part->in_flight) {
+	if (part_in_flight(part)) {
 		__part_stat_add(cpu, part, time_in_queue,
-				part->in_flight * (now - part->stamp));
+				part_in_flight(part) * (now - part->stamp));
 		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
 	}
 	part->stamp = now;
@@ -1739,7 +1739,7 @@ static void blk_account_io_done(struct request *req)
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
+		part_dec_in_flight(part, rw);
 
 		part_stat_unlock();
 	}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b0de8574fdc8..99cb5cf1f447 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
 		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
+		part_dec_in_flight(part, rq_data_dir(req));
 
 		part_stat_unlock();
 	}
diff --git a/block/genhd.c b/block/genhd.c
index 5a0861da324d..517e4332cb37 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -869,6 +869,7 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -888,6 +889,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
+	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1053,7 +1055,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   part_stat_read(hd, merges[1]),
 			   (unsigned long long)part_stat_read(hd, sectors[1]),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
-			   hd->in_flight,
+			   part_in_flight(hd),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 			);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 23e76fe0d359..376f1ab48a24 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -130,7 +130,7 @@ struct mapped_device {
 	/*
 	 * A list of ios that arrived while we were suspended.
 	 */
-	atomic_t pending;
+	atomic_t pending[2];
 	wait_queue_head_t wait;
 	struct work_struct work;
 	struct bio_list deferred;
@@ -453,13 +453,14 @@ static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
 	int cpu;
+	int rw = bio_data_dir(io->bio);
 
 	io->start_time = jiffies;
 
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &dm_disk(md)->part0);
 	part_stat_unlock();
-	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
+	dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -479,8 +480,9 @@ static void end_io_acct(struct dm_io *io)
 	 * After this is decremented the bio must not be touched if it is
 	 * a barrier.
 	 */
-	dm_disk(md)->part0.in_flight = pending =
-		atomic_dec_return(&md->pending);
+	dm_disk(md)->part0.in_flight[rw] = pending =
+		atomic_dec_return(&md->pending[rw]);
+	pending += atomic_read(&md->pending[rw^0x1]);
 
 	/* nudge anyone waiting on suspend queue */
 	if (!pending)
@@ -1785,7 +1787,8 @@ static struct mapped_device *alloc_dev(int minor)
 	if (!md->disk)
 		goto bad_disk;
 
-	atomic_set(&md->pending, 0);
+	atomic_set(&md->pending[0], 0);
+	atomic_set(&md->pending[1], 0);
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
@@ -2088,7 +2091,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 				break;
 			}
 			spin_unlock_irqrestore(q->queue_lock, flags);
-		} else if (!atomic_read(&md->pending))
+		} else if (!atomic_read(&md->pending[0]) &&
+					!atomic_read(&md->pending[1]))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f38fee0311a7..7b685e10cbad 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
 		part_stat_read(p, merges[WRITE]),
 		(unsigned long long)part_stat_read(p, sectors[WRITE]),
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
-		p->in_flight,
+		part_in_flight(p),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
+ssize_t part_inflight_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+
+	return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
+}
+
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 ssize_t part_fail_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
 	&dev_attr_size.attr,
 	&dev_attr_alignment_offset.attr,
 	&dev_attr_stat.attr,
+	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7beaa21b3880..297df45ffd0a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,7 @@ struct hd_struct {
 	int make_it_fail;
 #endif
 	unsigned long stamp;
-	int in_flight;
+	int in_flight[2];
 #ifdef	CONFIG_SMP
 	struct disk_stats *dkstats;
 #else
@@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part)
 #define part_stat_sub(cpu, gendiskp, field, subnd)			\
 	part_stat_add(cpu, gendiskp, field, -subnd)
 
-static inline void part_inc_in_flight(struct hd_struct *part)
+static inline void part_inc_in_flight(struct hd_struct *part, int rw)
 {
-	part->in_flight++;
+	part->in_flight[rw]++;
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight++;
+		part_to_disk(part)->part0.in_flight[rw]++;
 }
 
-static inline void part_dec_in_flight(struct hd_struct *part)
+static inline void part_dec_in_flight(struct hd_struct *part, int rw)
 {
-	part->in_flight--;
+	part->in_flight[rw]--;
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight--;
+		part_to_disk(part)->part0.in_flight[rw]--;
+}
+
+static inline int part_in_flight(struct hd_struct *part)
+{
+	return part->in_flight[0] + part->in_flight[1];
 }
 
 /* block/blk-core.c */
@@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev,
 			      struct device_attribute *attr, char *buf);
 extern ssize_t part_stat_show(struct device *dev,
 			      struct device_attribute *attr, char *buf);
+extern ssize_t part_inflight_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 extern ssize_t part_fail_show(struct device *dev,
 			      struct device_attribute *attr, char *buf);
-- 
cgit v1.2.3


From ee5e81f00051b5c373c8de16e3604fd6d3be699e Mon Sep 17 00:00:00 2001
From: Ilia K <mail4ilia@gmail.com>
Date: Wed, 16 Sep 2009 05:53:07 +0000
Subject: add vif using local interface index instead of IP

When routing daemon wants to enable forwarding of multicast traffic it
performs something like:

       struct vifctl vc = {
               .vifc_vifi  = 1,
               .vifc_flags = 0,
               .vifc_threshold = 1,
               .vifc_rate_limit = 0,
               .vifc_lcl_addr = ip, /* <--- ip address of physical
interface, e.g. eth0 */
               .vifc_rmt_addr.s_addr = htonl(INADDR_ANY),
         };
       setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));

This leads (in the kernel) to calling  vif_add() function call which
search the (physical) device using assigned IP address:
       dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

The current API (struct vifctl) does not allow to specify an
interface other way than using it's IP, and if there are more than a
single interface with specified IP only the first one will be found.

The attached patch (against 2.6.30.4) allows to specify an interface
by its index, instead of IP address:

       struct vifctl vc = {
               .vifc_vifi  = 1,
               .vifc_flags = VIFF_USE_IFINDEX,   /* NEW */
               .vifc_threshold = 1,
               .vifc_rate_limit = 0,
               .vifc_lcl_ifindex = if_nametoindex("eth0"),   /* NEW */
               .vifc_rmt_addr.s_addr = htonl(INADDR_ANY),
         };
       setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));

Signed-off-by: Ilia K. <mail4ilia@gmail.com>

=== modified file 'include/linux/mroute.h'
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 13 +++++++++----
 net/ipv4/ipmr.c        | 12 +++++++++++-
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 08bc776d05e2..d5f69151f692 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -59,13 +59,18 @@ struct vifctl {
 	unsigned char vifc_flags;	/* VIFF_ flags */
 	unsigned char vifc_threshold;	/* ttl limit */
 	unsigned int vifc_rate_limit;	/* Rate limiter values (NI) */
-	struct in_addr vifc_lcl_addr;	/* Our address */
+	union {
+		struct in_addr vifc_lcl_addr;     /* Local interface address */
+		int            vifc_lcl_ifindex;  /* Local interface index   */
+	};
 	struct in_addr vifc_rmt_addr;	/* IPIP tunnel addr */
 };
 
-#define VIFF_TUNNEL	0x1	/* IPIP tunnel */
-#define VIFF_SRCRT	0x2	/* NI */
-#define VIFF_REGISTER	0x4	/* register vif	*/
+#define VIFF_TUNNEL		0x1	/* IPIP tunnel */
+#define VIFF_SRCRT		0x2	/* NI */
+#define VIFF_REGISTER		0x4	/* register vif	*/
+#define VIFF_USE_IFINDEX	0x8	/* use vifc_lcl_ifindex instead of
+					   vifc_lcl_addr to find an interface */
 
 /*
  *	Cache manipulation structures for mrouted and PIMd
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 630a56df7b47..c757f0b4b74c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -469,8 +469,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 			return err;
 		}
 		break;
+
+	case VIFF_USE_IFINDEX:
 	case 0:
-		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
+		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
+			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
+			if (dev && dev->ip_ptr == NULL) {
+				dev_put(dev);
+				return -EADDRNOTAVAIL;
+			}
+		} else
+			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
+
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		err = dev_set_allmulti(dev, 1);
-- 
cgit v1.2.3


From fa857afcf77da669eb6b7031ec07ad14b912c307 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Sep 2009 23:43:14 +0000
Subject: ipv6 sit: 6rd (IPv6 Rapid Deployment) Support.

IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
deploy IPv6 unicast service to IPv4 sites to which it provides
customer premise equipment.  Like 6to4, it utilizes stateless IPv6 in
IPv4 encapsulation in order to transit IPv4-only network
infrastructure.  Unlike 6to4, a 6rd service provider uses an IPv6
prefix of its own in place of the fixed 6to4 prefix.

With this option enabled, the SIT driver offers 6rd functionality by
providing additional ioctl API to configure the IPv6 Prefix for in
stead of static 2002::/16 for 6to4.

Original patch was done by Alexandre Cassen <acassen@freebox.fr>
based on old Internet-Draft.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h |  11 ++++
 include/net/ipip.h        |  13 +++++
 net/ipv6/Kconfig          |  19 +++++++
 net/ipv6/sit.c            | 124 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 159 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 5a9aae4adb44..c53c8e016940 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -15,6 +15,10 @@
 #define SIOCADDPRL      (SIOCDEVPRIVATE + 5)
 #define SIOCDELPRL      (SIOCDEVPRIVATE + 6)
 #define SIOCCHGPRL      (SIOCDEVPRIVATE + 7)
+#define SIOCGET6RD      (SIOCDEVPRIVATE + 8)
+#define SIOCADD6RD      (SIOCDEVPRIVATE + 9)
+#define SIOCDEL6RD      (SIOCDEVPRIVATE + 10)
+#define SIOCCHG6RD      (SIOCDEVPRIVATE + 11)
 
 #define GRE_CSUM	__cpu_to_be16(0x8000)
 #define GRE_ROUTING	__cpu_to_be16(0x4000)
@@ -51,6 +55,13 @@ struct ip_tunnel_prl {
 /* PRL flags */
 #define	PRL_DEFAULT		0x0001
 
+struct ip_tunnel_6rd {
+	struct in6_addr		prefix;
+	__be32			relay_prefix;
+	__u16			prefixlen;
+	__u16			relay_prefixlen;
+};
+
 enum
 {
 	IFLA_GRE_UNSPEC,
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 0159221a8509..86f1c8bd040c 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -7,6 +7,15 @@
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO	(30*HZ)
 
+/* 6rd prefix/relay information */
+struct ip_tunnel_6rd_parm
+{
+	struct in6_addr		prefix;
+	__be32			relay_prefix;
+	u16			prefixlen;
+	u16			relay_prefixlen;
+};
+
 struct ip_tunnel
 {
 	struct ip_tunnel	*next;
@@ -23,6 +32,10 @@ struct ip_tunnel
 
 	struct ip_tunnel_parm	parms;
 
+	/* for SIT */
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd_parm	ip6rd;
+#endif
 	struct ip_tunnel_prl_entry	*prl;		/* potential router list */
 	unsigned int			prl_count;	/* # of entries in PRL */
 };
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ead6c7a42f44..f56199827452 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -170,6 +170,25 @@ config IPV6_SIT
 
 	  Saying M here will produce a module called sit. If unsure, say Y.
 
+config IPV6_SIT_6RD
+	bool "IPv6: IPv6 Rapid Development (6RD) (EXPERIMENTAL)"
+	depends on IPV6_SIT && EXPERIMENTAL
+	default n
+	---help---
+	  IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
+	  mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
+	  deploy IPv6 unicast service to IPv4 sites to which it provides
+	  customer premise equipment.  Like 6to4, it utilizes stateless IPv6 in
+	  IPv4 encapsulation in order to transit IPv4-only network
+	  infrastructure.  Unlike 6to4, a 6rd service provider uses an IPv6
+	  prefix of its own in place of the fixed 6to4 prefix.
+
+	  With this option enabled, the SIT driver offers 6rd functionality by
+	  providing additional ioctl API to configure the IPv6 Prefix for in
+	  stead of static 2002::/16 for 6to4.
+
+	  If unsure, say N.
+
 config IPV6_NDISC_NODETYPE
 	bool
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99da272951dc..6955654262a5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -161,6 +161,21 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
 	write_unlock_bh(&ipip6_lock);
 }
 
+static void ipip6_tunnel_clone_6rd(struct ip_tunnel *t, struct sit_net *sitn)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (t->dev == sitn->fb_tunnel_dev) {
+		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+		t->ip6rd.relay_prefix = 0;
+		t->ip6rd.prefixlen = 16;
+		t->ip6rd.relay_prefixlen = 0;
+	} else {
+		struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
+		memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
+	}
+#endif
+}
+
 static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 		struct ip_tunnel_parm *parms, int create)
 {
@@ -213,6 +228,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
 
 	dev_hold(dev);
 
+	ipip6_tunnel_clone_6rd(t, sitn);
+
 	ipip6_tunnel_link(sitn, nt);
 	return nt;
 
@@ -532,17 +549,41 @@ out:
 	return 0;
 }
 
-/* Returns the embedded IPv4 address if the IPv6 address
-   comes from 6to4 (RFC 3056) addr space */
-
-static inline __be32 try_6to4(struct in6_addr *v6dst)
+/*
+ * Returns the embedded IPv4 address if the IPv6 address
+ * comes from 6rd / 6to4 (RFC 3056) addr space.
+ */
+static inline
+__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
 {
 	__be32 dst = 0;
 
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
+			      tunnel->ip6rd.prefixlen)) {
+		unsigned pbw0, pbi0;
+		int pbi1;
+		u32 d;
+
+		pbw0 = tunnel->ip6rd.prefixlen >> 5;
+		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+
+		d = (ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0]) << pbi0) >>
+		    tunnel->ip6rd.relay_prefixlen;
+
+		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+		if (pbi1 > 0)
+			d |= ntohl(tunnel->ip6rd.prefix.s6_addr32[pbw0 + 1]) >>
+			     (32 - pbi1);
+
+		dst = tunnel->ip6rd.relay_prefix | htonl(d);
+	}
+#else
 	if (v6dst->s6_addr16[0] == htons(0x2002)) {
 		/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
 		memcpy(&dst, &v6dst->s6_addr16[1], 4);
 	}
+#endif
 	return dst;
 }
 
@@ -596,7 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (!dst)
-		dst = try_6to4(&iph6->daddr);
+		dst = try_6rd(&iph6->daddr, tunnel);
 
 	if (!dst) {
 		struct neighbour *neigh = NULL;
@@ -786,9 +827,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ip_tunnel *t;
 	struct net *net = dev_net(dev);
 	struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd ip6rd;
+#endif
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCGET6RD:
+#endif
 		t = NULL;
 		if (dev == sitn->fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
@@ -799,9 +846,25 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 		}
 		if (t == NULL)
 			t = netdev_priv(dev);
-		memcpy(&p, &t->parms, sizeof(p));
-		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-			err = -EFAULT;
+
+		err = -EFAULT;
+		if (cmd == SIOCGETTUNNEL) {
+			memcpy(&p, &t->parms, sizeof(p));
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
+					 sizeof(p)))
+				goto done;
+#ifdef CONFIG_IPV6_SIT_6RD
+		} else {
+			ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+			ip6rd.prefixlen = t->ip6rd.prefixlen;
+			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
+					 sizeof(ip6rd)))
+				goto done;
+#endif
+		}
+		err = 0;
 		break;
 
 	case SIOCADDTUNNEL:
@@ -922,6 +985,51 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 		netdev_state_change(dev);
 		break;
 
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCADD6RD:
+	case SIOCCHG6RD:
+	case SIOCDEL6RD:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
+				   sizeof(ip6rd)))
+			goto done;
+
+		t = netdev_priv(dev);
+
+		if (cmd != SIOCDEL6RD) {
+			struct in6_addr prefix;
+			__be32 relay_prefix;
+
+			err = -EINVAL;
+			if (ip6rd.relay_prefixlen > 32 ||
+			    ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
+				goto done;
+
+			ipv6_addr_prefix(&prefix, &ip6rd.prefix,
+					 ip6rd.prefixlen);
+			if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+				goto done;
+			relay_prefix = ip6rd.relay_prefix &
+				       htonl(0xffffffffUL <<
+					     (32 - ip6rd.relay_prefixlen));
+			if (relay_prefix != ip6rd.relay_prefix)
+				goto done;
+
+			ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+			t->ip6rd.relay_prefix = relay_prefix;
+			t->ip6rd.prefixlen = ip6rd.prefixlen;
+			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
+		} else
+			ipip6_tunnel_clone_6rd(t, sitn);
+
+		err = 0;
+		break;
+#endif
+
 	default:
 		err = -EINVAL;
 	}
-- 
cgit v1.2.3


From d73d3a8cb4723e161589864741d8528d70b350eb Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 5 Oct 2009 10:59:58 +0000
Subject: ethtool: Add reset operation

After updating firmware stored in flash, users may wish to reset the
relevant hardware and start the new firmware immediately.  This should
not be completely automatic as it may be disruptive.

A selective reset may also be useful for debugging or diagnostics.

This adds a separate reset operation which takes flags indicating the
components to be reset.  Drivers are allowed to reset only a subset of
those requested, and must indicate the actual subset.  This allows the
use of generic component masks and some future expansion.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 32 ++++++++++++++++++++++++++++++++
 net/core/ethtool.c      | 23 +++++++++++++++++++++++
 2 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index aa0dcb3833d1..eb1a48da2d43 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -498,6 +498,7 @@ struct ethtool_ops {
 	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int     (*flash_device)(struct net_device *, struct ethtool_flash *);
+	int	(*reset)(struct net_device *, u32 *);
 };
 #endif /* __KERNEL__ */
 
@@ -555,6 +556,7 @@ struct ethtool_ops {
 #define	ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
 #define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
 #define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
+#define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -685,4 +687,34 @@ struct ethtool_ops {
 
 #define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
 
+/* Reset flags */
+/* The reset() operation must clear the flags for the components which
+ * were actually reset.  On successful return, the flags indicate the
+ * components which were not reset, either because they do not exist
+ * in the hardware or because they cannot be reset independently.  The
+ * driver must never reset any components that were not requested.
+ */
+enum ethtool_reset_flags {
+	/* These flags represent components dedicated to the interface
+	 * the command is addressed to.  Shift any flag left by
+	 * ETH_RESET_SHARED_SHIFT to reset a shared component of the
+	 * same type.
+	 */
+	ETH_RESET_MGMT		= 1 << 0,	/* Management processor */
+	ETH_RESET_IRQ		= 1 << 1,	/* Interrupt requester */
+	ETH_RESET_DMA		= 1 << 2,	/* DMA engine */
+	ETH_RESET_FILTER	= 1 << 3,	/* Filtering/flow direction */
+	ETH_RESET_OFFLOAD	= 1 << 4,	/* Protocol offload */
+	ETH_RESET_MAC		= 1 << 5,	/* Media access controller */
+	ETH_RESET_PHY		= 1 << 6,	/* Transceiver/PHY */
+	ETH_RESET_RAM		= 1 << 7,	/* RAM shared between
+						 * multiple components */
+
+	ETH_RESET_DEDICATED	= 0x0000ffff,	/* All components dedicated to
+						 * this interface */
+	ETH_RESET_ALL		= 0xffffffff,	/* All components used by this
+						 * interface, even if shared */
+};
+#define ETH_RESET_SHARED_SHIFT	16
+
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e1951084b973..d8aee584e8d1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -302,6 +302,26 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	return ret;
 }
 
+static int ethtool_reset(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value reset;
+	int ret;
+
+	if (!dev->ethtool_ops->reset)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&reset, useraddr, sizeof(reset)))
+		return -EFAULT;
+
+	ret = dev->ethtool_ops->reset(dev, &reset.data);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(useraddr, &reset, sizeof(reset)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
@@ -1089,6 +1109,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_FLASHDEV:
 		rc = ethtool_flash_device(dev, useraddr);
 		break;
+	case ETHTOOL_RESET:
+		rc = ethtool_reset(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 2 Oct 2009 11:39:15 +0000
Subject: make TLLAO option for NA packets configurable

On Friday 02 October 2009 20:53:51 you wrote:

> This is good although I would have shortened the name.

Ah, I knew I forgot something :) Here is v4.

tavi

>From 24d96d825b9fa832b22878cc6c990d5711968734 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 2 Oct 2009 00:51:15 +0300
Subject: [PATCH] ipv6: new sysctl for sending TLLAO with unicast NAs

Neighbor advertisements responding to unicast neighbor solicitations
did not include the target link-layer address option. This patch adds
a new sysctl option (disabled by default) which controls whether this
option should be sent even with unicast NAs.

The need for this arose because certain routers expect the TLLAO in
some situations even as a response to unicast NS packets.

Moreover, RFC 2461 recommends sending this to avoid a race condition
(section 4.4, Target link-layer address)

Signed-off-by: Cosmin Ratiu <cratiu@ixiacom.com>
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 18 ++++++++++++++++++
 include/linux/ipv6.h                   |  1 +
 net/ipv6/addrconf.c                    |  8 ++++++++
 net/ipv6/ndisc.c                       |  1 +
 4 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a6580c..a0e134dd2523 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1086,6 +1086,24 @@ accept_dad - INTEGER
 	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
 	   link-local address has been found.
 
+force_tllao - BOOLEAN
+	Enable sending the target link-layer address option even when
+	responding to a unicast neighbor solicitation.
+	Default: FALSE
+
+	Quoting from RFC 2461, section 4.4, Target link-layer address:
+
+	"The option MUST be included for multicast solicitations in order to
+	avoid infinite Neighbor Solicitation "recursion" when the peer node
+	does not have a cache entry to return a Neighbor Advertisements
+	message.  When responding to unicast solicitations, the option can be
+	omitted since the sender of the solicitation has the correct link-
+	layer address; otherwise it would not have be able to send the unicast
+	solicitation in the first place. However, including the link-layer
+	address in this case adds little overhead and eliminates a potential
+	race condition where the sender deletes the cached link-layer address
+	prior to receiving a response to a previous solicitation."
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c662efa68289..ae74ede1abe7 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -167,6 +167,7 @@ struct ipv6_devconf {
 #endif
 	__s32		disable_ipv6;
 	__s32		accept_dad;
+	__s32		force_tllao;
 	void		*sysctl;
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1fd0a3d775d2..bdcee6981c60 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4352,6 +4352,14 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
 			.proc_handler	=	proc_dointvec,
 		},
+		{
+			.ctl_name       = CTL_UNNUMBERED,
+			.procname       = "force_tllao",
+			.data           = &ipv6_devconf.force_tllao,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f74e4e2cdd06..3507cfe1e7a2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	icmp6h.icmp6_solicited = solicited;
 	icmp6h.icmp6_override = override;
 
+	inc_opt |= ifp->idev->cnf.force_tllao;
 	__ndisc_send(dev, neigh, daddr, src_addr,
 		     &icmp6h, solicited_addr,
 		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
-- 
cgit v1.2.3


From 32953543221cfe2bf0a24205fab225e5b8ed81a0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 5 Oct 2009 06:01:03 +0000
Subject: dcb: data center bridging ops should be r/o

The data center bridging ops structure can be const

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe.h        | 2 +-
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 2 +-
 include/linux/netdevice.h        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 385be6016667..28f32da794dd 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -397,7 +397,7 @@ enum ixgbe_boards {
 extern struct ixgbe_info ixgbe_82598_info;
 extern struct ixgbe_info ixgbe_82599_info;
 #ifdef CONFIG_IXGBE_DCB
-extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern const struct dcbnl_rtnl_ops dcbnl_ops;
 extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
                               struct ixgbe_dcb_config *dst_dcb_cfg,
                               int tc_max);
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index a6bc1ef28f92..3c7a79a7d7c6 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -563,7 +563,7 @@ static u8 ixgbe_dcbnl_setapp(struct net_device *netdev,
 	return rval;
 }
 
-struct dcbnl_rtnl_ops dcbnl_ops = {
+const struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getstate	= ixgbe_dcbnl_get_state,
 	.setstate	= ixgbe_dcbnl_set_state,
 	.getpermhwaddr	= ixgbe_dcbnl_get_perm_hw_addr,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94958c109761..b332eefebb1b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -909,7 +909,7 @@ struct net_device
 
 #ifdef CONFIG_DCB
 	/* Data Center Bridging netlink ops */
-	struct dcbnl_rtnl_ops *dcbnl_ops;
+	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
 
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-- 
cgit v1.2.3


From 1f56f4a2b4d12c1c348cab23024024396ec7cddc Mon Sep 17 00:00:00 2001
From: Gabe Black <gabe.black@ni.com>
Date: Tue, 6 Oct 2009 09:19:45 -0500
Subject: PCI quirk: TI XIO200a erroneously reports support for fast b2b
 transfers

This quirk will disable fast back to back transfer on the secondary bus
segment of the TI Bridge.

Signed-off-by: Gabe Black <gabe.black@ni.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/quirks.c    | 19 +++++++++++++++++++
 include/linux/pci_ids.h |  1 +
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6099facecd79..efa6534a6593 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -670,6 +670,25 @@ static void __devinit quirk_vt8235_acpi(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8235,	quirk_vt8235_acpi);
 
+/*
+ * TI XIO2000a PCIe-PCI Bridge erroneously reports it supports fast back-to-back:
+ *	Disable fast back-to-back on the secondary bus segment
+ */
+static void __devinit quirk_xio2000a(struct pci_dev *dev)
+{
+	struct pci_dev *pdev;
+	u16 command;
+
+	dev_warn(&dev->dev, "TI XIO2000a quirk detected; "
+		"secondary bus fast back-to-back transfers disabled\n");
+	list_for_each_entry(pdev, &dev->subordinate->devices, bus_list) {
+		pci_read_config_word(pdev, PCI_COMMAND, &command);
+		if (command & PCI_COMMAND_FAST_BACK)
+			pci_write_config_word(pdev, PCI_COMMAND, command & ~PCI_COMMAND_FAST_BACK);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A,
+			quirk_xio2000a);
 
 #ifdef CONFIG_X86_IO_APIC 
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index da1fda8623e0..f490e7a7307a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -776,6 +776,7 @@
 #define PCI_DEVICE_ID_TI_X515		0x8036
 #define PCI_DEVICE_ID_TI_XX12		0x8039
 #define PCI_DEVICE_ID_TI_XX12_FM	0x803b
+#define PCI_DEVICE_ID_TI_XIO2000A	0x8231
 #define PCI_DEVICE_ID_TI_1130		0xac12
 #define PCI_DEVICE_ID_TI_1031		0xac13
 #define PCI_DEVICE_ID_TI_1131		0xac15
-- 
cgit v1.2.3


From 7c89606e24cdabaceb8ca9b3c7ab866c6bcc9e38 Mon Sep 17 00:00:00 2001
From: Holger Schurig <hs4233@mail.mn-solutions.de>
Date: Thu, 24 Sep 2009 12:21:01 +0200
Subject: nl80211: report age of scan results

Linux keeps scan results up to 15 seconds. This can be a problem for fast
moving clients: they get back stale data. But if the kernel reports the age
of the BSS items, then user-space can simply weed out old entries by itself.

Signed-off-by: Holger Schurig <hs4233@mail.mn-solutions.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 net/wireless/nl80211.c  | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index a8d71ed43a0e..50afca3dcff1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1277,6 +1277,7 @@ enum nl80211_channel_type {
  * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
  *	in unspecified units, scaled to 0..100 (u8)
  * @NL80211_BSS_STATUS: status, if this BSS is "used"
+ * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -1291,6 +1292,7 @@ enum nl80211_bss {
 	NL80211_BSS_SIGNAL_MBM,
 	NL80211_BSS_SIGNAL_UNSPEC,
 	NL80211_BSS_STATUS,
+	NL80211_BSS_SEEN_MS_AGO,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index eddab097435c..e0ecc9f153d4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3105,6 +3105,8 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval);
 	NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability);
 	NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq);
+	NLA_PUT_U32(msg, NL80211_BSS_SEEN_MS_AGO,
+		jiffies_to_msecs(jiffies - intbss->ts));
 
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
-- 
cgit v1.2.3


From d308e38fa5467fbb523fc13e4b984375c2198c3d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Wed, 7 Oct 2009 13:53:11 -0700
Subject: include/linux/netdevice.h: fix nanodoc mismatch

nanodoc was missing an ndo_-prefix.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94958c109761..812a5f3c2abe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -557,7 +557,7 @@ struct netdev_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct net_device_stats* (*get_stats)(struct net_device *dev);
+ * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. If not defined, the counters in dev->stats will
  *	be used.
-- 
cgit v1.2.3


From 125a77ed9fbd21d1277f53e9ed6b39ad3d34e613 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 7 Oct 2009 13:57:10 -0700
Subject: IPv6: Fix 6RD build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix build error introduced in commit fa857afcf - ipv6 sit: 6rd
(IPv6 Rapid Deployment) Support.  Struct in6_addr is the issue.
I'm only seeing this on x86_64 systems, not on 32-bit with same
IPv6 config options, so it could be there's a missing forward
declaration somewhere, but including the correct header file
fixes the problem too.

  CC [M]  net/ipv6/ip6_tunnel.o
In file included from net/ipv6/ip6_tunnel.c:31:
include/linux/if_tunnel.h:59: error: field ‘prefix’ has incomplete type
make[2]: *** [net/ipv6/ip6_tunnel.o] Error 1
make[1]: *** [net/ipv6] Error 2

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index c53c8e016940..8d76cb4c86fa 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 #include <linux/ip.h>
+#include <linux/in6.h>
 #endif
 
 #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
-- 
cgit v1.2.3


From f86dcc5aa8c7908f2c287e7a211228df599e3e71 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Oct 2009 00:37:59 +0000
Subject: udp: dynamically size hash tables at boot time

UDP_HTABLE_SIZE was initialy defined to 128, which is a bit small for
several setups.

4000 active UDP sockets -> 32 sockets per chain in average. An
incoming frame has to lookup all sockets to find best match, so long
chains hurt latency.

Instead of a fixed size hash table that cant be perfect for every
needs, let UDP stack choose its table size at boot time like tcp/ip
route, using alloc_large_system_hash() helper

Add an optional boot parameter, uhash_entries=x so that an admin can
force a size between 256 and 65536 if needed, like thash_entries and
rhash_entries.

dmesg logs two new lines :
[    0.647039] UDP hash table entries: 512 (order: 0, 4096 bytes)
[    0.647099] UDP Lite hash table entries: 512 (order: 0, 4096 bytes)

Maximal size on 64bit arches would be 65536 slots, ie 1 MBytes for non
debugging spinlocks.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/kernel-parameters.txt |  3 ++
 include/linux/udp.h                 |  6 +--
 include/net/udp.h                   | 13 ++++--
 net/ipv4/udp.c                      | 91 +++++++++++++++++++++++++++----------
 net/ipv4/udplite.c                  |  4 +-
 net/ipv6/udp.c                      |  6 +--
 6 files changed, 87 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6fa7292947e5..02df20be7764 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2589,6 +2589,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	uart6850=	[HW,OSS]
 			Format: <io>,<irq>
 
+	uhash_entries=	[KNL,NET]
+			Set number of hash buckets for UDP/UDP-Lite connections
+
 	uhci-hcd.ignore_oc=
 			[USB] Ignore overcurrent events (default N).
 			Some badly-designed motherboards generate lots of
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0cf5c4c0ec81..832361e3e596 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -45,11 +45,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 	return (struct udphdr *)skb_transport_header(skb);
 }
 
-#define UDP_HTABLE_SIZE		128
+#define UDP_HTABLE_SIZE_MIN		(CONFIG_BASE_SMALL ? 128 : 256)
 
-static inline int udp_hashfn(struct net *net, const unsigned num)
+static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
 {
-	return (num + net_hash_mix(net)) & (UDP_HTABLE_SIZE - 1);
+	return (num + net_hash_mix(net)) & mask;
 }
 
 struct udp_sock {
diff --git a/include/net/udp.h b/include/net/udp.h
index f98abd2ce709..22aa2e7eb1d7 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -54,12 +54,19 @@ struct udp_hslot {
 	struct hlist_nulls_head	head;
 	spinlock_t		lock;
 } __attribute__((aligned(2 * sizeof(long))));
+
 struct udp_table {
-	struct udp_hslot	hash[UDP_HTABLE_SIZE];
+	struct udp_hslot	*hash;
+	unsigned int mask;
+	unsigned int log;
 };
 extern struct udp_table udp_table;
-extern void udp_table_init(struct udp_table *);
-
+extern void udp_table_init(struct udp_table *, const char *);
+static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
+					     struct net *net, unsigned num)
+{
+	return &table->hash[udp_hashfn(net, num, table->mask)];
+}
 
 /* Note: this must match 'valbool' in sock_setsockopt */
 #define UDP_CSUM_NOXMIT		1
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ec6a8a4a224..194bcdc6d9fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,7 +106,7 @@
 #include <net/xfrm.h>
 #include "udp_impl.h"
 
-struct udp_table udp_table;
+struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
 
 int sysctl_udp_mem[3] __read_mostly;
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
-#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
+#define MAX_UDP_PORTS 65536
+#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
 
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
 			       const struct udp_hslot *hslot,
 			       unsigned long *bitmap,
 			       struct sock *sk,
 			       int (*saddr_comp)(const struct sock *sk1,
-						 const struct sock *sk2))
+						 const struct sock *sk2),
+			       unsigned int log)
 {
 	struct sock *sk2;
 	struct hlist_nulls_node *node;
@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 		    (*saddr_comp)(sk, sk2)) {
 			if (bitmap)
-				__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
-					  bitmap);
+				__set_bit(sk2->sk_hash >> log, bitmap);
 			else
 				return 1;
 		}
@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		/*
 		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
 		 */
-		rand = (rand | 1) * UDP_HTABLE_SIZE;
-		for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
-			hslot = &udptable->hash[udp_hashfn(net, first)];
+		rand = (rand | 1) * (udptable->mask + 1);
+		for (last = first + udptable->mask + 1;
+		     first != last;
+		     first++) {
+			hslot = udp_hashslot(udptable, net, first);
 			bitmap_zero(bitmap, PORTS_PER_CHAIN);
 			spin_lock_bh(&hslot->lock);
 			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
-					    saddr_comp);
+					    saddr_comp, udptable->log);
 
 			snum = first;
 			/*
@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 			 */
 			do {
 				if (low <= snum && snum <= high &&
-				    !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
+				    !test_bit(snum >> udptable->log, bitmap))
 					goto found;
 				snum += rand;
 			} while (snum != first);
@@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		}
 		goto fail;
 	} else {
-		hslot = &udptable->hash[udp_hashfn(net, snum)];
+		hslot = udp_hashslot(udptable, net, snum);
 		spin_lock_bh(&hslot->lock);
-		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
+		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
+					saddr_comp, 0))
 			goto fail_unlock;
 	}
 found:
@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash = udp_hashfn(net, hnum);
+	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot = &udptable->hash[hash];
 	int score, badness;
 
@@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk)
 {
 	if (sk_hashed(sk)) {
 		struct udp_table *udptable = sk->sk_prot->h.udp_table;
-		unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
-		struct udp_hslot *hslot = &udptable->hash[hash];
+		struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
+						     sk->sk_hash);
 
 		spin_lock_bh(&hslot->lock);
 		if (sk_nulls_del_node_init_rcu(sk)) {
@@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udp_table *udptable)
 {
 	struct sock *sk;
-	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
 
 	spin_lock(&hslot->lock);
@@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+	for (state->bucket = start; state->bucket <= state->udp_table->mask;
+	     ++state->bucket) {
 		struct hlist_nulls_node *node;
 		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+
+		if (hlist_nulls_empty(&hslot->head))
+			continue;
+
 		spin_lock_bh(&hslot->lock);
 		sk_nulls_for_each(sk, node, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
@@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
 
 	if (!sk) {
-		if (state->bucket < UDP_HTABLE_SIZE)
+		if (state->bucket <= state->udp_table->mask)
 			spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
 		return udp_get_first(seq, state->bucket + 1);
 	}
@@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct udp_iter_state *state = seq->private;
-	state->bucket = UDP_HTABLE_SIZE;
+	state->bucket = MAX_UDP_PORTS;
 
 	return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
@@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
 {
 	struct udp_iter_state *state = seq->private;
 
-	if (state->bucket < UDP_HTABLE_SIZE)
+	if (state->bucket <= state->udp_table->mask)
 		spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
 }
 
@@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 destp	  = ntohs(inet->dport);
 	__u16 srcp	  = ntohs(inet->sport);
 
-	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
+	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
@@ -1804,11 +1813,43 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
-void __init udp_table_init(struct udp_table *table)
+static __initdata unsigned long uhash_entries;
+static int __init set_uhash_entries(char *str)
 {
-	int i;
+	if (!str)
+		return 0;
+	uhash_entries = simple_strtoul(str, &str, 0);
+	if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
+		uhash_entries = UDP_HTABLE_SIZE_MIN;
+	return 1;
+}
+__setup("uhash_entries=", set_uhash_entries);
 
-	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+void __init udp_table_init(struct udp_table *table, const char *name)
+{
+	unsigned int i;
+
+	if (!CONFIG_BASE_SMALL)
+		table->hash = alloc_large_system_hash(name,
+			sizeof(struct udp_hslot),
+			uhash_entries,
+			21, /* one slot per 2 MB */
+			0,
+			&table->log,
+			&table->mask,
+			64 * 1024);
+	/*
+	 * Make sure hash table has the minimum size
+	 */
+	if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
+		table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
+				      sizeof(struct udp_hslot), GFP_KERNEL);
+		if (!table->hash)
+			panic(name);
+		table->log = ilog2(UDP_HTABLE_SIZE_MIN);
+		table->mask = UDP_HTABLE_SIZE_MIN - 1;
+	}
+	for (i = 0; i <= table->mask; i++) {
 		INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
 		spin_lock_init(&table->hash[i].lock);
 	}
@@ -1818,7 +1859,7 @@ void __init udp_init(void)
 {
 	unsigned long nr_pages, limit;
 
-	udp_table_init(&udp_table);
+	udp_table_init(&udp_table, "UDP");
 	/* Set the pressure threshold up by the same strategy of TCP. It is a
 	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
 	 * toward zero with the amount of memory, with a floor of 128 pages.
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 95248d7f75ec..470c504b9554 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -12,7 +12,7 @@
  */
 #include "udp_impl.h"
 
-struct udp_table 	udplite_table;
+struct udp_table 	udplite_table __read_mostly;
 EXPORT_SYMBOL(udplite_table);
 
 static int udplite_rcv(struct sk_buff *skb)
@@ -110,7 +110,7 @@ static inline int udplite4_proc_init(void)
 
 void __init udplite4_register(void)
 {
-	udp_table_init(&udplite_table);
+	udp_table_init(&udplite_table, "UDP-Lite");
 	if (proto_register(&udplite_prot, 1))
 		goto out_register_err;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6a303ec834c..ff778c172ef2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
-	unsigned int hash = udp_hashfn(net, hnum);
+	unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot = &udptable->hash[hash];
 	int score, badness;
 
@@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 {
 	struct sock *sk, *sk2;
 	const struct udphdr *uh = udp_hdr(skb);
-	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
+	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
 
 	spin_lock(&hslot->lock);
@@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 	destp = ntohs(inet->dport);
 	srcp  = ntohs(inet->sport);
 	seq_printf(seq,
-		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		   bucket,
 		   src->s6_addr32[0], src->s6_addr32[1],
-- 
cgit v1.2.3


From 3758bf25db8caeec667e4e56e030da0ec3060529 Mon Sep 17 00:00:00 2001
From: Anant Gole <anantgole@ti.com>
Date: Wed, 7 Oct 2009 02:59:47 +0000
Subject: can: add TI CAN (HECC) driver

TI HECC (High End CAN Controller) module is found on many TI devices. It
has 32 hardware mailboxes with full implementation of CAN protocol 2.0B
with bus speeds up to 1Mbps. Specifications of the module are available
on TI web <http://www.ti.com>

Signed-off-by: Anant Gole <anantgole@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig              |    7 +
 drivers/net/can/Makefile             |    1 +
 drivers/net/can/ti_hecc.c            | 1006 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/ti_hecc.h |   40 ++
 4 files changed, 1054 insertions(+)
 create mode 100644 drivers/net/can/ti_hecc.c
 create mode 100644 include/linux/can/platform/ti_hecc.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index df32c109b7ac..26d77cc0ded7 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -95,6 +95,13 @@ config CAN_AT91
 	---help---
 	  This is a driver for the SoC CAN controller in Atmel's AT91SAM9263.
 
+config CAN_TI_HECC
+	depends on CAN_DEV && ARCH_OMAP3
+	tristate "TI High End CAN Controller"
+	---help---
+	  Driver for TI HECC (High End CAN Controller) module found on many
+	  TI devices. The device specifications are available from www.ti.com
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 0dea62721f2f..31f4ab5df28b 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -11,5 +11,6 @@ obj-y				+= usb/
 
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
+obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
new file mode 100644
index 000000000000..814e6c5c6386
--- /dev/null
+++ b/drivers/net/can/ti_hecc.c
@@ -0,0 +1,1006 @@
+/*
+ * TI HECC (CAN) device driver
+ *
+ * This driver supports TI's HECC (High End CAN Controller module) and the
+ * specs for the same is available at <http://www.ti.com>
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed as is WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * Your platform definitions should specify module ram offsets and interrupt
+ * number to use as follows:
+ *
+ * static struct ti_hecc_platform_data am3517_evm_hecc_pdata = {
+ *         .scc_hecc_offset        = 0,
+ *         .scc_ram_offset         = 0x3000,
+ *         .hecc_ram_offset        = 0x3000,
+ *         .mbx_offset             = 0x2000,
+ *         .int_line               = 0,
+ *         .revision               = 1,
+ * };
+ *
+ * Please see include/can/platform/ti_hecc.h for description of above fields
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/platform/ti_hecc.h>
+
+#define DRV_NAME "ti_hecc"
+#define HECC_MODULE_VERSION     "0.7"
+MODULE_VERSION(HECC_MODULE_VERSION);
+#define DRV_DESC "TI High End CAN Controller Driver " HECC_MODULE_VERSION
+
+/* TX / RX Mailbox Configuration */
+#define HECC_MAX_MAILBOXES	32	/* hardware mailboxes - do not change */
+#define MAX_TX_PRIO		0x3F	/* hardware value - do not change */
+
+/*
+ * Important Note: TX mailbox configuration
+ * TX mailboxes should be restricted to the number of SKB buffers to avoid
+ * maintaining SKB buffers separately. TX mailboxes should be a power of 2
+ * for the mailbox logic to work.  Top mailbox numbers are reserved for RX
+ * and lower mailboxes for TX.
+ *
+ * HECC_MAX_TX_MBOX	HECC_MB_TX_SHIFT
+ * 4 (default)		2
+ * 8			3
+ * 16			4
+ */
+#define HECC_MB_TX_SHIFT	2 /* as per table above */
+#define HECC_MAX_TX_MBOX	BIT(HECC_MB_TX_SHIFT)
+
+#if (HECC_MAX_TX_MBOX > CAN_ECHO_SKB_MAX)
+#error "HECC: MAX TX mailboxes should be equal or less than CAN_ECHO_SKB_MAX"
+#endif
+
+#define HECC_TX_PRIO_SHIFT	(HECC_MB_TX_SHIFT)
+#define HECC_TX_PRIO_MASK	(MAX_TX_PRIO << HECC_MB_TX_SHIFT)
+#define HECC_TX_MB_MASK		(HECC_MAX_TX_MBOX - 1)
+#define HECC_TX_MASK		((HECC_MAX_TX_MBOX - 1) | HECC_TX_PRIO_MASK)
+#define HECC_TX_MBOX_MASK	(~(BIT(HECC_MAX_TX_MBOX) - 1))
+#define HECC_DEF_NAPI_WEIGHT	HECC_MAX_RX_MBOX
+
+/*
+ * Important Note: RX mailbox configuration
+ * RX mailboxes are further logically split into two - main and buffer
+ * mailboxes. The goal is to get all packets into main mailboxes as
+ * driven by mailbox number and receive priority (higher to lower) and
+ * buffer mailboxes are used to receive pkts while main mailboxes are being
+ * processed. This ensures in-order packet reception.
+ *
+ * Here are the recommended values for buffer mailbox. Note that RX mailboxes
+ * start after TX mailboxes:
+ *
+ * HECC_MAX_RX_MBOX		HECC_RX_BUFFER_MBOX	No of buffer mailboxes
+ * 28				12			8
+ * 16				20			4
+ */
+
+#define HECC_MAX_RX_MBOX	(HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX)
+#define HECC_RX_BUFFER_MBOX	12 /* as per table above */
+#define HECC_RX_FIRST_MBOX	(HECC_MAX_MAILBOXES - 1)
+#define HECC_RX_HIGH_MBOX_MASK	(~(BIT(HECC_RX_BUFFER_MBOX) - 1))
+
+/* TI HECC module registers */
+#define HECC_CANME		0x0	/* Mailbox enable */
+#define HECC_CANMD		0x4	/* Mailbox direction */
+#define HECC_CANTRS		0x8	/* Transmit request set */
+#define HECC_CANTRR		0xC	/* Transmit request */
+#define HECC_CANTA		0x10	/* Transmission acknowledge */
+#define HECC_CANAA		0x14	/* Abort acknowledge */
+#define HECC_CANRMP		0x18	/* Receive message pending */
+#define HECC_CANRML		0x1C	/* Remote message lost */
+#define HECC_CANRFP		0x20	/* Remote frame pending */
+#define HECC_CANGAM		0x24	/* SECC only:Global acceptance mask */
+#define HECC_CANMC		0x28	/* Master control */
+#define HECC_CANBTC		0x2C	/* Bit timing configuration */
+#define HECC_CANES		0x30	/* Error and status */
+#define HECC_CANTEC		0x34	/* Transmit error counter */
+#define HECC_CANREC		0x38	/* Receive error counter */
+#define HECC_CANGIF0		0x3C	/* Global interrupt flag 0 */
+#define HECC_CANGIM		0x40	/* Global interrupt mask */
+#define HECC_CANGIF1		0x44	/* Global interrupt flag 1 */
+#define HECC_CANMIM		0x48	/* Mailbox interrupt mask */
+#define HECC_CANMIL		0x4C	/* Mailbox interrupt level */
+#define HECC_CANOPC		0x50	/* Overwrite protection control */
+#define HECC_CANTIOC		0x54	/* Transmit I/O control */
+#define HECC_CANRIOC		0x58	/* Receive I/O control */
+#define HECC_CANLNT		0x5C	/* HECC only: Local network time */
+#define HECC_CANTOC		0x60	/* HECC only: Time-out control */
+#define HECC_CANTOS		0x64	/* HECC only: Time-out status */
+#define HECC_CANTIOCE		0x68	/* SCC only:Enhanced TX I/O control */
+#define HECC_CANRIOCE		0x6C	/* SCC only:Enhanced RX I/O control */
+
+/* Mailbox registers */
+#define HECC_CANMID		0x0
+#define HECC_CANMCF		0x4
+#define HECC_CANMDL		0x8
+#define HECC_CANMDH		0xC
+
+#define HECC_SET_REG		0xFFFFFFFF
+#define HECC_CANID_MASK		0x3FF	/* 18 bits mask for extended id's */
+#define HECC_CCE_WAIT_COUNT     100	/* Wait for ~1 sec for CCE bit */
+
+#define HECC_CANMC_SCM		BIT(13)	/* SCC compat mode */
+#define HECC_CANMC_CCR		BIT(12)	/* Change config request */
+#define HECC_CANMC_PDR		BIT(11)	/* Local Power down - for sleep mode */
+#define HECC_CANMC_ABO		BIT(7)	/* Auto Bus On */
+#define HECC_CANMC_STM		BIT(6)	/* Self test mode - loopback */
+#define HECC_CANMC_SRES		BIT(5)	/* Software reset */
+
+#define HECC_CANTIOC_EN		BIT(3)	/* Enable CAN TX I/O pin */
+#define HECC_CANRIOC_EN		BIT(3)	/* Enable CAN RX I/O pin */
+
+#define HECC_CANMID_IDE		BIT(31)	/* Extended frame format */
+#define HECC_CANMID_AME		BIT(30)	/* Acceptance mask enable */
+#define HECC_CANMID_AAM		BIT(29)	/* Auto answer mode */
+
+#define HECC_CANES_FE		BIT(24)	/* form error */
+#define HECC_CANES_BE		BIT(23)	/* bit error */
+#define HECC_CANES_SA1		BIT(22)	/* stuck at dominant error */
+#define HECC_CANES_CRCE		BIT(21)	/* CRC error */
+#define HECC_CANES_SE		BIT(20)	/* stuff bit error */
+#define HECC_CANES_ACKE		BIT(19)	/* ack error */
+#define HECC_CANES_BO		BIT(18)	/* Bus off status */
+#define HECC_CANES_EP		BIT(17)	/* Error passive status */
+#define HECC_CANES_EW		BIT(16)	/* Error warning status */
+#define HECC_CANES_SMA		BIT(5)	/* suspend mode ack */
+#define HECC_CANES_CCE		BIT(4)	/* Change config enabled */
+#define HECC_CANES_PDA		BIT(3)	/* Power down mode ack */
+
+#define HECC_CANBTC_SAM		BIT(7)	/* sample points */
+
+#define HECC_BUS_ERROR		(HECC_CANES_FE | HECC_CANES_BE |\
+				HECC_CANES_CRCE | HECC_CANES_SE |\
+				HECC_CANES_ACKE)
+
+#define HECC_CANMCF_RTR		BIT(4)	/* Remote transmit request */
+
+#define HECC_CANGIF_MAIF	BIT(17)	/* Message alarm interrupt */
+#define HECC_CANGIF_TCOIF	BIT(16) /* Timer counter overflow int */
+#define HECC_CANGIF_GMIF	BIT(15)	/* Global mailbox interrupt */
+#define HECC_CANGIF_AAIF	BIT(14)	/* Abort ack interrupt */
+#define HECC_CANGIF_WDIF	BIT(13)	/* Write denied interrupt */
+#define HECC_CANGIF_WUIF	BIT(12)	/* Wake up interrupt */
+#define HECC_CANGIF_RMLIF	BIT(11)	/* Receive message lost interrupt */
+#define HECC_CANGIF_BOIF	BIT(10)	/* Bus off interrupt */
+#define HECC_CANGIF_EPIF	BIT(9)	/* Error passive interrupt */
+#define HECC_CANGIF_WLIF	BIT(8)	/* Warning level interrupt */
+#define HECC_CANGIF_MBOX_MASK	0x1F	/* Mailbox number mask */
+#define HECC_CANGIM_I1EN	BIT(1)	/* Int line 1 enable */
+#define HECC_CANGIM_I0EN	BIT(0)	/* Int line 0 enable */
+#define HECC_CANGIM_DEF_MASK	0x700	/* only busoff/warning/passive */
+#define HECC_CANGIM_SIL		BIT(2)	/* system interrupts to int line 1 */
+
+/* CAN Bittiming constants as per HECC specs */
+static struct can_bittiming_const ti_hecc_bittiming_const = {
+	.name = DRV_NAME,
+	.tseg1_min = 1,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 256,
+	.brp_inc = 1,
+};
+
+struct ti_hecc_priv {
+	struct can_priv can;	/* MUST be first member/field */
+	struct napi_struct napi;
+	struct net_device *ndev;
+	struct clk *clk;
+	void __iomem *base;
+	u32 scc_ram_offset;
+	u32 hecc_ram_offset;
+	u32 mbx_offset;
+	u32 int_line;
+	spinlock_t mbx_lock; /* CANME register needs protection */
+	u32 tx_head;
+	u32 tx_tail;
+	u32 rx_next;
+};
+
+static inline int get_tx_head_mb(struct ti_hecc_priv *priv)
+{
+	return priv->tx_head & HECC_TX_MB_MASK;
+}
+
+static inline int get_tx_tail_mb(struct ti_hecc_priv *priv)
+{
+	return priv->tx_tail & HECC_TX_MB_MASK;
+}
+
+static inline int get_tx_head_prio(struct ti_hecc_priv *priv)
+{
+	return (priv->tx_head >> HECC_TX_PRIO_SHIFT) & MAX_TX_PRIO;
+}
+
+static inline void hecc_write_lam(struct ti_hecc_priv *priv, u32 mbxno, u32 val)
+{
+	__raw_writel(val, priv->base + priv->hecc_ram_offset + mbxno * 4);
+}
+
+static inline void hecc_write_mbx(struct ti_hecc_priv *priv, u32 mbxno,
+	u32 reg, u32 val)
+{
+	__raw_writel(val, priv->base + priv->mbx_offset + mbxno * 0x10 +
+			reg);
+}
+
+static inline u32 hecc_read_mbx(struct ti_hecc_priv *priv, u32 mbxno, u32 reg)
+{
+	return __raw_readl(priv->base + priv->mbx_offset + mbxno * 0x10 +
+			reg);
+}
+
+static inline void hecc_write(struct ti_hecc_priv *priv, u32 reg, u32 val)
+{
+	__raw_writel(val, priv->base + reg);
+}
+
+static inline u32 hecc_read(struct ti_hecc_priv *priv, int reg)
+{
+	return __raw_readl(priv->base + reg);
+}
+
+static inline void hecc_set_bit(struct ti_hecc_priv *priv, int reg,
+	u32 bit_mask)
+{
+	hecc_write(priv, reg, hecc_read(priv, reg) | bit_mask);
+}
+
+static inline void hecc_clear_bit(struct ti_hecc_priv *priv, int reg,
+	u32 bit_mask)
+{
+	hecc_write(priv, reg, hecc_read(priv, reg) & ~bit_mask);
+}
+
+static inline u32 hecc_get_bit(struct ti_hecc_priv *priv, int reg, u32 bit_mask)
+{
+	return (hecc_read(priv, reg) & bit_mask) ? 1 : 0;
+}
+
+static int ti_hecc_get_state(const struct net_device *ndev,
+	enum can_state *state)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	*state = priv->can.state;
+	return 0;
+}
+
+static int ti_hecc_set_btc(struct ti_hecc_priv *priv)
+{
+	struct can_bittiming *bit_timing = &priv->can.bittiming;
+	u32 can_btc;
+
+	can_btc = (bit_timing->phase_seg2 - 1) & 0x7;
+	can_btc |= ((bit_timing->phase_seg1 + bit_timing->prop_seg - 1)
+			& 0xF) << 3;
+	if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) {
+		if (bit_timing->brp > 4)
+			can_btc |= HECC_CANBTC_SAM;
+		else
+			dev_warn(priv->ndev->dev.parent, "WARN: Triple" \
+				"sampling not set due to h/w limitations");
+	}
+	can_btc |= ((bit_timing->sjw - 1) & 0x3) << 8;
+	can_btc |= ((bit_timing->brp - 1) & 0xFF) << 16;
+
+	/* ERM being set to 0 by default meaning resync at falling edge */
+
+	hecc_write(priv, HECC_CANBTC, can_btc);
+	dev_info(priv->ndev->dev.parent, "setting CANBTC=%#x\n", can_btc);
+
+	return 0;
+}
+
+static void ti_hecc_reset(struct net_device *ndev)
+{
+	u32 cnt;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	dev_dbg(ndev->dev.parent, "resetting hecc ...\n");
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SRES);
+
+	/* Set change control request and wait till enabled */
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+
+	/*
+	 * INFO: It has been observed that at times CCE bit may not be
+	 * set and hw seems to be ok even if this bit is not set so
+	 * timing out with a timing of 1ms to respect the specs
+	 */
+	cnt = HECC_CCE_WAIT_COUNT;
+	while (!hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) {
+		--cnt;
+		udelay(10);
+	}
+
+	/*
+	 * Note: On HECC, BTC can be programmed only in initialization mode, so
+	 * it is expected that the can bittiming parameters are set via ip
+	 * utility before the device is opened
+	 */
+	ti_hecc_set_btc(priv);
+
+	/* Clear CCR (and CANMC register) and wait for CCE = 0 enable */
+	hecc_write(priv, HECC_CANMC, 0);
+
+	/*
+	 * INFO: CAN net stack handles bus off and hence disabling auto-bus-on
+	 * hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_ABO);
+	 */
+
+	/*
+	 * INFO: It has been observed that at times CCE bit may not be
+	 * set and hw seems to be ok even if this bit is not set so
+	 */
+	cnt = HECC_CCE_WAIT_COUNT;
+	while (hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) {
+		--cnt;
+		udelay(10);
+	}
+
+	/* Enable TX and RX I/O Control pins */
+	hecc_write(priv, HECC_CANTIOC, HECC_CANTIOC_EN);
+	hecc_write(priv, HECC_CANRIOC, HECC_CANRIOC_EN);
+
+	/* Clear registers for clean operation */
+	hecc_write(priv, HECC_CANTA, HECC_SET_REG);
+	hecc_write(priv, HECC_CANRMP, HECC_SET_REG);
+	hecc_write(priv, HECC_CANGIF0, HECC_SET_REG);
+	hecc_write(priv, HECC_CANGIF1, HECC_SET_REG);
+	hecc_write(priv, HECC_CANME, 0);
+	hecc_write(priv, HECC_CANMD, 0);
+
+	/* SCC compat mode NOT supported (and not needed too) */
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SCM);
+}
+
+static void ti_hecc_start(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	u32 cnt, mbxno, mbx_mask;
+
+	/* put HECC in initialization mode and set btc */
+	ti_hecc_reset(ndev);
+
+	priv->tx_head = priv->tx_tail = HECC_TX_MASK;
+	priv->rx_next = HECC_RX_FIRST_MBOX;
+
+	/* Enable local and global acceptance mask registers */
+	hecc_write(priv, HECC_CANGAM, HECC_SET_REG);
+
+	/* Prepare configured mailboxes to receive messages */
+	for (cnt = 0; cnt < HECC_MAX_RX_MBOX; cnt++) {
+		mbxno = HECC_MAX_MAILBOXES - 1 - cnt;
+		mbx_mask = BIT(mbxno);
+		hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+		hecc_write_mbx(priv, mbxno, HECC_CANMID, HECC_CANMID_AME);
+		hecc_write_lam(priv, mbxno, HECC_SET_REG);
+		hecc_set_bit(priv, HECC_CANMD, mbx_mask);
+		hecc_set_bit(priv, HECC_CANME, mbx_mask);
+		hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
+	}
+
+	/* Prevent message over-write & Enable interrupts */
+	hecc_write(priv, HECC_CANOPC, HECC_SET_REG);
+	if (priv->int_line) {
+		hecc_write(priv, HECC_CANMIL, HECC_SET_REG);
+		hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK |
+			HECC_CANGIM_I1EN | HECC_CANGIM_SIL);
+	} else {
+		hecc_write(priv, HECC_CANMIL, 0);
+		hecc_write(priv, HECC_CANGIM,
+			HECC_CANGIM_DEF_MASK | HECC_CANGIM_I0EN);
+	}
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+static void ti_hecc_stop(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	/* Disable interrupts and disable mailboxes */
+	hecc_write(priv, HECC_CANGIM, 0);
+	hecc_write(priv, HECC_CANMIM, 0);
+	hecc_write(priv, HECC_CANME, 0);
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+static int ti_hecc_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	int ret = 0;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		ti_hecc_start(ndev);
+		netif_wake_queue(ndev);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * ti_hecc_xmit: HECC Transmit
+ *
+ * The transmit mailboxes start from 0 to HECC_MAX_TX_MBOX. In HECC the
+ * priority of the mailbox for tranmission is dependent upon priority setting
+ * field in mailbox registers. The mailbox with highest value in priority field
+ * is transmitted first. Only when two mailboxes have the same value in
+ * priority field the highest numbered mailbox is transmitted first.
+ *
+ * To utilize the HECC priority feature as described above we start with the
+ * highest numbered mailbox with highest priority level and move on to the next
+ * mailbox with the same priority level and so on. Once we loop through all the
+ * transmit mailboxes we choose the next priority level (lower) and so on
+ * until we reach the lowest priority level on the lowest numbered mailbox
+ * when we stop transmission until all mailboxes are transmitted and then
+ * restart at highest numbered mailbox with highest priority.
+ *
+ * Two counters (head and tail) are used to track the next mailbox to transmit
+ * and to track the echo buffer for already transmitted mailbox. The queue
+ * is stopped when all the mailboxes are busy or when there is a priority
+ * value roll-over happens.
+ */
+static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 mbxno, mbx_mask, data;
+	unsigned long flags;
+
+	mbxno = get_tx_head_mb(priv);
+	mbx_mask = BIT(mbxno);
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	if (unlikely(hecc_read(priv, HECC_CANME) & mbx_mask)) {
+		spin_unlock_irqrestore(&priv->mbx_lock, flags);
+		netif_stop_queue(ndev);
+		dev_err(priv->ndev->dev.parent,
+			"BUG: TX mbx not ready tx_head=%08X, tx_tail=%08X\n",
+			priv->tx_head, priv->tx_tail);
+		return NETDEV_TX_BUSY;
+	}
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	/* Prepare mailbox for transmission */
+	data = min_t(u8, cf->can_dlc, 8);
+	if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */
+		data |= HECC_CANMCF_RTR;
+	data |= get_tx_head_prio(priv) << 8;
+	hecc_write_mbx(priv, mbxno, HECC_CANMCF, data);
+
+	if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */
+		data = (cf->can_id & CAN_EFF_MASK) | HECC_CANMID_IDE;
+	else /* Standard frame format */
+		data = (cf->can_id & CAN_SFF_MASK) << 18;
+	hecc_write_mbx(priv, mbxno, HECC_CANMID, data);
+	hecc_write_mbx(priv, mbxno, HECC_CANMDL,
+		be32_to_cpu(*(u32 *)(cf->data)));
+	if (cf->can_dlc > 4)
+		hecc_write_mbx(priv, mbxno, HECC_CANMDH,
+			be32_to_cpu(*(u32 *)(cf->data + 4)));
+	else
+		*(u32 *)(cf->data + 4) = 0;
+	can_put_echo_skb(skb, ndev, mbxno);
+
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	--priv->tx_head;
+	if ((hecc_read(priv, HECC_CANME) & BIT(get_tx_head_mb(priv))) ||
+		(priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK) {
+		netif_stop_queue(ndev);
+	}
+	hecc_set_bit(priv, HECC_CANME, mbx_mask);
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	hecc_clear_bit(priv, HECC_CANMD, mbx_mask);
+	hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
+	hecc_write(priv, HECC_CANTRS, mbx_mask);
+
+	return NETDEV_TX_OK;
+}
+
+static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno)
+{
+	struct net_device_stats *stats = &priv->ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 data, mbx_mask;
+	unsigned long flags;
+
+	skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame));
+	if (!skb) {
+		if (printk_ratelimit())
+			dev_err(priv->ndev->dev.parent,
+				"ti_hecc_rx_pkt: netdev_alloc_skb() failed\n");
+		return -ENOMEM;
+	}
+	skb->protocol = __constant_htons(ETH_P_CAN);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	mbx_mask = BIT(mbxno);
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
+	if (data & HECC_CANMID_IDE)
+		cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
+	else
+		cf->can_id = (data >> 18) & CAN_SFF_MASK;
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMCF);
+	if (data & HECC_CANMCF_RTR)
+		cf->can_id |= CAN_RTR_FLAG;
+	cf->can_dlc = data & 0xF;
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMDL);
+	*(u32 *)(cf->data) = cpu_to_be32(data);
+	if (cf->can_dlc > 4) {
+		data = hecc_read_mbx(priv, mbxno, HECC_CANMDH);
+		*(u32 *)(cf->data + 4) = cpu_to_be32(data);
+	} else {
+		*(u32 *)(cf->data + 4) = 0;
+	}
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+	hecc_write(priv, HECC_CANRMP, mbx_mask);
+	/* enable mailbox only if it is part of rx buffer mailboxes */
+	if (priv->rx_next < HECC_RX_BUFFER_MBOX)
+		hecc_set_bit(priv, HECC_CANME, mbx_mask);
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	stats->rx_bytes += cf->can_dlc;
+	netif_receive_skb(skb);
+	stats->rx_packets++;
+
+	return 0;
+}
+
+/*
+ * ti_hecc_rx_poll - HECC receive pkts
+ *
+ * The receive mailboxes start from highest numbered mailbox till last xmit
+ * mailbox. On CAN frame reception the hardware places the data into highest
+ * numbered mailbox that matches the CAN ID filter. Since all receive mailboxes
+ * have same filtering (ALL CAN frames) packets will arrive in the highest
+ * available RX mailbox and we need to ensure in-order packet reception.
+ *
+ * To ensure the packets are received in the right order we logically divide
+ * the RX mailboxes into main and buffer mailboxes. Packets are received as per
+ * mailbox priotity (higher to lower) in the main bank and once it is full we
+ * disable further reception into main mailboxes. While the main mailboxes are
+ * processed in NAPI, further packets are received in buffer mailboxes.
+ *
+ * We maintain a RX next mailbox counter to process packets and once all main
+ * mailboxe packets are passed to the upper stack we enable all of them but
+ * continue to process packets received in buffer mailboxes. With each packet
+ * received from buffer mailbox we enable it immediately so as to handle the
+ * overflow from higher mailboxes.
+ */
+static int ti_hecc_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct net_device *ndev = napi->dev;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	u32 num_pkts = 0;
+	u32 mbx_mask;
+	unsigned long pending_pkts, flags;
+
+	if (!netif_running(ndev))
+		return 0;
+
+	while ((pending_pkts = hecc_read(priv, HECC_CANRMP)) &&
+		num_pkts < quota) {
+		mbx_mask = BIT(priv->rx_next); /* next rx mailbox to process */
+		if (mbx_mask & pending_pkts) {
+			if (ti_hecc_rx_pkt(priv, priv->rx_next) < 0)
+				return num_pkts;
+			++num_pkts;
+		} else if (priv->rx_next > HECC_RX_BUFFER_MBOX) {
+			break; /* pkt not received yet */
+		}
+		--priv->rx_next;
+		if (priv->rx_next == HECC_RX_BUFFER_MBOX) {
+			/* enable high bank mailboxes */
+			spin_lock_irqsave(&priv->mbx_lock, flags);
+			mbx_mask = hecc_read(priv, HECC_CANME);
+			mbx_mask |= HECC_RX_HIGH_MBOX_MASK;
+			hecc_write(priv, HECC_CANME, mbx_mask);
+			spin_unlock_irqrestore(&priv->mbx_lock, flags);
+		} else if (priv->rx_next == HECC_MAX_TX_MBOX - 1) {
+			priv->rx_next = HECC_RX_FIRST_MBOX;
+			break;
+		}
+	}
+
+	/* Enable packet interrupt if all pkts are handled */
+	if (hecc_read(priv, HECC_CANRMP) == 0) {
+		napi_complete(napi);
+		/* Re-enable RX mailbox interrupts */
+		mbx_mask = hecc_read(priv, HECC_CANMIM);
+		mbx_mask |= HECC_TX_MBOX_MASK;
+		hecc_write(priv, HECC_CANMIM, mbx_mask);
+	}
+
+	return num_pkts;
+}
+
+static int ti_hecc_error(struct net_device *ndev, int int_status,
+	int err_status)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+
+	/* propogate the error condition to the can stack */
+	skb = netdev_alloc_skb(ndev, sizeof(struct can_frame));
+	if (!skb) {
+		if (printk_ratelimit())
+			dev_err(priv->ndev->dev.parent,
+				"ti_hecc_error: netdev_alloc_skb() failed\n");
+		return -ENOMEM;
+	}
+	skb->protocol = __constant_htons(ETH_P_CAN);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	memset(cf, 0, sizeof(struct can_frame));
+	cf->can_id = CAN_ERR_FLAG;
+	cf->can_dlc = CAN_ERR_DLC;
+
+	if (int_status & HECC_CANGIF_WLIF) { /* warning level int */
+		if ((int_status & HECC_CANGIF_BOIF) == 0) {
+			priv->can.state = CAN_STATE_ERROR_WARNING;
+			++priv->can.can_stats.error_warning;
+			cf->can_id |= CAN_ERR_CRTL;
+			if (hecc_read(priv, HECC_CANTEC) > 96)
+				cf->data[1] |= CAN_ERR_CRTL_TX_WARNING;
+			if (hecc_read(priv, HECC_CANREC) > 96)
+				cf->data[1] |= CAN_ERR_CRTL_RX_WARNING;
+		}
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW);
+		dev_dbg(priv->ndev->dev.parent, "Error Warning interrupt\n");
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+	}
+
+	if (int_status & HECC_CANGIF_EPIF) { /* error passive int */
+		if ((int_status & HECC_CANGIF_BOIF) == 0) {
+			priv->can.state = CAN_STATE_ERROR_PASSIVE;
+			++priv->can.can_stats.error_passive;
+			cf->can_id |= CAN_ERR_CRTL;
+			if (hecc_read(priv, HECC_CANTEC) > 127)
+				cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE;
+			if (hecc_read(priv, HECC_CANREC) > 127)
+				cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE;
+		}
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP);
+		dev_dbg(priv->ndev->dev.parent, "Error passive interrupt\n");
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+	}
+
+	/*
+	 * Need to check busoff condition in error status register too to
+	 * ensure warning interrupts don't hog the system
+	 */
+	if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) {
+		priv->can.state = CAN_STATE_BUS_OFF;
+		cf->can_id |= CAN_ERR_BUSOFF;
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO);
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+		/* Disable all interrupts in bus-off to avoid int hog */
+		hecc_write(priv, HECC_CANGIM, 0);
+		can_bus_off(ndev);
+	}
+
+	if (err_status & HECC_BUS_ERROR) {
+		++priv->can.can_stats.bus_error;
+		cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+		cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+		if (err_status & HECC_CANES_FE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE);
+			cf->data[2] |= CAN_ERR_PROT_FORM;
+		}
+		if (err_status & HECC_CANES_BE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE);
+			cf->data[2] |= CAN_ERR_PROT_BIT;
+		}
+		if (err_status & HECC_CANES_SE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE);
+			cf->data[2] |= CAN_ERR_PROT_STUFF;
+		}
+		if (err_status & HECC_CANES_CRCE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE);
+			cf->data[2] |= CAN_ERR_PROT_LOC_CRC_SEQ |
+					CAN_ERR_PROT_LOC_CRC_DEL;
+		}
+		if (err_status & HECC_CANES_ACKE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE);
+			cf->data[2] |= CAN_ERR_PROT_LOC_ACK |
+					CAN_ERR_PROT_LOC_ACK_DEL;
+		}
+	}
+
+	netif_receive_skb(skb);
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+	return 0;
+}
+
+static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u32 mbxno, mbx_mask, int_status, err_status;
+	unsigned long ack, flags;
+
+	int_status = hecc_read(priv,
+		(priv->int_line) ? HECC_CANGIF1 : HECC_CANGIF0);
+
+	if (!int_status)
+		return IRQ_NONE;
+
+	err_status = hecc_read(priv, HECC_CANES);
+	if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO |
+		HECC_CANES_EP | HECC_CANES_EW))
+			ti_hecc_error(ndev, int_status, err_status);
+
+	if (int_status & HECC_CANGIF_GMIF) {
+		while (priv->tx_tail - priv->tx_head > 0) {
+			mbxno = get_tx_tail_mb(priv);
+			mbx_mask = BIT(mbxno);
+			if (!(mbx_mask & hecc_read(priv, HECC_CANTA)))
+				break;
+			hecc_clear_bit(priv, HECC_CANMIM, mbx_mask);
+			hecc_write(priv, HECC_CANTA, mbx_mask);
+			spin_lock_irqsave(&priv->mbx_lock, flags);
+			hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+			spin_unlock_irqrestore(&priv->mbx_lock, flags);
+			stats->tx_bytes += hecc_read_mbx(priv, mbxno,
+						HECC_CANMCF) & 0xF;
+			stats->tx_packets++;
+			can_get_echo_skb(ndev, mbxno);
+			--priv->tx_tail;
+		}
+
+		/* restart queue if wrap-up or if queue stalled on last pkt */
+		if (((priv->tx_head == priv->tx_tail) &&
+		((priv->tx_head & HECC_TX_MASK) != HECC_TX_MASK)) ||
+		(((priv->tx_tail & HECC_TX_MASK) == HECC_TX_MASK) &&
+		((priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK)))
+			netif_wake_queue(ndev);
+
+		/* Disable RX mailbox interrupts and let NAPI reenable them */
+		if (hecc_read(priv, HECC_CANRMP)) {
+			ack = hecc_read(priv, HECC_CANMIM);
+			ack &= BIT(HECC_MAX_TX_MBOX) - 1;
+			hecc_write(priv, HECC_CANMIM, ack);
+			napi_schedule(&priv->napi);
+		}
+	}
+
+	/* clear all interrupt conditions - read back to avoid spurious ints */
+	if (priv->int_line) {
+		hecc_write(priv, HECC_CANGIF1, HECC_SET_REG);
+		int_status = hecc_read(priv, HECC_CANGIF1);
+	} else {
+		hecc_write(priv, HECC_CANGIF0, HECC_SET_REG);
+		int_status = hecc_read(priv, HECC_CANGIF0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ti_hecc_open(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	int err;
+
+	err = request_irq(ndev->irq, ti_hecc_interrupt, IRQF_SHARED,
+			ndev->name, ndev);
+	if (err) {
+		dev_err(ndev->dev.parent, "error requesting interrupt\n");
+		return err;
+	}
+
+	/* Open common can device */
+	err = open_candev(ndev);
+	if (err) {
+		dev_err(ndev->dev.parent, "open_candev() failed %d\n", err);
+		free_irq(ndev->irq, ndev);
+		return err;
+	}
+
+	clk_enable(priv->clk);
+	ti_hecc_start(ndev);
+	napi_enable(&priv->napi);
+	netif_start_queue(ndev);
+
+	return 0;
+}
+
+static int ti_hecc_close(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	napi_disable(&priv->napi);
+	ti_hecc_stop(ndev);
+	free_irq(ndev->irq, ndev);
+	clk_disable(priv->clk);
+	close_candev(ndev);
+
+	return 0;
+}
+
+static const struct net_device_ops ti_hecc_netdev_ops = {
+	.ndo_open		= ti_hecc_open,
+	.ndo_stop		= ti_hecc_close,
+	.ndo_start_xmit		= ti_hecc_xmit,
+};
+
+static int ti_hecc_probe(struct platform_device *pdev)
+{
+	struct net_device *ndev = (struct net_device *)0;
+	struct ti_hecc_priv *priv;
+	struct ti_hecc_platform_data *pdata;
+	struct resource *mem, *irq;
+	void __iomem *addr;
+	int err = -ENODEV;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data\n");
+		goto probe_exit;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(&pdev->dev, "No mem resources\n");
+		goto probe_exit;
+	}
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "No irq resource\n");
+		goto probe_exit;
+	}
+	if (!request_mem_region(mem->start, resource_size(mem), pdev->name)) {
+		dev_err(&pdev->dev, "HECC region already claimed\n");
+		err = -EBUSY;
+		goto probe_exit;
+	}
+	addr = ioremap(mem->start, resource_size(mem));
+	if (!addr) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		err = -ENOMEM;
+		goto probe_exit_free_region;
+	}
+
+	ndev = alloc_candev(sizeof(struct ti_hecc_priv));
+	if (!ndev) {
+		dev_err(&pdev->dev, "alloc_candev failed\n");
+		err = -ENOMEM;
+		goto probe_exit_iounmap;
+	}
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	priv->base = addr;
+	priv->scc_ram_offset = pdata->scc_ram_offset;
+	priv->hecc_ram_offset = pdata->hecc_ram_offset;
+	priv->mbx_offset = pdata->mbx_offset;
+	priv->int_line = pdata->int_line;
+
+	priv->can.bittiming_const = &ti_hecc_bittiming_const;
+	priv->can.do_set_mode = ti_hecc_do_set_mode;
+	priv->can.do_get_state = ti_hecc_get_state;
+
+	ndev->irq = irq->start;
+	ndev->flags |= IFF_ECHO;
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ndev->netdev_ops = &ti_hecc_netdev_ops;
+
+	priv->clk = clk_get(&pdev->dev, "hecc_ck");
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "No clock available\n");
+		err = PTR_ERR(priv->clk);
+		priv->clk = NULL;
+		goto probe_exit_candev;
+	}
+	priv->can.clock.freq = clk_get_rate(priv->clk);
+	netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll,
+		HECC_DEF_NAPI_WEIGHT);
+
+	err = register_candev(ndev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed\n");
+		goto probe_exit_clk;
+	}
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n",
+		priv->base, (u32) ndev->irq);
+
+	return 0;
+
+probe_exit_clk:
+	clk_put(priv->clk);
+probe_exit_candev:
+	free_candev(ndev);
+probe_exit_iounmap:
+	iounmap(addr);
+probe_exit_free_region:
+	release_mem_region(mem->start, resource_size(mem));
+probe_exit:
+	return err;
+}
+
+static int __devexit ti_hecc_remove(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	clk_put(priv->clk);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iounmap(priv->base);
+	release_mem_region(res->start, resource_size(res));
+	unregister_candev(ndev);
+	free_candev(ndev);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/* TI HECC netdevice driver: platform driver structure */
+static struct platform_driver ti_hecc_driver = {
+	.driver = {
+		.name    = DRV_NAME,
+		.owner   = THIS_MODULE,
+	},
+	.probe = ti_hecc_probe,
+	.remove = __devexit_p(ti_hecc_remove),
+};
+
+static int __init ti_hecc_init_driver(void)
+{
+	printk(KERN_INFO DRV_DESC "\n");
+	return platform_driver_register(&ti_hecc_driver);
+}
+module_init(ti_hecc_init_driver);
+
+static void __exit ti_hecc_exit_driver(void)
+{
+	printk(KERN_INFO DRV_DESC " unloaded\n");
+	platform_driver_unregister(&ti_hecc_driver);
+}
+module_exit(ti_hecc_exit_driver);
+
+MODULE_AUTHOR("Anant Gole <anantgole@ti.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
new file mode 100644
index 000000000000..4688c7bb1bd1
--- /dev/null
+++ b/include/linux/can/platform/ti_hecc.h
@@ -0,0 +1,40 @@
+/*
+ * TI HECC (High End CAN Controller) driver platform header
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed as is WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/**
+ * struct hecc_platform_data - HECC Platform Data
+ *
+ * @scc_hecc_offset:	mostly 0 - should really never change
+ * @scc_ram_offset:	SCC RAM offset
+ * @hecc_ram_offset:	HECC RAM offset
+ * @mbx_offset:		Mailbox RAM offset
+ * @int_line:		Interrupt line to use - 0 or 1
+ * @version:		version for future use
+ *
+ * Platform data structure to get all platform specific settings.
+ * this structure also accounts the fact that the IP may have different
+ * RAM and mailbox offsets for different SOC's
+ */
+struct ti_hecc_platform_data {
+	u32 scc_hecc_offset;
+	u32 scc_ram_offset;
+	u32 hecc_ram_offset;
+	u32 mbx_offset;
+	u32 int_line;
+	u32 version;
+};
+
+
-- 
cgit v1.2.3


From e0e6f55d298af03ab88bfe8455b671d29d78f426 Mon Sep 17 00:00:00 2001
From: Jin Dongming <jin.dongming@np.css.fujitsu.com>
Date: Thu, 8 Oct 2009 22:44:47 -0700
Subject: ipv6: Fix the size overflow of addrconf_sysctl array

(This patch fixes bug of commit f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50
 title "make TLLAO option for NA packets configurable")

When the IPV6 conf is used, the function sysctl_set_parent is called and the
array addrconf_sysctl is used as a parameter of the function.

The above patch added new conf "force_tllao" into the array addrconf_sysctl,
but the size of the array was not modified, the static allocated size is
DEVCONF_MAX + 1 but the real size is DEVCONF_MAX + 2, so the problem is
that the function sysctl_set_parent accessed wrong address.

I got the following information.
Call Trace:
    [<ffffffff8106085d>] sysctl_set_parent+0x29/0x3e
    [<ffffffff8106085d>] sysctl_set_parent+0x29/0x3e
    [<ffffffff8106085d>] sysctl_set_parent+0x29/0x3e
    [<ffffffff8106085d>] sysctl_set_parent+0x29/0x3e
    [<ffffffff8106085d>] sysctl_set_parent+0x29/0x3e
    [<ffffffff810622d5>] __register_sysctl_paths+0xde/0x272
    [<ffffffff8110892d>] ? __kmalloc_track_caller+0x16e/0x180
    [<ffffffffa00cfac3>] ? __addrconf_sysctl_register+0xc5/0x144 [ipv6]
    [<ffffffff8141f2c9>] register_net_sysctl_table+0x48/0x4b
    [<ffffffffa00cfaf5>] __addrconf_sysctl_register+0xf7/0x144 [ipv6]
    [<ffffffffa00cfc16>] addrconf_init_net+0xd4/0x104 [ipv6]
    [<ffffffff8139195f>] setup_net+0x35/0x82
    [<ffffffff81391f6c>] copy_net_ns+0x76/0xe0
    [<ffffffff8107ad60>] create_new_namespaces+0xf0/0x16e
    [<ffffffff8107afee>] copy_namespaces+0x65/0x9f
    [<ffffffff81056dff>] copy_process+0xb2c/0x12c3
    [<ffffffff810576e1>] do_fork+0x14b/0x2d2
    [<ffffffff8107ac4e>] ? up_read+0xe/0x10
    [<ffffffff81438e73>] ? do_page_fault+0x27a/0x2aa
    [<ffffffff8101044b>] sys_clone+0x28/0x2a
    [<ffffffff81011fb3>] stub_clone+0x13/0x20
    [<ffffffff81011c72>] ? system_call_fastpath+0x16/0x1b

And the information of IPV6 in .config is as following.
IPV6 in .config:
    CONFIG_IPV6=m
    CONFIG_IPV6_PRIVACY=y
    CONFIG_IPV6_ROUTER_PREF=y
    CONFIG_IPV6_ROUTE_INFO=y
    CONFIG_IPV6_OPTIMISTIC_DAD=y
    CONFIG_IPV6_MIP6=m
    CONFIG_IPV6_SIT=m
    # CONFIG_IPV6_SIT_6RD is not set
    CONFIG_IPV6_NDISC_NODETYPE=y
    CONFIG_IPV6_TUNNEL=m
    CONFIG_IPV6_MULTIPLE_TABLES=y
    CONFIG_IPV6_SUBTREES=y
    CONFIG_IPV6_MROUTE=y
    CONFIG_IPV6_PIMSM_V2=y
    # CONFIG_IP_VS_IPV6 is not set
    CONFIG_NF_CONNTRACK_IPV6=m
    CONFIG_IP6_NF_MATCH_IPV6HEADER=m

I confirmed this patch fixes this problem.

Signed-off-by: Jin Dongming <jin.dongming@np.css.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ae74ede1abe7..56404251248c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -208,6 +208,7 @@ enum {
 	DEVCONF_MC_FORWARDING,
 	DEVCONF_DISABLE_IPV6,
 	DEVCONF_ACCEPT_DAD,
+	DEVCONF_FORCE_TLLAO,
 	DEVCONF_MAX
 };
 
-- 
cgit v1.2.3


From c01226c3145d173a0d38f9d5b4f229cc23d99ae2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 21 Sep 2009 16:37:12 +0200
Subject: warn about use of uninstalled kernel headers

User applications frequently hit problems when they try to use
the kernel headers directly, rather than the exported headers.

This adds an explicit warning for this case, and points to
a URL holding an explanation of why this is wrong and what
to do about it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/linux/kernel.h     | 6 ++++++
 scripts/headers_install.pl | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d3cd23f30039..f4e3184fa054 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -659,6 +659,12 @@ extern int do_sysinfo(struct sysinfo *info);
 
 #endif /* __KERNEL__ */
 
+#ifndef __EXPORTED_HEADERS__
+#ifndef __KERNEL__
+#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders
+#endif /* __KERNEL__ */
+#endif /* __EXPORTED_HEADERS__ */
+
 #define SI_LOAD_SHIFT	16
 struct sysinfo {
 	long uptime;			/* Seconds since boot */
diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl
index c6ae4052ab43..b89ca2c58fdb 100644
--- a/scripts/headers_install.pl
+++ b/scripts/headers_install.pl
@@ -20,7 +20,7 @@ use strict;
 
 my ($readdir, $installdir, $arch, @files) = @ARGV;
 
-my $unifdef = "scripts/unifdef -U__KERNEL__";
+my $unifdef = "scripts/unifdef -U__KERNEL__ -D__EXPORTED_HEADERS__";
 
 foreach my $file (@files) {
 	local *INFILE;
-- 
cgit v1.2.3


From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 4 Oct 2009 21:49:47 +0900
Subject: LSM: Add security_path_chmod() and security_path_chown().

This patch allows pathname based LSM modules to check chmod()/chown()
operations. Since notify_change() does not receive "struct vfsmount *",
we add security_path_chmod() and security_path_chown() to the caller of
notify_change().

These hooks are used by TOMOYO.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/open.c                | 24 ++++++++++++++++++++----
 include/linux/security.h | 30 ++++++++++++++++++++++++++++++
 security/capability.c    | 13 +++++++++++++
 security/security.c      | 15 +++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 4f01e06227c6..b5c294d35bd1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -616,6 +616,9 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
+	err = security_path_chmod(dentry, file->f_vfsmnt, mode);
+	if (err)
+		goto out_drop_write;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -623,6 +626,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	err = notify_change(dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
+out_drop_write:
 	mnt_drop_write(file->f_path.mnt);
 out_putf:
 	fput(file);
@@ -645,6 +649,9 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto dput_and_out;
+	error = security_path_chmod(path.dentry, path.mnt, mode);
+	if (error)
+		goto out_drop_write;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -652,6 +659,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	error = notify_change(path.dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
+out_drop_write:
 	mnt_drop_write(path.mnt);
 dput_and_out:
 	path_put(&path);
@@ -700,7 +708,9 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -725,7 +735,9 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -744,7 +756,9 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -767,7 +781,9 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 		goto out_fput;
 	dentry = file->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = chown_common(dentry, user, group);
+	error = security_path_chown(&file->f_path, user, group);
+	if (!error)
+		error = chown_common(dentry, user, group);
 	mnt_drop_write(file->f_path.mnt);
 out_fput:
 	fput(file);
diff --git a/include/linux/security.h b/include/linux/security.h
index 239e40d0450b..c8a584c26f7b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new_dir contains the path structure for parent of the new link.
  *	@new_dentry contains the dentry structure of the new link.
  *	Return 0 if permission is granted.
+ * @path_chmod:
+ *	Check for permission to change DAC's permission of a file or directory.
+ *	@dentry contains the dentry structure.
+ *	@mnt contains the vfsmnt structure.
+ *	@mode contains DAC's mode.
+ *	Return 0 if permission is granted.
+ * @path_chown:
+ *	Check for permission to change owner/group of a file or directory.
+ *	@path contains the path structure.
+ *	@uid contains new owner's ID.
+ *	@gid contains new group's ID.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -1488,6 +1500,9 @@ struct security_operations {
 			  struct dentry *new_dentry);
 	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
 			    struct path *new_dir, struct dentry *new_dentry);
+	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
+			   mode_t mode);
+	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
 #endif
 
 	int (*inode_alloc_security) (struct inode *inode);
@@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 		       struct dentry *new_dentry);
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 			 struct path *new_dir, struct dentry *new_dentry);
+int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			mode_t mode);
+int security_path_chown(struct path *path, uid_t uid, gid_t gid);
 #else	/* CONFIG_SECURITY_PATH */
 static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
 {
@@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir,
 {
 	return 0;
 }
+
+static inline int security_path_chmod(struct dentry *dentry,
+				      struct vfsmount *mnt,
+				      mode_t mode)
+{
+	return 0;
+}
+
+static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_PATH */
 
 #ifdef CONFIG_KEYS
diff --git a/security/capability.c b/security/capability.c
index fce07a7bc825..09279a8d4a14 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -308,6 +308,17 @@ static int cap_path_truncate(struct path *path, loff_t length,
 {
 	return 0;
 }
+
+static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			  mode_t mode)
+{
+	return 0;
+}
+
+static int cap_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	return 0;
+}
 #endif
 
 static int cap_file_permission(struct file *file, int mask)
@@ -977,6 +988,8 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, path_link);
 	set_to_cap_if_null(ops, path_rename);
 	set_to_cap_if_null(ops, path_truncate);
+	set_to_cap_if_null(ops, path_chmod);
+	set_to_cap_if_null(ops, path_chown);
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
diff --git a/security/security.c b/security/security.c
index c4c673240c1c..5259270e558f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -434,6 +434,21 @@ int security_path_truncate(struct path *path, loff_t length,
 		return 0;
 	return security_ops->path_truncate(path, length, time_attrs);
 }
+
+int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			mode_t mode)
+{
+	if (unlikely(IS_PRIVATE(dentry->d_inode)))
+		return 0;
+	return security_ops->path_chmod(dentry, mnt, mode);
+}
+
+int security_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_chown(path, uid, gid);
+}
 #endif
 
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
-- 
cgit v1.2.3


From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 4 Oct 2009 21:49:48 +0900
Subject: LSM: Add security_path_chroot().

This patch allows pathname based LSM modules to check chroot() operations.

This hook is used by TOMOYO.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/open.c                |  3 +++
 include/linux/security.h | 11 +++++++++++
 security/capability.c    |  6 ++++++
 security/security.c      |  5 +++++
 4 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index b5c294d35bd1..201041dfca57 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
 	error = -EPERM;
 	if (!capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
+	error = security_path_chroot(&path);
+	if (error)
+		goto dput_and_out;
 
 	set_fs_root(current->fs, &path);
 	error = 0;
diff --git a/include/linux/security.h b/include/linux/security.h
index c8a584c26f7b..ed0faea60b82 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@uid contains new owner's ID.
  *	@gid contains new group's ID.
  *	Return 0 if permission is granted.
+ * @path_chroot:
+ *	Check for permission to change root directory.
+ *	@path contains the path structure.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -1503,6 +1507,7 @@ struct security_operations {
 	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
 			   mode_t mode);
 	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
+	int (*path_chroot) (struct path *path);
 #endif
 
 	int (*inode_alloc_security) (struct inode *inode);
@@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
 			mode_t mode);
 int security_path_chown(struct path *path, uid_t uid, gid_t gid);
+int security_path_chroot(struct path *path);
 #else	/* CONFIG_SECURITY_PATH */
 static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
 {
@@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
 	return 0;
 }
+
+static inline int security_path_chroot(struct path *path)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_PATH */
 
 #ifdef CONFIG_KEYS
diff --git a/security/capability.c b/security/capability.c
index 09279a8d4a14..4f3ab476937f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -319,6 +319,11 @@ static int cap_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
 	return 0;
 }
+
+static int cap_path_chroot(struct path *root)
+{
+	return 0;
+}
 #endif
 
 static int cap_file_permission(struct file *file, int mask)
@@ -990,6 +995,7 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, path_truncate);
 	set_to_cap_if_null(ops, path_chmod);
 	set_to_cap_if_null(ops, path_chown);
+	set_to_cap_if_null(ops, path_chroot);
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
diff --git a/security/security.c b/security/security.c
index 5259270e558f..279757314a05 100644
--- a/security/security.c
+++ b/security/security.c
@@ -449,6 +449,11 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
 		return 0;
 	return security_ops->path_chown(path, uid, gid);
 }
+
+int security_path_chroot(struct path *path)
+{
+	return security_ops->path_chroot(path);
+}
 #endif
 
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
-- 
cgit v1.2.3


From d5e63bded6e819ca77ee1a1d97c783a31f6caf30 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 Oct 2009 03:00:31 -0700
Subject: Revert "af_packet: add interframe drop cmsg (v6)"

This reverts commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab.

Neil is reimplementing this generically, outside of AF_PACKET.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  2 --
 net/packet/af_packet.c    | 33 ---------------------------------
 2 files changed, 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index e5d200f53fc3..dea7d6b7cf98 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,13 +48,11 @@ struct sockaddr_ll
 #define PACKET_RESERVE			12
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
-#define PACKET_GAPDATA			15
 
 struct tpacket_stats
 {
 	unsigned int	tp_packets;
 	unsigned int	tp_drops;
-	unsigned int    tp_gap;
 };
 
 struct tpacket_auxdata
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 70073a0dea5d..f87ed4803c11 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -524,31 +524,6 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 	return res;
 }
 
-/*
- * If we've lost frames since the last time we queued one to the
- * sk_receive_queue, we need to record it here.
- * This must be called under the protection of the socket lock
- * to prevent racing with other softirqs and user space
- */
-static inline void record_packet_gap(struct sk_buff *skb,
-					struct packet_sock *po)
-{
-	/*
-	 * We overload the mark field here, since we're about
-	 * to enqueue to a receive queue and no body else will
-	 * use this field at this point
-	 */
-	skb->mark = po->stats.tp_gap;
-	po->stats.tp_gap = 0;
-	return;
-
-}
-
-static inline __u32 check_packet_gap(struct sk_buff *skb)
-{
-	return skb->mark;
-}
-
 /*
    This function makes lazy skb cloning in hope that most of packets
    are discarded by BPF.
@@ -652,7 +627,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
-	record_packet_gap(skb, po);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
@@ -661,7 +635,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 drop_n_acct:
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_drops++;
-	po->stats.tp_gap++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
@@ -839,7 +812,6 @@ drop:
 
 ring_is_full:
 	po->stats.tp_drops++;
-	po->stats.tp_gap++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 	sk->sk_data_ready(sk, 0);
@@ -1449,7 +1421,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *skb;
 	int copied, err;
 	struct sockaddr_ll *sll;
-	__u32 gap;
 
 	err = -EINVAL;
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1528,10 +1499,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
 
-	gap = check_packet_gap(skb);
-	if (gap)
-		put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);
-
 	/*
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.
-- 
cgit v1.2.3


From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 12 Oct 2009 13:26:31 -0700
Subject: net: Generalize socket rx gap / receive queue overflow cmsg

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  AFter I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch, It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if thats
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/asm/socket.h   |  2 ++
 arch/arm/include/asm/socket.h     |  2 ++
 arch/avr32/include/asm/socket.h   |  2 ++
 arch/cris/include/asm/socket.h    |  2 ++
 arch/frv/include/asm/socket.h     |  2 ++
 arch/h8300/include/asm/socket.h   |  2 ++
 arch/ia64/include/asm/socket.h    |  2 ++
 arch/m32r/include/asm/socket.h    |  2 ++
 arch/m68k/include/asm/socket.h    |  2 ++
 arch/mips/include/asm/socket.h    |  2 ++
 arch/mn10300/include/asm/socket.h |  2 ++
 arch/parisc/include/asm/socket.h  |  2 ++
 arch/powerpc/include/asm/socket.h |  2 ++
 arch/s390/include/asm/socket.h    |  2 ++
 arch/sparc/include/asm/socket.h   |  2 ++
 arch/xtensa/include/asm/socket.h  |  2 ++
 include/asm-generic/socket.h      |  1 +
 include/linux/skbuff.h            |  6 ++++--
 include/net/sock.h                |  3 +++
 net/atm/common.c                  |  2 +-
 net/bluetooth/af_bluetooth.c      |  2 +-
 net/bluetooth/rfcomm/sock.c       |  2 +-
 net/can/bcm.c                     |  2 +-
 net/can/raw.c                     |  2 +-
 net/core/sock.c                   | 17 ++++++++++++++++-
 net/ieee802154/dgram.c            |  2 +-
 net/ieee802154/raw.c              |  2 +-
 net/ipv4/raw.c                    |  2 +-
 net/ipv4/udp.c                    |  2 +-
 net/ipv6/raw.c                    |  2 +-
 net/ipv6/udp.c                    |  2 +-
 net/key/af_key.c                  |  2 +-
 net/packet/af_packet.c            |  7 +++----
 net/rxrpc/ar-recvmsg.c            |  2 +-
 net/sctp/socket.c                 |  2 +-
 net/socket.c                      | 15 +++++++++++++++
 36 files changed, 88 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 26773e3246e2..06edfefc3373 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -67,6 +67,8 @@
 #define SO_TIMESTAMPING		37
 #define SCM_TIMESTAMPING	SO_TIMESTAMPING
 
+#define SO_RXQ_OVFL             40
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 92ac61d294fd..90ffd04b8e74 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index fe863f9794d5..c8d1fae49476 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 45ec49bdb7b1..1a4a61909ca8 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -62,6 +62,8 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h
index 2dea726095c2..a6b26880c1ec 100644
--- a/arch/frv/include/asm/socket.h
+++ b/arch/frv/include/asm/socket.h
@@ -60,5 +60,7 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index 1547f01c8e22..04c0f4596eb5 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index 0b0d5ff062e5..51427eaa51ba 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -69,4 +69,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index 3390a864f224..469787c30098 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index eee01cce921b..9bf49c87d954 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index ae05accd9fe4..9de5190f2487 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -80,6 +80,8 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 #define SO_TIMESTAMPING		37
 #define SCM_TIMESTAMPING	SO_TIMESTAMPING
 
+#define SO_RXQ_OVFL             40
+
 #ifdef __KERNEL__
 
 /** sock_type - Socket types
diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h
index 4df75af29d76..4e60c4281288 100644
--- a/arch/mn10300/include/asm/socket.h
+++ b/arch/mn10300/include/asm/socket.h
@@ -60,4 +60,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 960b1e5d8e16..225b7d6a1a0a 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -59,6 +59,8 @@
 #define SO_TIMESTAMPING		0x4020
 #define SCM_TIMESTAMPING	SO_TIMESTAMPING
 
+#define SO_RXQ_OVFL             0x4021
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 3ab8b3e6feb0..866f7606da68 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -67,4 +67,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index e42df89a0b85..fdff1e995c73 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -68,4 +68,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index 3a5ae3d12088..9d3fefcff2f5 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -56,6 +56,8 @@
 #define SO_TIMESTAMPING		0x0023
 #define SCM_TIMESTAMPING	SO_TIMESTAMPING
 
+#define SO_RXQ_OVFL             0x0024
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index beb3a6bdb61d..cbdf2ffaacff 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -71,4 +71,6 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991cef6f0..9a6115e7cf63 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23ac66e6..8c866b5cb97b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 98398bdec57d..10669b01eeab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -505,6 +505,7 @@ enum sock_flags {
 	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 	SOCK_FASYNC, /* fasync() active */
+	SOCK_RXQ_OVFL,
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1493,6 +1494,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
+extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @msg:	outgoing packet
diff --git a/net/atm/common.c b/net/atm/common.c
index 950bd16d2383..d61e051e0a3f 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -496,7 +496,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (error)
 		return error;
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
 	atm_return(vcc, skb->truesize);
 	skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1f6e49c1cde8..399e59c9c6cb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -257,7 +257,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err == 0)
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 	skb_free_datagram(sk, skb);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c70786503850..d3bfc1b0afb1 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -703,7 +703,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += chunk;
 		size   -= chunk;
 
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 		if (!(flags & MSG_PEEK)) {
 			atomic_sub(chunk, &sk->sk_rmem_alloc);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 597da4f8f888..2f47039c79dd 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1534,7 +1534,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/can/raw.c b/net/can/raw.c
index b5e897922d32..962fc9f1d0c7 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -702,7 +702,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6aacd68..43ca2c995393 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -276,6 +276,8 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int err = 0;
 	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
 
 	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
@@ -305,7 +307,10 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +707,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			sock_set_flag(sk, SOCK_RXQ_OVFL);
+		else
+			sock_reset_flag(sk, SOCK_RXQ_OVFL);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +912,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index a413b1bf4465..25ad956a39d8 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -303,7 +303,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 30e74eee07d6..769c8d138fc3 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -191,7 +191,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 757c9171e7c2..f18172b07611 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 194bcdc6d9fc..71e5353b30c8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -955,7 +955,7 @@ try_again:
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4f24570b0869..d8375bc7f2d5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -497,7 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			sin6->sin6_scope_id = IP6CB(skb)->iif;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
 		datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ff778c172ef2..1f8e2afa4490 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -252,7 +252,7 @@ try_again:
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c078ae6e975b..472f6594184a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3606,7 +3606,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	err = (flags & MSG_TRUNC) ? skb->len : copied;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f87ed4803c11..bf3a2954cd4d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -627,15 +627,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
 	return 0;
 
 drop_n_acct:
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.tp_drops++;
-	spin_unlock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -1478,7 +1477,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index a39bf97f8830..60c2b94e6b54 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -146,7 +146,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 				memcpy(msg->msg_name,
 				       &call->conn->trans->peer->srx,
 				       sizeof(call->conn->trans->peer->srx));
-			sock_recv_timestamp(msg, &rx->sk, skb);
+			sock_recv_ts_and_drops(msg, &rx->sk, skb);
 		}
 
 		/* receive the message */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c8d05758661d..0970e92c6acd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,7 +1958,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
diff --git a/net/socket.c b/net/socket.c
index 954f3381cc8a..807935693846 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -668,6 +668,21 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
+			sizeof(__u32), &skb->dropcount);
+}
+
+void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_drops(msg, sk, skb);
+}
+EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
-- 
cgit v1.2.3


From a2e2725541fad72416326798c2d7fa4dafb7d337 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Oct 2009 23:40:10 -0700
Subject: net: Introduce recvmmsg socket syscall
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.

Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.

This takes into account comments made by:

. Paul Moore: sock_recvmsg is called only for the first datagram,
  sock_recvmsg_nosec is used for the rest.

. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
  works in the same fashion as the ppoll one.

  If the underlying protocol returns a datagram with MSG_OOB set, this
  will make recvmmsg return right away with as many datagrams (+ the OOB
  one) it has received so far.

. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
  datagrams and then recvmsg returns an error, recvmmsg will return
  the successfully received datagrams, store the error and return it
  in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/kernel/systbls.S            |   1 +
 arch/arm/kernel/calls.S                |   1 +
 arch/avr32/kernel/syscall_table.S      |   1 +
 arch/blackfin/mach-common/entry.S      |   1 +
 arch/ia64/kernel/entry.S               |   1 +
 arch/microblaze/kernel/syscall_table.S |   1 +
 arch/mips/kernel/scall32-o32.S         |   1 +
 arch/mips/kernel/scall64-64.S          |   1 +
 arch/mips/kernel/scall64-n32.S         |   1 +
 arch/mips/kernel/scall64-o32.S         |   1 +
 arch/sh/kernel/syscalls_64.S           |   1 +
 arch/sparc/kernel/systbls_32.S         |   2 +-
 arch/sparc/kernel/systbls_64.S         |   4 +-
 arch/x86/ia32/ia32entry.S              |   1 +
 arch/x86/include/asm/unistd_32.h       |   3 +-
 arch/x86/include/asm/unistd_64.h       |   2 +
 arch/x86/kernel/syscall_table_32.S     |   1 +
 arch/xtensa/include/asm/unistd.h       |   4 +-
 include/linux/net.h                    |   1 +
 include/linux/socket.h                 |  10 ++
 include/linux/syscalls.h               |   4 +
 include/net/compat.h                   |   8 ++
 kernel/sys_ni.c                        |   2 +
 net/compat.c                           |  33 ++++-
 net/socket.c                           | 225 +++++++++++++++++++++++++++------
 25 files changed, 261 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 95c9aef1c106..cda6b8b3d573 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -497,6 +497,7 @@ sys_call_table:
 	.quad sys_signalfd
 	.quad sys_ni_syscall
 	.quad sys_eventfd
+	.quad sys_recvmmsg
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index fafce1b5c69f..f58c1156e779 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -374,6 +374,7 @@
 		CALL(sys_pwritev)
 		CALL(sys_rt_tgsigqueueinfo)
 		CALL(sys_perf_event_open)
+/* 365 */	CALL(sys_recvmmsg)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 7ee0057613b3..e76bad16b0f0 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -295,4 +295,5 @@ sys_call_table:
 	.long	sys_signalfd
 	.long	sys_ni_syscall		/* 280, was sys_timerfd */
 	.long	sys_eventfd
+	.long	sys_recvmmsg
 	.long	sys_ni_syscall		/* r8 is saturated at nr_syscalls */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 1e7cac23e25f..48692724b74c 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table)
 	.long _sys_pwritev
 	.long _sys_rt_tgsigqueueinfo
 	.long _sys_perf_event_open
+	.long _sys_recvmmsg		/* 370 */
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index d0e7d37017b4..d75b872ca4dc 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1806,6 +1806,7 @@ sys_call_table:
 	data8 sys_preadv
 	data8 sys_pwritev			// 1320
 	data8 sys_rt_tgsigqueueinfo
+	data8 sys_recvmmsg
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index ecec19155135..c1ab1dc10898 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -371,3 +371,4 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall
 	.long sys_rt_tgsigqueueinfo	/* 365 */
 	.long sys_perf_event_open
+	.long sys_recvmmsg
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index fd2a9bb620d6..17202bbe843f 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -583,6 +583,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_rt_tgsigqueueinfo	4
 	sys	sys_perf_event_open	5
 	sys	sys_accept4		4
+	sys     sys_recvmmsg            5
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 18bf7f32c5e4..a8a6c596eb04 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -420,4 +420,5 @@ sys_call_table:
 	PTR	sys_rt_tgsigqueueinfo
 	PTR	sys_perf_event_open
 	PTR	sys_accept4
+	PTR     sys_recvmmsg
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 6ebc07976694..5154e64f7cfe 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -418,4 +418,5 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_rt_tgsigqueueinfo	/* 5295 */
 	PTR	sys_perf_event_open
 	PTR	sys_accept4
+	PTR     compat_sys_recvmmsg
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9bbf9775e0bd..d0eff53d7cb9 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -538,4 +538,5 @@ sys_call_table:
 	PTR	compat_sys_rt_tgsigqueueinfo
 	PTR	sys_perf_event_open
 	PTR	sys_accept4
+	PTR     compat_sys_recvmmsg
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 5bfde6c77498..07d2aaea9ae8 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -391,3 +391,4 @@ sys_call_table:
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo
 	.long sys_perf_event_open
+	.long sys_recvmmsg		/* 365 */
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 0f1658d37490..ceb1530f8aa6 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -82,5 +82,5 @@ sys_call_table:
 /*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 /*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
-/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
+/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
 
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 009825f6e73c..f37bef747e60 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -83,7 +83,7 @@ sys_call_table32:
 /*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
 	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
-	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open
+	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg
 
 #endif /* CONFIG_COMPAT */
 
@@ -158,4 +158,4 @@ sys_call_table:
 /*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
-	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
+	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..11a6c79d5f46 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -832,4 +832,5 @@ ia32_sys_call_table:
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 	.quad sys_perf_event_open
+	.quad compat_sys_recvmmsg
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6fb3c209a7e3..3baf379fa840 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -342,10 +342,11 @@
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
+#define __NR_recvmmsg		337
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 337
+#define NR_syscalls 338
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0adbc68..4843f7ba754a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open			298
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_recvmmsg				299
+__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0157cd26d7cc..70c2125d55b9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
+	.long sys_recvmmsg
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index c092c8fbb2cf..4e55dc763021 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3)
 __SYSCALL(305, sys_ni_syscall, 0)
 #define __NR_eventfd				306
 __SYSCALL(306, sys_eventfd, 1)
+#define __NR_recvmmsg				307
+__SYSCALL(307, sys_recvmmsg, 5)
 
-#define __NR_syscall_count			307
+#define __NR_syscall_count			308
 
 /*
  * sysxtensa syscall handler
diff --git a/include/linux/net.h b/include/linux/net.h
index 529a0931711d..b42bb60fe92f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,7 @@
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
 #define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
+#define SYS_RECVMMSG	19		/* sys_recvmmsg(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3273a0c5043b..59966f12990c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -65,6 +65,12 @@ struct msghdr {
 	unsigned	msg_flags;
 };
 
+/* For recvmmsg/sendmmsg */
+struct mmsghdr {
+	struct msghdr   msg_hdr;
+	unsigned        msg_len;
+};
+
 /*
  *	POSIX 1003.1g - ancillary data object information
  *	Ancillary data consits of a sequence of pairs of
@@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
+struct timespec;
+
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
+			  unsigned int flags, struct timespec *timeout);
 #endif
 #endif /* not kernel and not glibc */
 #endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace1a838..714f063a3e6d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -25,6 +25,7 @@ struct linux_dirent64;
 struct list_head;
 struct msgbuf;
 struct msghdr;
+struct mmsghdr;
 struct msqid_ds;
 struct new_utsname;
 struct nfsctl_arg;
@@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
 asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
 				struct sockaddr __user *, int __user *);
 asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
+asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
+			     unsigned int vlen, unsigned flags,
+			     struct timespec __user *timeout);
 asmlinkage long sys_socket(int, int, int);
 asmlinkage long sys_socketpair(int, int, int, int __user *);
 asmlinkage long sys_socketcall(int call, unsigned long __user *args);
diff --git a/include/net/compat.h b/include/net/compat.h
index 7c3002832d05..9679f05e9896 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -18,6 +18,11 @@ struct compat_msghdr {
 	compat_uint_t	msg_flags;
 };
 
+struct compat_mmsghdr {
+	struct compat_msghdr msg_hdr;
+	compat_uint_t        msg_len;
+};
+
 struct compat_cmsghdr {
 	compat_size_t	cmsg_len;
 	compat_int_t	cmsg_level;
@@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
 extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
 extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
 extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
+extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
+					   unsigned, unsigned,
+					   struct timespec __user *);
 extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
 extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e06d0b8d1951..f050ba85d420 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -48,8 +48,10 @@ cond_syscall(sys_shutdown);
 cond_syscall(sys_sendmsg);
 cond_syscall(compat_sys_sendmsg);
 cond_syscall(sys_recvmsg);
+cond_syscall(sys_recvmmsg);
 cond_syscall(compat_sys_recvmsg);
 cond_syscall(compat_sys_recvfrom);
+cond_syscall(compat_sys_recvmmsg);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
 cond_syscall(compat_sys_futex);
diff --git a/net/compat.c b/net/compat.c
index a407c3addbae..e13f5256fd20 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(4)};
+				AL(4),AL(5)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
 	return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
 }
 
+asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+				    unsigned vlen, unsigned int flags,
+				    struct timespec __user *timeout)
+{
+	int datagrams;
+	struct timespec ktspec;
+	struct compat_timespec __user *utspec =
+			(struct compat_timespec __user *)timeout;
+
+	if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
+	    get_user(ktspec.tv_nsec, &utspec->tv_nsec))
+		return -EFAULT;
+
+	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+				   flags | MSG_CMSG_COMPAT, &ktspec);
+	if (datagrams > 0 &&
+	    (put_user(ktspec.tv_sec, &utspec->tv_sec) ||
+	     put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
+		datagrams = -EFAULT;
+
+	return datagrams;
+}
+
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_ACCEPT4)
+	if (call < SYS_SOCKET || call > SYS_RECVMMSG)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
+	case SYS_RECVMMSG:
+		ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
+					  compat_ptr(a[4]));
+		break;
 	case SYS_ACCEPT4:
 		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
 		break;
diff --git a/net/socket.c b/net/socket.c
index 807935693846..9dff31c9b799 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
 
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-				 struct msghdr *msg, size_t size, int flags)
+static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
+				       struct msghdr *msg, size_t size, int flags)
 {
-	int err;
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
 
 	si->sock = sock;
@@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	si->size = size;
 	si->flags = flags;
 
-	err = security_socket_recvmsg(sock, msg, size, flags);
-	if (err)
-		return err;
-
 	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 }
 
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+				 struct msghdr *msg, size_t size, int flags)
+{
+	int err = security_socket_recvmsg(sock, msg, size, flags);
+
+	return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
+}
+
 int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
 {
@@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 	return ret;
 }
 
+static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
+			      size_t size, int flags)
+{
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	int ret;
+
+	init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
+
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
@@ -1983,22 +2001,15 @@ out:
 	return err;
 }
 
-/*
- *	BSD recvmsg interface
- */
-
-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
-		unsigned int, flags)
+static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+			 struct msghdr *msg_sys, unsigned flags, int nosec)
 {
 	struct compat_msghdr __user *msg_compat =
 	    (struct compat_msghdr __user *)msg;
-	struct socket *sock;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
-	struct msghdr msg_sys;
 	unsigned long cmsg_ptr;
 	int err, iov_size, total_len, len;
-	int fput_needed;
 
 	/* kernel mode address */
 	struct sockaddr_storage addr;
@@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
 	int __user *uaddr_len;
 
 	if (MSG_CMSG_COMPAT & flags) {
-		if (get_compat_msghdr(&msg_sys, msg_compat))
+		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
 	}
-	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
-	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if (!sock)
-		goto out;
-
 	err = -EMSGSIZE;
-	if (msg_sys.msg_iovlen > UIO_MAXIOV)
-		goto out_put;
+	if (msg_sys->msg_iovlen > UIO_MAXIOV)
+		goto out;
 
 	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
-	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
-	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+	iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
+	if (msg_sys->msg_iovlen > UIO_FASTIOV) {
 		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
 		if (!iov)
-			goto out_put;
+			goto out;
 	}
 
 	/*
@@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
 	 *      kernel msghdr to use the kernel address space)
 	 */
 
-	uaddr = (__force void __user *)msg_sys.msg_name;
+	uaddr = (__force void __user *)msg_sys->msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
 	if (MSG_CMSG_COMPAT & flags) {
-		err = verify_compat_iovec(&msg_sys, iov,
+		err = verify_compat_iovec(msg_sys, iov,
 					  (struct sockaddr *)&addr,
 					  VERIFY_WRITE);
 	} else
-		err = verify_iovec(&msg_sys, iov,
+		err = verify_iovec(msg_sys, iov,
 				   (struct sockaddr *)&addr,
 				   VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
 
-	cmsg_ptr = (unsigned long)msg_sys.msg_control;
-	msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
+	cmsg_ptr = (unsigned long)msg_sys->msg_control;
+	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
+	err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
+							  total_len, flags);
 	if (err < 0)
 		goto out_freeiov;
 	len = err;
 
 	if (uaddr != NULL) {
 		err = move_addr_to_user((struct sockaddr *)&addr,
-					msg_sys.msg_namelen, uaddr,
+					msg_sys->msg_namelen, uaddr,
 					uaddr_len);
 		if (err < 0)
 			goto out_freeiov;
 	}
-	err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
+	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
 			 COMPAT_FLAGS(msg));
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)
-		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
+		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
 				 &msg_compat->msg_controllen);
 	else
-		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
+		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
 				 &msg->msg_controllen);
 	if (err)
 		goto out_freeiov;
@@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
 out_freeiov:
 	if (iov != iovstack)
 		sock_kfree_s(sock->sk, iov, iov_size);
-out_put:
+out:
+	return err;
+}
+
+/*
+ *	BSD recvmsg interface
+ */
+
+SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+		unsigned int, flags)
+{
+	int fput_needed, err;
+	struct msghdr msg_sys;
+	struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+
+	if (!sock)
+		goto out;
+
+	err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
 
-#ifdef __ARCH_WANT_SYS_SOCKETCALL
+/*
+ *     Linux recvmmsg interface
+ */
+
+int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
+		   unsigned int flags, struct timespec *timeout)
+{
+	int fput_needed, err, datagrams;
+	struct socket *sock;
+	struct mmsghdr __user *entry;
+	struct msghdr msg_sys;
+	struct timespec end_time;
+
+	if (timeout &&
+	    poll_select_set_timeout(&end_time, timeout->tv_sec,
+				    timeout->tv_nsec))
+		return -EINVAL;
+
+	datagrams = 0;
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (!sock)
+		return err;
+
+	err = sock_error(sock->sk);
+	if (err)
+		goto out_put;
+
+	entry = mmsg;
+
+	while (datagrams < vlen) {
+		/*
+		 * No need to ask LSM for more than the first datagram.
+		 */
+		err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
+				    &msg_sys, flags, datagrams);
+		if (err < 0)
+			break;
+		err = put_user(err, &entry->msg_len);
+		if (err)
+			break;
+		++entry;
+		++datagrams;
+
+		if (timeout) {
+			ktime_get_ts(timeout);
+			*timeout = timespec_sub(end_time, *timeout);
+			if (timeout->tv_sec < 0) {
+				timeout->tv_sec = timeout->tv_nsec = 0;
+				break;
+			}
+
+			/* Timeout, return less than vlen datagrams */
+			if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
+				break;
+		}
+
+		/* Out of band data, return right away */
+		if (msg_sys.msg_flags & MSG_OOB)
+			break;
+	}
+
+out_put:
+	fput_light(sock->file, fput_needed);
 
+	if (err == 0)
+		return datagrams;
+
+	if (datagrams != 0) {
+		/*
+		 * We may return less entries than requested (vlen) if the
+		 * sock is non block and there aren't enough datagrams...
+		 */
+		if (err != -EAGAIN) {
+			/*
+			 * ... or  if recvmsg returns an error after we
+			 * received some datagrams, where we record the
+			 * error to return on the next call or if the
+			 * app asks about it using getsockopt(SO_ERROR).
+			 */
+			sock->sk->sk_err = -err;
+		}
+
+		return datagrams;
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
+		unsigned int, vlen, unsigned int, flags,
+		struct timespec __user *, timeout)
+{
+	int datagrams;
+	struct timespec timeout_sys;
+
+	if (!timeout)
+		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
+
+	if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
+		return -EFAULT;
+
+	datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
+
+	if (datagrams > 0 &&
+	    copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
+		datagrams = -EFAULT;
+
+	return datagrams;
+}
+
+#ifdef __ARCH_WANT_SYS_SOCKETCALL
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static const unsigned char nargs[19]={
+static const unsigned char nargs[20] = {
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(4)
+	AL(4),AL(5)
 };
 
 #undef AL
@@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 	int err;
 	unsigned int len;
 
-	if (call < 1 || call > SYS_ACCEPT4)
+	if (call < 1 || call > SYS_RECVMMSG)
 		return -EINVAL;
 
 	len = nargs[call];
@@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
+	case SYS_RECVMMSG:
+		err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
+				   (struct timespec __user *)a[4]);
+		break;
 	case SYS_ACCEPT4:
 		err = sys_accept4(a0, (struct sockaddr __user *)a1,
 				  (int __user *)a[2], a[3]);
-- 
cgit v1.2.3


From 61321bbd6235ca9a40ba3bc249e8906cc66233c3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Oct 2009 17:11:23 +0000
Subject: net: Add netdev_alloc_skb_ip_align() helper

Instead of hardcoding NET_IP_ALIGN stuff in various network drivers,
we can add a helper around netdev_alloc_skb()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8c866b5cb97b..0c68fbd6faac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1491,6 +1491,16 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
 	return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
 }
 
+static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
+		unsigned int length)
+{
+	struct sk_buff *skb = netdev_alloc_skb(dev, length + NET_IP_ALIGN);
+
+	if (NET_IP_ALIGN && skb)
+		skb_reserve(skb, NET_IP_ALIGN);
+	return skb;
+}
+
 extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask);
 
 /**
-- 
cgit v1.2.3


From a6e4bc5304033e434fabccabb230b8e9ff55d76f Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Thu, 8 Oct 2009 22:17:11 +0000
Subject: can: make the number of echo skb's configurable

This patch allows the CAN controller driver to define the number of echo
skb's used for the local loopback (echo), as suggested by Kurt Van
Dijck, with the function:

  struct net_device *alloc_candev(int sizeof_priv,
                                  unsigned int echo_skb_max);

The CAN drivers have been adapted accordingly. For the ems_usb driver,
as suggested by Sebastian Haas, the number of echo skb's has been
increased to 10, which improves the transmission performance a lot.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        |  2 +-
 drivers/net/can/dev.c             | 32 ++++++++++++++++++++++++++------
 drivers/net/can/sja1000/sja1000.c |  3 ++-
 drivers/net/can/sja1000/sja1000.h |  2 ++
 drivers/net/can/ti_hecc.c         |  6 +-----
 drivers/net/can/usb/ems_usb.c     |  4 ++--
 include/linux/can/dev.h           | 16 ++++++++--------
 7 files changed, 42 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index f67ae285a35a..b13fd9114130 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1087,7 +1087,7 @@ static int __init at91_can_probe(struct platform_device *pdev)
 		goto exit_release;
 	}
 
-	dev = alloc_candev(sizeof(struct at91_priv));
+	dev = alloc_candev(sizeof(struct at91_priv), AT91_MB_TX_NUM);
 	if (!dev) {
 		err = -ENOMEM;
 		goto exit_iounmap;
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index f0b9a1e1db46..39b99f57c265 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -245,7 +245,7 @@ static void can_flush_echo_skb(struct net_device *dev)
 	struct net_device_stats *stats = &dev->stats;
 	int i;
 
-	for (i = 0; i < CAN_ECHO_SKB_MAX; i++) {
+	for (i = 0; i < priv->echo_skb_max; i++) {
 		if (priv->echo_skb[i]) {
 			kfree_skb(priv->echo_skb[i]);
 			priv->echo_skb[i] = NULL;
@@ -262,10 +262,13 @@ static void can_flush_echo_skb(struct net_device *dev)
  * of the device driver. The driver must protect access to
  * priv->echo_skb, if necessary.
  */
-void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx)
+void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
+		      unsigned int idx)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
+	BUG_ON(idx >= priv->echo_skb_max);
+
 	/* check flag whether this packet has to be looped back */
 	if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK) {
 		kfree_skb(skb);
@@ -311,10 +314,12 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb);
  * is handled in the device driver. The driver must protect
  * access to priv->echo_skb, if necessary.
  */
-void can_get_echo_skb(struct net_device *dev, int idx)
+void can_get_echo_skb(struct net_device *dev, unsigned int idx)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
+	BUG_ON(idx >= priv->echo_skb_max);
+
 	if (priv->echo_skb[idx]) {
 		netif_rx(priv->echo_skb[idx]);
 		priv->echo_skb[idx] = NULL;
@@ -327,10 +332,12 @@ EXPORT_SYMBOL_GPL(can_get_echo_skb);
   *
   * The function is typically called when TX failed.
   */
-void can_free_echo_skb(struct net_device *dev, int idx)
+void can_free_echo_skb(struct net_device *dev, unsigned int idx)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
+	BUG_ON(idx >= priv->echo_skb_max);
+
 	if (priv->echo_skb[idx]) {
 		kfree_skb(priv->echo_skb[idx]);
 		priv->echo_skb[idx] = NULL;
@@ -445,17 +452,30 @@ static void can_setup(struct net_device *dev)
 /*
  * Allocate and setup space for the CAN network device
  */
-struct net_device *alloc_candev(int sizeof_priv)
+struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 {
 	struct net_device *dev;
 	struct can_priv *priv;
+	int size;
 
-	dev = alloc_netdev(sizeof_priv, "can%d", can_setup);
+	if (echo_skb_max)
+		size = ALIGN(sizeof_priv, sizeof(struct sk_buff *)) +
+			echo_skb_max * sizeof(struct sk_buff *);
+	else
+		size = sizeof_priv;
+
+	dev = alloc_netdev(size, "can%d", can_setup);
 	if (!dev)
 		return NULL;
 
 	priv = netdev_priv(dev);
 
+	if (echo_skb_max) {
+		priv->echo_skb_max = echo_skb_max;
+		priv->echo_skb = (void *)priv +
+			ALIGN(sizeof_priv, sizeof(struct sk_buff *));
+	}
+
 	priv->state = CAN_STATE_STOPPED;
 
 	init_timer(&priv->restart_timer);
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 16d2ecd2a3b7..96d8be4253f8 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -565,7 +565,8 @@ struct net_device *alloc_sja1000dev(int sizeof_priv)
 	struct net_device *dev;
 	struct sja1000_priv *priv;
 
-	dev = alloc_candev(sizeof(struct sja1000_priv) + sizeof_priv);
+	dev = alloc_candev(sizeof(struct sja1000_priv) + sizeof_priv,
+		SJA1000_ECHO_SKB_MAX);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/net/can/sja1000/sja1000.h b/drivers/net/can/sja1000/sja1000.h
index 302d2c763ad7..97a622b9302f 100644
--- a/drivers/net/can/sja1000/sja1000.h
+++ b/drivers/net/can/sja1000/sja1000.h
@@ -50,6 +50,8 @@
 #include <linux/can/dev.h>
 #include <linux/can/platform/sja1000.h>
 
+#define SJA1000_ECHO_SKB_MAX	1 /* the SJA1000 has one TX buffer object */
+
 #define SJA1000_MAX_IRQ 20	/* max. number of interrupts handled in ISR */
 
 /* SJA1000 registers - manual section 6.4 (Pelican Mode) */
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 814e6c5c6386..23a7128e4eb7 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -74,10 +74,6 @@ MODULE_VERSION(HECC_MODULE_VERSION);
 #define HECC_MB_TX_SHIFT	2 /* as per table above */
 #define HECC_MAX_TX_MBOX	BIT(HECC_MB_TX_SHIFT)
 
-#if (HECC_MAX_TX_MBOX > CAN_ECHO_SKB_MAX)
-#error "HECC: MAX TX mailboxes should be equal or less than CAN_ECHO_SKB_MAX"
-#endif
-
 #define HECC_TX_PRIO_SHIFT	(HECC_MB_TX_SHIFT)
 #define HECC_TX_PRIO_MASK	(MAX_TX_PRIO << HECC_MB_TX_SHIFT)
 #define HECC_TX_MB_MASK		(HECC_MAX_TX_MBOX - 1)
@@ -902,7 +898,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
 		goto probe_exit_free_region;
 	}
 
-	ndev = alloc_candev(sizeof(struct ti_hecc_priv));
+	ndev = alloc_candev(sizeof(struct ti_hecc_priv), HECC_MAX_TX_MBOX);
 	if (!ndev) {
 		dev_err(&pdev->dev, "alloc_candev failed\n");
 		err = -ENOMEM;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 9012e0abc626..a65f56a9cd3d 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -232,7 +232,7 @@ MODULE_DEVICE_TABLE(usb, ems_usb_table);
 #define INTR_IN_BUFFER_SIZE 4
 
 #define MAX_RX_URBS 10
-#define MAX_TX_URBS CAN_ECHO_SKB_MAX
+#define MAX_TX_URBS 10
 
 struct ems_usb;
 
@@ -1012,7 +1012,7 @@ static int ems_usb_probe(struct usb_interface *intf,
 	struct ems_usb *dev;
 	int i, err = -ENOMEM;
 
-	netdev = alloc_candev(sizeof(struct ems_usb));
+	netdev = alloc_candev(sizeof(struct ems_usb), MAX_TX_URBS);
 	if (!netdev) {
 		dev_err(netdev->dev.parent, "Couldn't alloc candev\n");
 		return -ENOMEM;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 5824b20b5fcb..1d3f7f00e3af 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -29,8 +29,6 @@ enum can_mode {
 /*
  * CAN common private data
  */
-#define CAN_ECHO_SKB_MAX  4
-
 struct can_priv {
 	struct can_device_stats can_stats;
 
@@ -44,15 +42,16 @@ struct can_priv {
 	int restart_ms;
 	struct timer_list restart_timer;
 
-	struct sk_buff *echo_skb[CAN_ECHO_SKB_MAX];
-
 	int (*do_set_bittiming)(struct net_device *dev);
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
+
+	unsigned int echo_skb_max;
+	struct sk_buff **echo_skb;
 };
 
-struct net_device *alloc_candev(int sizeof_priv);
+struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
 int open_candev(struct net_device *dev);
@@ -64,8 +63,9 @@ void unregister_candev(struct net_device *dev);
 int can_restart_now(struct net_device *dev);
 void can_bus_off(struct net_device *dev);
 
-void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx);
-void can_get_echo_skb(struct net_device *dev, int idx);
-void can_free_echo_skb(struct net_device *dev, int idx);
+void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
+		      unsigned int idx);
+void can_get_echo_skb(struct net_device *dev, unsigned int idx);
+void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
 #endif /* CAN_DEV_H */
-- 
cgit v1.2.3


From aace495933a981274b6491d71b915165a61defdc Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Tue, 13 Oct 2009 07:25:49 +0000
Subject: net: smsc911x: allow platform_data to specify mac address

Extend the driver to accept a MAC address specified in platform_data.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c   | 3 +++
 include/linux/smsc911x.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index ccdd196f5297..6a9f51d1d9f2 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -2071,6 +2071,9 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	if (is_valid_ether_addr(dev->dev_addr)) {
 		smsc911x_set_hw_mac_address(pdata, dev->dev_addr);
 		SMSC_TRACE(PROBE, "MAC Address is specified by configuration");
+	} else if (is_valid_ether_addr(pdata->config.mac)) {
+		memcpy(dev->dev_addr, pdata->config.mac, 6);
+		SMSC_TRACE(PROBE, "MAC Address specified by platform data");
 	} else {
 		/* Try reading mac address from device. if EEPROM is present
 		 * it will already have been set */
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
index 5241e4fb4eca..7144e8aa1e41 100644
--- a/include/linux/smsc911x.h
+++ b/include/linux/smsc911x.h
@@ -30,6 +30,7 @@ struct smsc911x_platform_config {
 	unsigned int irq_type;
 	unsigned int flags;
 	phy_interface_t phy_interface;
+	unsigned char mac[6];
 };
 
 /* Constants for platform_device irq polarity configuration */
-- 
cgit v1.2.3


From 799e2205ec65e174f752b558c62a92c4752df313 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 9 Oct 2009 12:16:40 +0200
Subject: sched: Disable SD_PREFER_LOCAL for MC/CPU domains

Yanmin reported that both tbench and hackbench were significantly
hurt by trying to keep tasks local on these domains, esp on small
cache machines.

So disable it in order to promote spreading outside of the cache
domains.

Reported-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Mike Galbraith <efault@gmx.de>
LKML-Reference: <1255083400.8802.15.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/topology.h | 1 +
 include/linux/topology.h        | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 25a92842dd99..d823c245f63b 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
 				| 1*SD_BALANCE_FORK			\
 				| 0*SD_BALANCE_WAKE			\
 				| 1*SD_WAKE_AFFINE			\
+				| 1*SD_PREFER_LOCAL			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_POWERSAVINGS_BALANCE		\
 				| 0*SD_SHARE_PKG_RESOURCES		\
diff --git a/include/linux/topology.h b/include/linux/topology.h
index fc0bf3edeb67..57e63579bfdd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -129,7 +129,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_BALANCE_FORK			\
 				| 0*SD_BALANCE_WAKE			\
 				| 1*SD_WAKE_AFFINE			\
-				| 1*SD_PREFER_LOCAL			\
+				| 0*SD_PREFER_LOCAL			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
@@ -162,7 +162,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_BALANCE_FORK			\
 				| 0*SD_BALANCE_WAKE			\
 				| 1*SD_WAKE_AFFINE			\
-				| 1*SD_PREFER_LOCAL			\
+				| 0*SD_PREFER_LOCAL			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
-- 
cgit v1.2.3


From 43046b606673c9c991919ff75b980b72541e9ede Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 14 Oct 2009 09:16:42 -0700
Subject: workqueue: add 'flush_delayed_work()' to run and wait for delayed
 work

It basically turns a delayed work into an immediate work, and then waits
for it to finish.
---
 include/linux/workqueue.h |  1 +
 kernel/workqueue.c        | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 7ef0c7b94f31..cf24c20de9e4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -207,6 +207,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
+extern void flush_delayed_work(struct delayed_work *work);
 
 extern int schedule_work(struct work_struct *work);
 extern int schedule_work_on(int cpu, struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index addfe2df93b1..ccefe574dcf7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -639,6 +639,24 @@ int schedule_delayed_work(struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(schedule_delayed_work);
 
+/**
+ * flush_delayed_work - block until a dwork_struct's callback has terminated
+ * @dwork: the delayed work which is to be flushed
+ *
+ * Any timeout is cancelled, and any pending work is run immediately.
+ */
+void flush_delayed_work(struct delayed_work *dwork)
+{
+	if (del_timer(&dwork->timer)) {
+		struct cpu_workqueue_struct *cwq;
+		cwq = wq_per_cpu(keventd_wq, get_cpu());
+		__queue_work(cwq, &dwork->work);
+		put_cpu();
+	}
+	flush_work(&dwork->work);
+}
+EXPORT_SYMBOL(flush_delayed_work);
+
 /**
  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
  * @cpu: cpu to use
-- 
cgit v1.2.3


From 8e85973efc87dfae8508f1a3440fd44612897458 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 8 Oct 2009 00:41:59 +0200
Subject: firewire: optimize config ROM creation

The config ROM image of the local node was created in CPU byte order,
then a temporary big endian copy was created to compute the CRC, and
finally the card driver created its own big endian copy.

We now generate it in big endian byte order in the first place to avoid
one byte order conversion and the temporary on-stack copy of the ROM
image (1000 bytes stack usage in process context).  Furthermore, two
1000 bytes memset()s are replaced by one 1000 bytes - ROM length sized
memset.

The trivial fw_memcpy_{from,to}_be32() helpers are now superfluous and
removed.  The newly added __compute_block_crc() function will be folded
into fw_compute_block_crc() in a subsequent change.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c | 62 ++++++++++++++++++++++++++------------------
 drivers/firewire/core.h      |  7 ++---
 drivers/firewire/ohci.c      | 30 +++++++++++++--------
 include/linux/firewire.h     | 14 ----------
 4 files changed, 60 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 33898b63cdf7..f73e3bdfc84c 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -38,6 +38,18 @@
 
 #include "core.h"
 
+static int __compute_block_crc(__be32 *block)
+{
+	int length;
+	u16 crc;
+
+	length = (be32_to_cpu(block[0]) >> 16) & 0xff;
+	crc = crc_itu_t(0, (u8 *)&block[1], length * 4);
+	*block |= cpu_to_be32(crc);
+
+	return length;
+}
+
 int fw_compute_block_crc(u32 *block)
 {
 	__be32 be32_block[256];
@@ -72,11 +84,11 @@ static int descriptor_count;
 #define BIB_CMC			((1) << 30)
 #define BIB_IMC			((1) << 31)
 
-static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
+static __be32 *generate_config_rom(struct fw_card *card, size_t *rom_length)
 {
 	struct fw_descriptor *desc;
-	static u32 config_rom[256];
-	int i, j, length;
+	static __be32 config_rom[256];
+	int i, j, k, length;
 
 	/*
 	 * Initialize contents of config rom buffer.  On the OHCI
@@ -87,40 +99,39 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
 	 * the version stored in the OHCI registers.
 	 */
 
-	memset(config_rom, 0, sizeof(config_rom));
-	config_rom[0] = BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0);
-	config_rom[1] = 0x31333934;
-
-	config_rom[2] =
+	config_rom[0] = cpu_to_be32(
+		BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0));
+	config_rom[1] = cpu_to_be32(0x31333934);
+	config_rom[2] = cpu_to_be32(
 		BIB_LINK_SPEED(card->link_speed) |
 		BIB_GENERATION(card->config_rom_generation++ % 14 + 2) |
 		BIB_MAX_ROM(2) |
 		BIB_MAX_RECEIVE(card->max_receive) |
-		BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC;
-	config_rom[3] = card->guid >> 32;
-	config_rom[4] = card->guid;
+		BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC);
+	config_rom[3] = cpu_to_be32(card->guid >> 32);
+	config_rom[4] = cpu_to_be32(card->guid);
 
 	/* Generate root directory. */
-	i = 5;
-	config_rom[i++] = 0;
-	config_rom[i++] = 0x0c0083c0; /* node capabilities */
-	j = i + descriptor_count;
+	config_rom[6] = cpu_to_be32(0x0c0083c0); /* node capabilities */
+	i = 7;
+	j = 7 + descriptor_count;
 
 	/* Generate root directory entries for descriptors. */
 	list_for_each_entry (desc, &descriptor_list, link) {
 		if (desc->immediate > 0)
-			config_rom[i++] = desc->immediate;
-		config_rom[i] = desc->key | (j - i);
+			config_rom[i++] = cpu_to_be32(desc->immediate);
+		config_rom[i] = cpu_to_be32(desc->key | (j - i));
 		i++;
 		j += desc->length;
 	}
 
 	/* Update root directory length. */
-	config_rom[5] = (i - 5 - 1) << 16;
+	config_rom[5] = cpu_to_be32((i - 5 - 1) << 16);
 
 	/* End of root directory, now copy in descriptors. */
 	list_for_each_entry (desc, &descriptor_list, link) {
-		memcpy(&config_rom[i], desc->data, desc->length * 4);
+		for (k = 0; k < desc->length; k++)
+			config_rom[i + k] = cpu_to_be32(desc->data[k]);
 		i += desc->length;
 	}
 
@@ -129,9 +140,9 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
 	 * the bus info block, which is always the case for this
 	 * implementation. */
 	for (i = 0; i < j; i += length + 1)
-		length = fw_compute_block_crc(config_rom + i);
+		length = __compute_block_crc(config_rom + i);
 
-	*config_rom_length = j;
+	*rom_length = j;
 
 	return config_rom;
 }
@@ -139,7 +150,7 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
 static void update_config_roms(void)
 {
 	struct fw_card *card;
-	u32 *config_rom;
+	__be32 *config_rom;
 	size_t length;
 
 	list_for_each_entry (card, &card_list, link) {
@@ -432,7 +443,7 @@ EXPORT_SYMBOL(fw_card_initialize);
 int fw_card_add(struct fw_card *card,
 		u32 max_receive, u32 link_speed, u64 guid)
 {
-	u32 *config_rom;
+	__be32 *config_rom;
 	size_t length;
 	int ret;
 
@@ -462,7 +473,8 @@ EXPORT_SYMBOL(fw_card_add);
  * shutdown still need to be provided by the card driver.
  */
 
-static int dummy_enable(struct fw_card *card, u32 *config_rom, size_t length)
+static int dummy_enable(struct fw_card *card,
+			const __be32 *config_rom, size_t length)
 {
 	BUG();
 	return -1;
@@ -475,7 +487,7 @@ static int dummy_update_phy_reg(struct fw_card *card, int address,
 }
 
 static int dummy_set_config_rom(struct fw_card *card,
-				u32 *config_rom, size_t length)
+				const __be32 *config_rom, size_t length)
 {
 	/*
 	 * We take the card out of card_list before setting the dummy
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 7ff6e7585152..7adca7cb9f55 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -40,7 +40,8 @@ struct fw_card_driver {
 	 * enable the PHY or set the link_on bit and initiate a bus
 	 * reset.
 	 */
-	int (*enable)(struct fw_card *card, u32 *config_rom, size_t length);
+	int (*enable)(struct fw_card *card,
+		      const __be32 *config_rom, size_t length);
 
 	int (*update_phy_reg)(struct fw_card *card, int address,
 			      int clear_bits, int set_bits);
@@ -48,10 +49,10 @@ struct fw_card_driver {
 	/*
 	 * Update the config rom for an enabled card.  This function
 	 * should change the config rom that is presented on the bus
-	 * an initiate a bus reset.
+	 * and initiate a bus reset.
 	 */
 	int (*set_config_rom)(struct fw_card *card,
-			      u32 *config_rom, size_t length);
+			      const __be32 *config_rom, size_t length);
 
 	void (*send_request)(struct fw_card *card, struct fw_packet *packet);
 	void (*send_response)(struct fw_card *card, struct fw_packet *packet);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 5d524254499e..418415564791 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -205,7 +205,7 @@ struct fw_ohci {
 	dma_addr_t config_rom_bus;
 	__be32 *next_config_rom;
 	dma_addr_t next_config_rom_bus;
-	u32 next_header;
+	__be32 next_header;
 
 	struct ar_context ar_request_ctx;
 	struct ar_context ar_response_ctx;
@@ -1355,8 +1355,9 @@ static void bus_reset_tasklet(unsigned long data)
 		 */
 		reg_write(ohci, OHCI1394_BusOptions,
 			  be32_to_cpu(ohci->config_rom[2]));
-		ohci->config_rom[0] = cpu_to_be32(ohci->next_header);
-		reg_write(ohci, OHCI1394_ConfigROMhdr, ohci->next_header);
+		ohci->config_rom[0] = ohci->next_header;
+		reg_write(ohci, OHCI1394_ConfigROMhdr,
+			  be32_to_cpu(ohci->next_header));
 	}
 
 #ifdef CONFIG_FIREWIRE_OHCI_REMOTE_DMA
@@ -1464,7 +1465,17 @@ static int software_reset(struct fw_ohci *ohci)
 	return -EBUSY;
 }
 
-static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
+static void copy_config_rom(__be32 *dest, const __be32 *src, size_t length)
+{
+	size_t size = length * 4;
+
+	memcpy(dest, src, size);
+	if (size < CONFIG_ROM_SIZE)
+		memset(&dest[length], 0, CONFIG_ROM_SIZE - size);
+}
+
+static int ohci_enable(struct fw_card *card,
+		       const __be32 *config_rom, size_t length)
 {
 	struct fw_ohci *ohci = fw_ohci(card);
 	struct pci_dev *dev = to_pci_dev(card->device);
@@ -1565,8 +1576,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 		if (ohci->next_config_rom == NULL)
 			return -ENOMEM;
 
-		memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE);
-		fw_memcpy_to_be32(ohci->next_config_rom, config_rom, length * 4);
+		copy_config_rom(ohci->next_config_rom, config_rom, length);
 	} else {
 		/*
 		 * In the suspend case, config_rom is NULL, which
@@ -1576,7 +1586,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 		ohci->next_config_rom_bus = ohci->config_rom_bus;
 	}
 
-	ohci->next_header = be32_to_cpu(ohci->next_config_rom[0]);
+	ohci->next_header = ohci->next_config_rom[0];
 	ohci->next_config_rom[0] = 0;
 	reg_write(ohci, OHCI1394_ConfigROMhdr, 0);
 	reg_write(ohci, OHCI1394_BusOptions,
@@ -1610,7 +1620,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
 }
 
 static int ohci_set_config_rom(struct fw_card *card,
-			       u32 *config_rom, size_t length)
+			       const __be32 *config_rom, size_t length)
 {
 	struct fw_ohci *ohci;
 	unsigned long flags;
@@ -1659,9 +1669,7 @@ static int ohci_set_config_rom(struct fw_card *card,
 		ohci->next_config_rom = next_config_rom;
 		ohci->next_config_rom_bus = next_config_rom_bus;
 
-		memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE);
-		fw_memcpy_to_be32(ohci->next_config_rom, config_rom,
-				  length * 4);
+		copy_config_rom(ohci->next_config_rom, config_rom, length);
 
 		ohci->next_header = config_rom[0];
 		ohci->next_config_rom[0] = 0;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 7e1d4dec83e7..53b9217de86c 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -20,20 +20,6 @@
 #define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args)
 #define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args)
 
-static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size)
-{
-	u32    *dst = _dst;
-	__be32 *src = _src;
-	int i;
-
-	for (i = 0; i < size / 4; i++)
-		dst[i] = be32_to_cpu(src[i]);
-}
-
-static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size)
-{
-	fw_memcpy_from_be32(_dst, _src, size);
-}
 #define CSR_REGISTER_BASE		0xfffff0000000ULL
 
 /* register offsets are relative to CSR_REGISTER_BASE */
-- 
cgit v1.2.3


From cb7c96da3651111efbe088fa12f9bed61836ea93 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 8 Oct 2009 00:42:53 +0200
Subject: firewire: core: optimize Topology Map creation

The Topology Map of the local node was created in CPU byte order,
then a temporary big endian copy was created to compute the CRC,
and when a read request to the Topology Map arrived it had to be
converted to big endian byte order again.

We now generate it in big endian byte order in the first place.
This also rids us of 1000 bytes stack usage in tasklet context.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c        | 17 ++---------------
 drivers/firewire/core-topology.c    | 17 ++++++++++-------
 drivers/firewire/core-transaction.c |  9 ++-------
 drivers/firewire/core.h             |  2 +-
 include/linux/firewire.h            |  2 +-
 5 files changed, 16 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index f58130789990..7083bcc1b9c7 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -38,7 +38,7 @@
 
 #include "core.h"
 
-static int __compute_block_crc(__be32 *block)
+int fw_compute_block_crc(__be32 *block)
 {
 	int length;
 	u16 crc;
@@ -50,19 +50,6 @@ static int __compute_block_crc(__be32 *block)
 	return length;
 }
 
-int fw_compute_block_crc(u32 *block)
-{
-	__be32 be32_block[256];
-	int i, length;
-
-	length = (*block >> 16) & 0xff;
-	for (i = 0; i < length; i++)
-		be32_block[i] = cpu_to_be32(block[i + 1]);
-	*block |= crc_itu_t(0, (u8 *) be32_block, length * 4);
-
-	return length;
-}
-
 static DEFINE_MUTEX(card_mutex);
 static LIST_HEAD(card_list);
 
@@ -141,7 +128,7 @@ static size_t generate_config_rom(struct fw_card *card, __be32 *config_rom)
 	 * the bus info block, which is always the case for this
 	 * implementation. */
 	for (i = 0; i < j; i += length + 1)
-		length = __compute_block_crc(config_rom + i);
+		length = fw_compute_block_crc(config_rom + i);
 
 	return j;
 }
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index fddf2b358936..9a5f38c80b0e 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -28,9 +28,9 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/string.h>
 
 #include <asm/atomic.h>
+#include <asm/byteorder.h>
 #include <asm/system.h>
 
 #include "core.h"
@@ -510,13 +510,16 @@ static void update_tree(struct fw_card *card, struct fw_node *root)
 static void update_topology_map(struct fw_card *card,
 				u32 *self_ids, int self_id_count)
 {
-	int node_count;
+	int node_count = (card->root_node->node_id & 0x3f) + 1;
+	__be32 *map = card->topology_map;
+
+	*map++ = cpu_to_be32((self_id_count + 2) << 16);
+	*map++ = cpu_to_be32(be32_to_cpu(card->topology_map[1]) + 1);
+	*map++ = cpu_to_be32((node_count << 16) | self_id_count);
+
+	while (self_id_count--)
+		*map++ = cpu_to_be32p(self_ids++);
 
-	card->topology_map[1]++;
-	node_count = (card->root_node->node_id & 0x3f) + 1;
-	card->topology_map[2] = (node_count << 16) | self_id_count;
-	card->topology_map[0] = (self_id_count + 2) << 16;
-	memcpy(&card->topology_map[3], self_ids, self_id_count * 4);
 	fw_compute_block_crc(card->topology_map);
 }
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index da628c72a462..203e6428bada 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -810,8 +810,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request
 		int speed, unsigned long long offset,
 		void *payload, size_t length, void *callback_data)
 {
-	int i, start, end;
-	__be32 *map;
+	int start;
 
 	if (!TCODE_IS_READ_REQUEST(tcode)) {
 		fw_send_response(card, request, RCODE_TYPE_ERROR);
@@ -824,11 +823,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request
 	}
 
 	start = (offset - topology_map_region.start) / 4;
-	end = start + length / 4;
-	map = payload;
-
-	for (i = 0; i < length / 4; i++)
-		map[i] = cpu_to_be32(card->topology_map[start + i]);
+	memcpy(payload, &card->topology_map[start], length);
 
 	fw_send_response(card, request, RCODE_COMPLETE);
 }
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 7adca7cb9f55..ed3b1a765c00 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -94,7 +94,7 @@ int fw_card_add(struct fw_card *card,
 		u32 max_receive, u32 link_speed, u64 guid);
 void fw_core_remove_card(struct fw_card *card);
 int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
-int fw_compute_block_crc(u32 *block);
+int fw_compute_block_crc(__be32 *block);
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
 
 static inline struct fw_card *fw_card_get(struct fw_card *card)
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 53b9217de86c..211a5d7d87b3 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -117,7 +117,7 @@ struct fw_card {
 
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
-	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
+	__be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
 };
 
 struct fw_attribute_group {
-- 
cgit v1.2.3


From 205cb37b89ab37db553907e5ac17962eec561804 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 14 Oct 2009 23:22:17 +0200
Subject: kill-the-bkl/reiserfs: definitely drop the bkl from reiserfs_ioctl()

The reiserfs ioctl path doesn't need the big kernel lock anymore , now
that the filesystem synchronizes through its own lock.

We can then turn reiserfs_ioctl() into an unlocked_ioctl callback.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Laurent Riffard <laurent.riffard@free.fr>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 fs/reiserfs/dir.c           |  2 +-
 fs/reiserfs/file.c          |  2 +-
 fs/reiserfs/ioctl.c         | 11 +++--------
 include/linux/reiserfs_fs.h |  3 +--
 4 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 17f31ad379c8..c094f58c7448 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = reiserfs_readdir,
 	.fsync = reiserfs_dir_fsync,
-	.ioctl = reiserfs_ioctl,
+	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9f436668b7f8..da2dba082e2d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 const struct file_operations reiserfs_file_operations = {
 	.read = do_sync_read,
 	.write = reiserfs_file_write,
-	.ioctl = reiserfs_ioctl,
+	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index e30e8be09179..ace77451ceb1 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -20,9 +20,9 @@
  *  2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
  *  3) That's all for a while ...
  */
-int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-		   unsigned long arg)
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	unsigned int flags;
 	int err = 0;
 
@@ -132,9 +132,6 @@ setversion_out:
 long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int ret;
-
 	/* These are just misnamed, they actually get/put from/to user an int */
 	switch (cmd) {
 	case REISERFS_IOC32_UNPACK:
@@ -156,9 +153,7 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 		return -ENOIOCTLCMD;
 	}
 
-	ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
-
-	return ret;
+	return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
 }
 #endif
 
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a498d9266d8c..a05b4a20768d 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2314,8 +2314,7 @@ __u32 r5_hash(const signed char *msg, int len);
 #define SPARE_SPACE 500
 
 /* prototypes from ioctl.c */
-int reiserfs_ioctl(struct inode *inode, struct file *filp,
-		   unsigned int cmd, unsigned long arg);
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long reiserfs_compat_ioctl(struct file *filp,
 		   unsigned int cmd, unsigned long arg);
 int reiserfs_unpack(struct inode *inode, struct file *filp);
-- 
cgit v1.2.3


From 019129d595caaa5bd0b41d128308da1be6a91869 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 14 Oct 2009 10:15:56 -0700
Subject: rcu: Stopgap fix for synchronize_rcu_expedited() for TREE_PREEMPT_RCU

For the short term, map synchronize_rcu_expedited() to
synchronize_rcu() for TREE_PREEMPT_RCU and to
synchronize_sched_expedited() for TREE_RCU.

Longer term, there needs to be a real expedited grace period for
TREE_PREEMPT_RCU, but candidate patches to date are considerably
more complex and intrusive.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: npiggin@suse.de
Cc: jens.axboe@oracle.com
LKML-Reference: <12555405592331-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutree.h |  6 +-----
 kernel/rcutree_plugin.h | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 46e9ab3ee6e1..9642c6bcb399 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -76,11 +76,7 @@ static inline void __rcu_read_unlock_bh(void)
 
 extern void call_rcu_sched(struct rcu_head *head,
 			   void (*func)(struct rcu_head *rcu));
-
-static inline void synchronize_rcu_expedited(void)
-{
-	synchronize_sched_expedited();
-}
+extern void synchronize_rcu_expedited(void);
 
 static inline void synchronize_rcu_bh_expedited(void)
 {
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c0cb783aa16a..ebd20ee7707d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -392,6 +392,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/*
+ * Wait for an rcu-preempt grace period.  We are supposed to expedite the
+ * grace period, but this is the crude slow compatability hack, so just
+ * invoke synchronize_rcu().
+ */
+void synchronize_rcu_expedited(void)
+{
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
 /*
  * Check to see if there is any immediate preemptable-RCU-related work
  * to be done.
@@ -564,6 +575,16 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/*
+ * Wait for an rcu-preempt grace period, but make it happen quickly.
+ * But because preemptable RCU does not exist, map to rcu-sched.
+ */
+void synchronize_rcu_expedited(void)
+{
+	synchronize_sched_expedited();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
 /*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
-- 
cgit v1.2.3


From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 15 Oct 2009 11:21:42 +0800
Subject: tracing/profile: Add filter support

- Add an ioctl to allocate a filter for a perf event.

- Free the filter when the associated perf event is to be freed.

- Do the filtering in perf_swevent_match().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4AD69546.8050401@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  11 ++-
 include/linux/perf_counter.h       |   1 +
 include/linux/perf_event.h         |   6 ++
 kernel/perf_event.c                |  80 ++++++++++++++++++++--
 kernel/trace/trace.h               |   3 +-
 kernel/trace/trace_events_filter.c | 133 +++++++++++++++++++++++++++++--------
 6 files changed, 199 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4ec5e67e18cf..d11770472bc8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -144,7 +144,7 @@ extern char			*trace_profile_buf_nmi;
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
 extern void destroy_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_current_check_discard(struct ring_buffer *buffer,
 					struct ftrace_event_call *call,
 					void *rec,
@@ -186,4 +186,13 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
+#ifdef CONFIG_EVENT_PROFILE
+struct perf_event;
+extern int ftrace_profile_enable(int event_id);
+extern void ftrace_profile_disable(int event_id);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+				     char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+#endif
+
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf433cff..91a2b4309e7a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -225,6 +225,7 @@ struct perf_counter_attr {
 #define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e6d95f97419..df9d964c15fc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -221,6 +221,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
@@ -633,7 +634,12 @@ struct perf_event {
 
 	struct pid_namespace		*ns;
 	u64				id;
+
+#ifdef CONFIG_EVENT_PROFILE
+	struct event_filter		*filter;
 #endif
+
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 /**
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9d0b5c665883..12b5ec39bf97 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,6 +28,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/irq_regs.h>
 
@@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
 	return ERR_PTR(err);
 }
 
+static void perf_event_free_filter(struct perf_event *event);
+
 static void free_event_rcu(struct rcu_head *head)
 {
 	struct perf_event *event;
@@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head)
 	event = container_of(head, struct perf_event, rcu_head);
 	if (event->ns)
 		put_pid_ns(event->ns);
+	perf_event_free_filter(event);
 	kfree(event);
 }
 
@@ -1974,7 +1978,8 @@ unlock:
 	return ret;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_EVENT_IOC_SET_OUTPUT:
 		return perf_event_set_output(event, arg);
 
+	case PERF_EVENT_IOC_SET_FILTER:
+		return perf_event_set_filter(event, (void __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event)
 	return 1;
 }
 
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data);
+
 static int perf_swevent_match(struct perf_event *event,
 				enum perf_type_id type,
-				u32 event_id, struct pt_regs *regs)
+				u32 event_id,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
 {
 	if (!perf_swevent_is_counting(event))
 		return 0;
@@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event,
 			return 0;
 	}
 
+	if (event->attr.type == PERF_TYPE_TRACEPOINT &&
+	    !perf_tp_event_match(event, data))
+		return 0;
+
 	return 1;
 }
 
@@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_swevent_match(event, type, event_id, regs))
+		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
 	rcu_read_unlock();
@@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = {
 };
 
 #ifdef CONFIG_EVENT_PROFILE
+
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
 {
@@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-extern int ftrace_profile_enable(int);
-extern void ftrace_profile_disable(int);
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	void *record = data->raw->data;
+
+	if (likely(!event->filter) || filter_match_preds(event->filter, record))
+		return 1;
+	return 0;
+}
 
 static void tp_perf_event_destroy(struct perf_event *event)
 {
@@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 
 	return &perf_ops_generic;
 }
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	char *filter_str;
+	int ret;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	filter_str = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(filter_str))
+		return PTR_ERR(filter_str);
+
+	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+
+	kfree(filter_str);
+	return ret;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+	ftrace_profile_free_filter(event);
+}
+
 #else
+
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	return 1;
+}
+
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	return NULL;
 }
-#endif
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
 
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
@@ -4394,7 +4460,7 @@ err_size:
 	goto out;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd)
+static int perf_event_set_output(struct perf_event *event, int output_fd)
 {
 	struct perf_event *output_event = NULL;
 	struct file *output_file = NULL;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ffe53ddbe67a..4959ada9e0bb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->filter_active) &&
+	    !filter_match_preds(call->filter, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 273845fce393..e27bb6acc2dd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/mutex.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred)
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	struct event_filter *filter = call->filter;
 	int match, top = 0, val1 = 0, val2 = 0;
 	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
@@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
-void destroy_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
 {
-	struct event_filter *filter = call->filter;
 	int i;
 
 	if (!filter)
@@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
 	kfree(filter->preds);
 	kfree(filter->filter_string);
 	kfree(filter);
+}
+
+void destroy_preds(struct ftrace_event_call *call)
+{
+	__free_preds(call->filter);
 	call->filter = NULL;
+	call->filter_active = 0;
 }
 
-static int init_preds(struct ftrace_event_call *call)
+static struct event_filter *__alloc_preds(void)
 {
 	struct event_filter *filter;
 	struct filter_pred *pred;
 	int i;
 
-	if (call->filter)
-		return 0;
-
-	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	if (!call->filter)
-		return -ENOMEM;
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return ERR_PTR(-ENOMEM);
 
 	filter->n_preds = 0;
 
@@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
 		filter->preds[i] = pred;
 	}
 
-	return 0;
+	return filter;
 
 oom:
-	destroy_preds(call);
+	__free_preds(filter);
+	return ERR_PTR(-ENOMEM);
+}
+
+static int init_preds(struct ftrace_event_call *call)
+{
+	if (call->filter)
+		return 0;
+
+	call->filter_active = 0;
+	call->filter = __alloc_preds();
+	if (IS_ERR(call->filter))
+		return PTR_ERR(call->filter);
 
-	return -ENOMEM;
+	return 0;
 }
 
 static int init_subsystem_preds(struct event_subsystem *system)
@@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
 			      struct ftrace_event_call *call,
+			      struct event_filter *filter,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
-	struct event_filter *filter = call->filter;
 	int idx, err;
 
 	if (filter->n_preds == MAX_FILTER_PRED) {
@@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 		return err;
 
 	filter->n_preds++;
-	call->filter_active = 1;
 
 	return 0;
 }
@@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
 
 static int filter_add_pred(struct filter_parse_state *ps,
 			   struct ftrace_event_call *call,
+			   struct event_filter *filter,
 			   struct filter_pred *pred,
 			   bool dry_run)
 {
@@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
 	if (!dry_run)
-		return filter_add_pred_fn(ps, call, pred, fn);
+		return filter_add_pred_fn(ps, call, filter, pred, fn);
 	return 0;
 }
 
@@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps)
 }
 
 static int replace_preds(struct ftrace_event_call *call,
+			 struct event_filter *filter,
 			 struct filter_parse_state *ps,
 			 char *filter_string,
 			 bool dry_run)
@@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call,
 add_pred:
 		if (!pred)
 			return -ENOMEM;
-		err = filter_add_pred(ps, call, pred, dry_run);
+		err = filter_add_pred(ps, call, filter, pred, dry_run);
 		filter_free_pred(pred);
 		if (err)
 			return err;
@@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system,
 				char *filter_string)
 {
 	struct ftrace_event_call *call;
+	struct event_filter *filter;
 	int err;
 	bool fail = true;
 
@@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system,
 			continue;
 
 		/* try to see if the filter can be applied */
-		err = replace_preds(call, ps, filter_string, true);
+		err = replace_preds(call, filter, ps, filter_string, true);
 		if (err)
 			continue;
 
 		/* really apply the filter */
 		filter_disable_preds(call);
-		err = replace_preds(call, ps, filter_string, false);
+		err = replace_preds(call, filter, ps, filter_string, false);
 		if (err)
 			filter_disable_preds(call);
-		else
-			replace_filter_string(call->filter, filter_string);
+		else {
+			call->filter_active = 1;
+			replace_filter_string(filter, filter_string);
+		}
 		fail = false;
 	}
 
@@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system,
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 		goto out;
 	}
 
-	err = replace_preds(call, ps, filter_string, false);
+	err = replace_preds(call, call->filter, ps, filter_string, false);
 	if (err)
 		append_filter_err(ps, call->filter);
-
+	else
+		call->filter_active = 1;
 out:
 	filter_opstack_clear(ps);
 	postfix_clear(ps);
@@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 				 char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1345,3 +1362,67 @@ out_unlock:
 	return err;
 }
 
+#ifdef CONFIG_EVENT_PROFILE
+
+void ftrace_profile_free_filter(struct perf_event *event)
+{
+	struct event_filter *filter = event->filter;
+
+	event->filter = NULL;
+	__free_preds(filter);
+}
+
+int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+			      char *filter_str)
+{
+	int err;
+	struct event_filter *filter;
+	struct filter_parse_state *ps;
+	struct ftrace_event_call *call = NULL;
+
+	mutex_lock(&event_mutex);
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (call->id == event_id)
+			break;
+	}
+	if (!call)
+		return -EINVAL;
+
+	if (event->filter)
+		return -EEXIST;
+
+	filter = __alloc_preds();
+	if (IS_ERR(filter))
+		return PTR_ERR(filter);
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto free_preds;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err)
+		goto free_ps;
+
+	err = replace_preds(call, filter, ps, filter_str, false);
+	if (!err)
+		event->filter = filter;
+
+free_ps:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+free_preds:
+	if (err)
+		__free_preds(filter);
+
+	mutex_unlock(&event_mutex);
+
+	return err;
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
-- 
cgit v1.2.3


From 731581e6a653f6a68a4d7ba9df6b886a85c7d080 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:46 -0600
Subject: of: merge phandle, ihandle and struct property

Merge of common code duplicated between Sparc, PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 10 ----------
 arch/powerpc/include/asm/prom.h    | 12 ------------
 arch/sparc/include/asm/prom.h      | 12 ------------
 include/linux/of.h                 | 12 ++++++++++++
 4 files changed, 12 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 66368896f922..11cb48419c7d 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -73,16 +73,6 @@ struct boot_param_header {
 	u32	dt_struct_size; /* size of the DT structure block */
 };
 
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
-	char	*name;
-	int	length;
-	void	*value;
-	struct property *next;
-};
-
 struct device_node {
 	const char *name;
 	const char *type;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index b0a84ea5f8ed..c236326177a8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -71,18 +71,6 @@ struct boot_param_header
 	u32	dt_struct_size;		/* size of the DT structure block */
 };
 
-
-
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
-	char	*name;
-	int	length;
-	void	*value;
-	struct property *next;
-};
-
 struct device_node {
 	const char *name;
 	const char *type;
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index 0733170e02ca..b34f988a2aad 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -29,18 +29,6 @@
 #define of_prop_cmp(s1, s2)		strcasecmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcmp((s1), (s2))
 
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
-	char	*name;
-	int	length;
-	void	*value;
-	struct property *next;
-	unsigned long _flags;
-	unsigned int unique_id;
-};
-
 struct of_irq_controller;
 struct device_node {
 	const char	*name;
diff --git a/include/linux/of.h b/include/linux/of.h
index 7be2d1043c16..4668b298479a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -19,6 +19,18 @@
 #include <linux/bitops.h>
 #include <linux/mod_devicetable.h>
 
+typedef u32 phandle;
+typedef u32 ihandle;
+
+struct property {
+	char	*name;
+	int	length;
+	void	*value;
+	struct property *next;
+	unsigned long _flags;
+	unsigned int unique_id;
+};
+
 #include <asm/prom.h>
 
 /* flag descriptions */
-- 
cgit v1.2.3


From 6f1924928377bd035a9f64466f91a487c69271d2 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:49 -0600
Subject: of: merge struct device_node

Merge of common code duplicated between Sparc, PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 20 --------------------
 arch/powerpc/include/asm/prom.h    | 20 --------------------
 arch/sparc/include/asm/prom.h      | 24 ------------------------
 include/linux/of.h                 | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 11cb48419c7d..64e8b3a8c3cf 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -73,26 +73,6 @@ struct boot_param_header {
 	u32	dt_struct_size; /* size of the DT structure block */
 };
 
-struct device_node {
-	const char *name;
-	const char *type;
-	phandle	node;
-	phandle linux_phandle;
-	char	*full_name;
-
-	struct	property *properties;
-	struct	property *deadprops; /* removed properties */
-	struct	device_node *parent;
-	struct	device_node *child;
-	struct	device_node *sibling;
-	struct	device_node *next; /* next device of same type */
-	struct	device_node *allnext; /* next in list of all nodes */
-	struct	proc_dir_entry *pde; /* this node's proc directory */
-	struct	kref kref;
-	unsigned long _flags;
-	void	*data;
-};
-
 extern struct device_node *of_chosen;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index c236326177a8..c918db535f08 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -71,26 +71,6 @@ struct boot_param_header
 	u32	dt_struct_size;		/* size of the DT structure block */
 };
 
-struct device_node {
-	const char *name;
-	const char *type;
-	phandle	node;
-	phandle linux_phandle;
-	char	*full_name;
-
-	struct	property *properties;
-	struct  property *deadprops; /* removed properties */
-	struct	device_node *parent;
-	struct	device_node *child;
-	struct	device_node *sibling;
-	struct	device_node *next;	/* next device of same type */
-	struct	device_node *allnext;	/* next in list of all nodes */
-	struct  proc_dir_entry *pde;	/* this node's proc directory */
-	struct  kref kref;
-	unsigned long _flags;
-	void	*data;
-};
-
 extern struct device_node *of_chosen;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index b34f988a2aad..e5f4a1d8fc46 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -29,30 +29,6 @@
 #define of_prop_cmp(s1, s2)		strcasecmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcmp((s1), (s2))
 
-struct of_irq_controller;
-struct device_node {
-	const char	*name;
-	const char	*type;
-	phandle	node;
-	char	*path_component_name;
-	char	*full_name;
-
-	struct	property *properties;
-	struct  property *deadprops; /* removed properties */
-	struct	device_node *parent;
-	struct	device_node *child;
-	struct	device_node *sibling;
-	struct	device_node *next;	/* next device of same type */
-	struct	device_node *allnext;	/* next in list of all nodes */
-	struct  proc_dir_entry *pde;	/* this node's proc directory */
-	struct  kref kref;
-	unsigned long _flags;
-	void	*data;
-	unsigned int unique_id;
-
-	struct of_irq_controller *irq_trans;
-};
-
 struct of_irq_controller {
 	unsigned int	(*irq_build)(struct device_node *, unsigned int, void *);
 	void		*data;
diff --git a/include/linux/of.h b/include/linux/of.h
index 4668b298479a..65a158dc7257 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -17,6 +17,7 @@
  */
 #include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/kref.h>
 #include <linux/mod_devicetable.h>
 
 typedef u32 phandle;
@@ -31,6 +32,37 @@ struct property {
 	unsigned int unique_id;
 };
 
+#if defined(CONFIG_SPARC)
+struct of_irq_controller;
+#endif
+
+struct device_node {
+	const char *name;
+	const char *type;
+	phandle	node;
+#if !defined(CONFIG_SPARC)
+	phandle linux_phandle;
+#endif
+	char	*full_name;
+
+	struct	property *properties;
+	struct	property *deadprops;	/* removed properties */
+	struct	device_node *parent;
+	struct	device_node *child;
+	struct	device_node *sibling;
+	struct	device_node *next;	/* next device of same type */
+	struct	device_node *allnext;	/* next in list of all nodes */
+	struct	proc_dir_entry *pde;	/* this node's proc directory */
+	struct	kref kref;
+	unsigned long _flags;
+	void	*data;
+#if defined(CONFIG_SPARC)
+	char	*path_component_name;
+	unsigned int unique_id;
+	struct of_irq_controller *irq_trans;
+#endif
+};
+
 #include <asm/prom.h>
 
 /* flag descriptions */
-- 
cgit v1.2.3


From 61e955db539e748cff2b8ea3bf7705259ebe9fb6 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:51 -0600
Subject: of: Move OF_IS_DYNAMIC and OF_MARK_DYNAMIC macros to of.h

Merge of common code duplicated between Sparc, PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/sparc/include/asm/prom.h | 3 ---
 include/linux/of.h            | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index e5f4a1d8fc46..ddbd870b5720 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -34,9 +34,6 @@ struct of_irq_controller {
 	void		*data;
 };
 
-#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
-#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
-
 extern struct device_node *of_find_node_by_cpuid(int cpuid);
 extern int of_set_property(struct device_node *node, const char *name, void *val, int len);
 extern struct mutex of_set_property_mutex;
diff --git a/include/linux/of.h b/include/linux/of.h
index 65a158dc7257..a66c1eb31693 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -69,6 +69,9 @@ struct device_node {
 #define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
 #define OF_DETACHED	2 /* node has been detached from the device tree */
 
+#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
+#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
+
 #define OF_BAD_ADDR	((u64)-1)
 
 extern struct device_node *of_find_node_by_name(struct device_node *from,
-- 
cgit v1.2.3


From d8678b58708d7e6bf947ebd03eaf44baf2adfad8 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:53 -0600
Subject: of: add common header for flattened device tree representation

Add a common header file for working with the flattened device tree
data structure and merge the shared data tags used by Microblaze and
PowerPC

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 12 +-----------
 arch/microblaze/kernel/head.S      |  2 +-
 arch/powerpc/include/asm/prom.h    | 12 +-----------
 include/linux/of_fdt.h             | 26 ++++++++++++++++++++++++++
 4 files changed, 29 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/of_fdt.h

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 64e8b3a8c3cf..5f461f08db11 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -17,20 +17,10 @@
 #ifndef _ASM_MICROBLAZE_PROM_H
 #define _ASM_MICROBLAZE_PROM_H
 #ifdef __KERNEL__
-
-/* Definitions used by the flattened device tree */
-#define OF_DT_HEADER		0xd00dfeed /* marker */
-#define OF_DT_BEGIN_NODE	0x1 /* Start of node, full name */
-#define OF_DT_END_NODE		0x2 /* End node */
-#define OF_DT_PROP		0x3 /* Property: name off, size, content */
-#define OF_DT_NOP		0x4 /* nop */
-#define OF_DT_END		0x9
-
-#define OF_DT_VERSION		0x10
-
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/of_fdt.h>
 #include <linux/proc_fs.h>
 #include <linux/platform_device.h>
 #include <asm/irq.h>
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 697ce3007f30..30916193fcc7 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -31,7 +31,7 @@
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
-#include <asm/prom.h>		/* for OF_DT_HEADER */
+#include <linux/of_fdt.h>		/* for OF_DT_HEADER */
 
 #ifdef CONFIG_MMU
 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index c918db535f08..7181f8ac40f9 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -17,6 +17,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
+#include <linux/of_fdt.h>
 #include <linux/proc_fs.h>
 #include <linux/platform_device.h>
 #include <asm/irq.h>
@@ -29,17 +30,6 @@
 #define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
 
-/* Definitions used by the flattened device tree */
-#define OF_DT_HEADER		0xd00dfeed	/* marker */
-#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
-#define OF_DT_END_NODE		0x2		/* End node */
-#define OF_DT_PROP		0x3		/* Property: name off, size,
-						 * content */
-#define OF_DT_NOP		0x4		/* nop */
-#define OF_DT_END		0x9
-
-#define OF_DT_VERSION		0x10
-
 /*
  * This is what gets passed to the kernel by prom_init or kexec
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
new file mode 100644
index 000000000000..8b5ecc1cb6aa
--- /dev/null
+++ b/include/linux/of_fdt.h
@@ -0,0 +1,26 @@
+/*
+ * Definitions for working with the Flattened Device Tree data format
+ *
+ * Copyright 2009 Benjamin Herrenschmidt, IBM Corp
+ * benh@kernel.crashing.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_OF_FDT_H
+#define _LINUX_OF_FDT_H
+
+/* Definitions used by the flattened device tree */
+#define OF_DT_HEADER		0xd00dfeed	/* marker */
+#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
+#define OF_DT_END_NODE		0x2		/* End node */
+#define OF_DT_PROP		0x3		/* Property: name off, size,
+						 * content */
+#define OF_DT_NOP		0x4		/* nop */
+#define OF_DT_END		0x9
+
+#define OF_DT_VERSION		0x10
+
+#endif /* _LINUX_OF_FDT_H */
-- 
cgit v1.2.3


From d45d94f672e3c79b0db1e6d76e1638ee521d56c0 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:55 -0600
Subject: of: merge struct boot_param_header from Microblaze and PowerPC

Merge common code for working with Flattened Device Tree data structure

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 30 ------------------------------
 arch/powerpc/include/asm/prom.h    | 31 -------------------------------
 include/linux/of_fdt.h             | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 5f461f08db11..dfc4afcdbd2b 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -33,36 +33,6 @@
 #define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
 
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header {
-	u32	magic; /* magic word OF_DT_HEADER */
-	u32	totalsize; /* total size of DT block */
-	u32	off_dt_struct; /* offset to structure */
-	u32	off_dt_strings; /* offset to strings */
-	u32	off_mem_rsvmap; /* offset to memory reserve map */
-	u32	version; /* format version */
-	u32	last_comp_version; /* last compatible version */
-	/* version 2 fields below */
-	u32	boot_cpuid_phys; /* Physical CPU id we're booting on */
-	/* version 3 fields below */
-	u32	dt_strings_size; /* size of the DT strings block */
-	/* version 17 fields below */
-	u32	dt_struct_size; /* size of the DT structure block */
-};
-
 extern struct device_node *of_chosen;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7181f8ac40f9..ef20e6c23235 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -30,37 +30,6 @@
 #define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
 
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header
-{
-	u32	magic;			/* magic word OF_DT_HEADER */
-	u32	totalsize;		/* total size of DT block */
-	u32	off_dt_struct;		/* offset to structure */
-	u32	off_dt_strings;		/* offset to strings */
-	u32	off_mem_rsvmap;		/* offset to memory reserve map */
-	u32	version;		/* format version */
-	u32	last_comp_version;	/* last compatible version */
-	/* version 2 fields below */
-	u32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
-	/* version 3 fields below */
-	u32	dt_strings_size;	/* size of the DT strings block */
-	/* version 17 fields below */
-	u32	dt_struct_size;		/* size of the DT structure block */
-};
-
 extern struct device_node *of_chosen;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 8b5ecc1cb6aa..b37ad3a973b9 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -23,4 +23,36 @@
 
 #define OF_DT_VERSION		0x10
 
+#ifndef __ASSEMBLY__
+/*
+ * This is what gets passed to the kernel by prom_init or kexec
+ *
+ * The dt struct contains the device tree structure, full pathes and
+ * property contents. The dt strings contain a separate block with just
+ * the strings for the property names, and is fully page aligned and
+ * self contained in a page, so that it can be kept around by the kernel,
+ * each property name appears only once in this page (cheap compression)
+ *
+ * the mem_rsvmap contains a map of reserved ranges of physical memory,
+ * passing it here instead of in the device-tree itself greatly simplifies
+ * the job of everybody. It's just a list of u64 pairs (base/size) that
+ * ends when size is 0
+ */
+struct boot_param_header {
+	u32	magic;			/* magic word OF_DT_HEADER */
+	u32	totalsize;		/* total size of DT block */
+	u32	off_dt_struct;		/* offset to structure */
+	u32	off_dt_strings;		/* offset to strings */
+	u32	off_mem_rsvmap;		/* offset to memory reserve map */
+	u32	version;		/* format version */
+	u32	last_comp_version;	/* last compatible version */
+	/* version 2 fields below */
+	u32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	u32	dt_strings_size;	/* size of the DT strings block */
+	/* version 17 fields below */
+	u32	dt_struct_size;		/* size of the DT structure block */
+};
+
+#endif /* __ASSEMBLY__ */
 #endif /* _LINUX_OF_FDT_H */
-- 
cgit v1.2.3


From 50436312f47f1fd2bf82c983638fe27ca7e03238 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:57:58 -0600
Subject: of: merge of_node_*_flag() and set_node_proc_entry()

Merge common code between PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 16 ----------------
 arch/powerpc/include/asm/prom.h    | 17 -----------------
 include/linux/of.h                 | 16 ++++++++++++++++
 3 files changed, 16 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index dfc4afcdbd2b..180d84481306 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -35,24 +35,8 @@
 
 extern struct device_node *of_chosen;
 
-static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
-{
-	return test_bit(flag, &n->_flags);
-}
-
-static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
-{
-	set_bit(flag, &n->_flags);
-}
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-static inline void set_node_proc_entry(struct device_node *dn,
-					struct proc_dir_entry *de)
-{
-	dn->pde = de;
-}
-
 extern struct device_node *allnodes;	/* temporary while merging */
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index ef20e6c23235..2cfd43288a3e 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -32,25 +32,8 @@
 
 extern struct device_node *of_chosen;
 
-static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
-{
-	return test_bit(flag, &n->_flags);
-}
-
-static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
-{
-	set_bit(flag, &n->_flags);
-}
-
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
-{
-	dn->pde = de;
-}
-
-
 extern struct device_node *of_find_all_nodes(struct device_node *prev);
 extern struct device_node *of_node_get(struct device_node *node);
 extern void of_node_put(struct device_node *node);
diff --git a/include/linux/of.h b/include/linux/of.h
index a66c1eb31693..d5f666290f6b 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -63,6 +63,22 @@ struct device_node {
 #endif
 };
 
+static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
+{
+	return test_bit(flag, &n->_flags);
+}
+
+static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
+{
+	set_bit(flag, &n->_flags);
+}
+
+static inline void
+set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
+{
+	dn->pde = de;
+}
+
 #include <asm/prom.h>
 
 /* flag descriptions */
-- 
cgit v1.2.3


From b6caf2ad7ce30648b89c1cf40d8f7cf6f4b58033 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:58:00 -0600
Subject: of: merge of_read_number() an of_read_ulong()

Merge common code between Microblaze and PowerPC

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 12 ------------
 arch/powerpc/include/asm/prom.h    | 20 --------------------
 include/linux/of.h                 | 23 +++++++++++++++++++++++
 3 files changed, 23 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 180d84481306..d4f57ffdae3f 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -82,18 +82,6 @@ extern int release_OF_resource(struct device_node *node, int index);
  * OF address retreival & translation
  */
 
-/* Helper to read a big number; size is in cells (not bytes) */
-static inline u64 of_read_number(const u32 *cell, int size)
-{
-	u64 r = 0;
-	while (size--)
-		r = (r << 32) | *(cell++);
-	return r;
-}
-
-/* Like of_read_number, but we want an unsigned long result */
-#define of_read_ulong(cell, size)	of_read_number(cell, size)
-
 /* Translate an OF address block into a CPU physical address
  */
 extern u64 of_translate_address(struct device_node *np, const u32 *addr);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 2cfd43288a3e..d8c0525c3139 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -89,26 +89,6 @@ extern int release_OF_resource(struct device_node* node, int index);
  * OF address retreival & translation
  */
 
-
-/* Helper to read a big number; size is in cells (not bytes) */
-static inline u64 of_read_number(const u32 *cell, int size)
-{
-	u64 r = 0;
-	while (size--)
-		r = (r << 32) | *(cell++);
-	return r;
-}
-
-/* Like of_read_number, but we want an unsigned long result */
-#ifdef CONFIG_PPC32
-static inline unsigned long of_read_ulong(const u32 *cell, int size)
-{
-	return cell[size-1];
-}
-#else
-#define of_read_ulong(cell, size)	of_read_number(cell, size)
-#endif
-
 /* Translate an OF address block into a CPU physical address
  */
 extern u64 of_translate_address(struct device_node *np, const u32 *addr);
diff --git a/include/linux/of.h b/include/linux/of.h
index d5f666290f6b..18e4379b8b7f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -79,6 +79,29 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
 	dn->pde = de;
 }
 
+/*
+ * OF address retreival & translation
+ */
+
+/* Helper to read a big number; size is in cells (not bytes) */
+static inline u64 of_read_number(const u32 *cell, int size)
+{
+	u64 r = 0;
+	while (size--)
+		r = (r << 32) | *(cell++);
+	return r;
+}
+
+/* Like of_read_number, but we want an unsigned long result */
+#ifdef CONFIG_PPC32
+static inline unsigned long of_read_ulong(const u32 *cell, int size)
+{
+	return cell[size-1];
+}
+#else
+#define of_read_ulong(cell, size)	of_read_number(cell, size)
+#endif
+
 #include <asm/prom.h>
 
 /* flag descriptions */
-- 
cgit v1.2.3


From 526b5b3ed97bac22ed0c9feed97adcdc3a25244c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:58:02 -0600
Subject: of: merge of_node_get(), of_node_put() and of_find_all_nodes()

Merge common code between Sparc, PowerPC and Microblaze.

Sparc differs in the implementation at this point, so this patch uses
a #ifdef to handle sparc differently for now.  The merging of
implementations will occur in a later patch

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h |  4 ----
 arch/powerpc/include/asm/prom.h    |  4 ----
 arch/sparc/include/asm/prom.h      |  9 ---------
 include/linux/of.h                 | 16 ++++++++++++++++
 4 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index d4f57ffdae3f..c92b4a9e4397 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -40,10 +40,6 @@ extern struct device_node *of_chosen;
 extern struct device_node *allnodes;	/* temporary while merging */
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-
 /* For scanning the flat device-tree at boot time */
 extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
 					const char *uname, int depth,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index d8c0525c3139..622769cd1d62 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -34,10 +34,6 @@ extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-
 /* For scanning the flat device-tree at boot time */
 extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
 					    const char *uname, int depth,
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index ddbd870b5720..f845828ca4c6 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -51,15 +51,6 @@ extern void prom_build_devicetree(void);
 extern void of_populate_present_mask(void);
 extern void of_fill_in_cpu_data(void);
 
-/* Dummy ref counting routines - to be implemented later */
-static inline struct device_node *of_node_get(struct device_node *node)
-{
-	return node;
-}
-static inline void of_node_put(struct device_node *node)
-{
-}
-
 /* These routines are here to provide compatibility with how powerpc
  * handles IRQ mapping for OF device nodes.  We precompute and permanently
  * register them in the of_device objects, whereas powerpc computes them
diff --git a/include/linux/of.h b/include/linux/of.h
index 18e4379b8b7f..4636bba93afa 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -79,6 +79,22 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
 	dn->pde = de;
 }
 
+#if defined(CONFIG_SPARC)
+/* Dummy ref counting routines - to be implemented later */
+static inline struct device_node *of_node_get(struct device_node *node)
+{
+	return node;
+}
+static inline void of_node_put(struct device_node *node)
+{
+}
+
+#else
+extern struct device_node *of_find_all_nodes(struct device_node *prev);
+extern struct device_node *of_node_get(struct device_node *node);
+extern void of_node_put(struct device_node *node);
+#endif
+
 /*
  * OF address retreival & translation
  */
-- 
cgit v1.2.3


From 8482f56803b9498af84bc09e7bc769a5924f6443 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:58:04 -0600
Subject: of: merge of_*_flat_dt*() functions

Merge common flattened device tree code between Microblaze and PowerPC

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 11 -----------
 arch/powerpc/include/asm/prom.h    | 10 ----------
 include/linux/of_fdt.h             | 14 ++++++++++++++
 3 files changed, 14 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index c92b4a9e4397..be4db2a22245 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -40,17 +40,6 @@ extern struct device_node *of_chosen;
 extern struct device_node *allnodes;	/* temporary while merging */
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
-/* For scanning the flat device-tree at boot time */
-extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
-					const char *uname, int depth,
-					void *data),
-				void *data);
-extern void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
-					unsigned long *size);
-extern int __init
-		of_flat_dt_is_compatible(unsigned long node, const char *name);
-extern unsigned long __init of_get_flat_dt_root(void);
-
 /* For updating the device tree at runtime */
 extern void of_attach_node(struct device_node *);
 extern void of_detach_node(struct device_node *);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 622769cd1d62..c8b59330b04a 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -34,16 +34,6 @@ extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-/* For scanning the flat device-tree at boot time */
-extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
-					    const char *uname, int depth,
-					    void *data),
-				  void *data);
-extern void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
-					unsigned long *size);
-extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name);
-extern unsigned long __init of_get_flat_dt_root(void);
-
 /* For updating the device tree at runtime */
 extern void of_attach_node(struct device_node *);
 extern void of_detach_node(struct device_node *);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b37ad3a973b9..b363eadea819 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -12,6 +12,9 @@
 #ifndef _LINUX_OF_FDT_H
 #define _LINUX_OF_FDT_H
 
+#include <linux/types.h>
+#include <linux/init.h>
+
 /* Definitions used by the flattened device tree */
 #define OF_DT_HEADER		0xd00dfeed	/* marker */
 #define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
@@ -54,5 +57,16 @@ struct boot_param_header {
 	u32	dt_struct_size;		/* size of the DT structure block */
 };
 
+/* For scanning the flat device-tree at boot time */
+extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
+					    const char *uname, int depth,
+					    void *data),
+				  void *data);
+extern void __init *of_get_flat_dt_prop(unsigned long node, const char *name,
+					unsigned long *size);
+extern int __init of_flat_dt_is_compatible(unsigned long node,
+					   const char *name);
+extern unsigned long __init of_get_flat_dt_root(void);
+
 #endif /* __ASSEMBLY__ */
 #endif /* _LINUX_OF_FDT_H */
-- 
cgit v1.2.3


From 82b2928c95d824afd9af3bb41660f3c3fa1f234e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:58:07 -0600
Subject: of: merge other miscellaneous prototypes

Merge common prototypes used by Microblaze and PowerPC

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/include/asm/prom.h | 12 ------------
 arch/powerpc/include/asm/prom.h    | 14 --------------
 include/linux/of_fdt.h             | 14 ++++++++++++++
 3 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index be4db2a22245..ef3ec1d6ceb3 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -45,19 +45,7 @@ extern void of_attach_node(struct device_node *);
 extern void of_detach_node(struct device_node *);
 
 /* Other Prototypes */
-extern void finish_device_tree(void);
-extern void unflatten_device_tree(void);
 extern int early_uartlite_console(void);
-extern void early_init_devtree(void *);
-extern int machine_is_compatible(const char *compat);
-extern void print_properties(struct device_node *node);
-extern int prom_n_intr_cells(struct device_node *np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node *np, struct property *prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop,
-				struct property *oldprop);
 
 extern struct resource *request_OF_resource(struct device_node *node,
 				int index, const char *name_postfix);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index c8b59330b04a..2ab9cbd98826 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -38,20 +38,6 @@ extern struct device_node *of_chosen;
 extern void of_attach_node(struct device_node *);
 extern void of_detach_node(struct device_node *);
 
-/* Other Prototypes */
-extern void finish_device_tree(void);
-extern void unflatten_device_tree(void);
-extern void early_init_devtree(void *);
-extern int machine_is_compatible(const char *compat);
-extern void print_properties(struct device_node *node);
-extern int prom_n_intr_cells(struct device_node* np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop,
-				struct property *oldprop);
-
 #ifdef CONFIG_PPC32
 /*
  * PCI <-> OF matching functions
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b363eadea819..41d432b13553 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -68,5 +68,19 @@ extern int __init of_flat_dt_is_compatible(unsigned long node,
 					   const char *name);
 extern unsigned long __init of_get_flat_dt_root(void);
 
+/* Other Prototypes */
+extern void finish_device_tree(void);
+extern void unflatten_device_tree(void);
+extern void early_init_devtree(void *);
+extern int machine_is_compatible(const char *compat);
+extern void print_properties(struct device_node *node);
+extern int prom_n_intr_cells(struct device_node* np);
+extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
+extern int prom_add_property(struct device_node* np, struct property* prop);
+extern int prom_remove_property(struct device_node *np, struct property *prop);
+extern int prom_update_property(struct device_node *np,
+				struct property *newprop,
+				struct property *oldprop);
+
 #endif /* __ASSEMBLY__ */
 #endif /* _LINUX_OF_FDT_H */
-- 
cgit v1.2.3


From e91edcf5a2940bb7f1f316c871dfe9e2aaf9d6d9 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 Oct 2009 10:58:09 -0600
Subject: of: merge of_find_all_nodes() implementations

Merge common code between Microblaze and PowerPC, and make it available
to Sparc

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/microblaze/kernel/prom.c | 23 -----------------------
 arch/powerpc/kernel/prom.c    | 23 -----------------------
 drivers/of/base.c             | 26 +++++++++++++++++++++++++-
 include/linux/of.h            |  3 ++-
 4 files changed, 27 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index c005cc6f1aaf..b817df172aa9 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -859,29 +859,6 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 }
 EXPORT_SYMBOL(of_find_node_by_phandle);
 
-/**
- *	of_find_all_nodes - Get next node in global list
- *	@prev:	Previous node or NULL to start iteration
- *		of_node_put() will be called on it
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_all_nodes(struct device_node *prev)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = prev ? prev->allnext : allnodes;
-	for (; np != NULL; np = np->allnext)
-		if (of_node_get(np))
-			break;
-	of_node_put(prev);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_all_nodes);
-
 /**
  *	of_node_get - Increment refcount of a node
  *	@node:	Node to inc refcount, NULL is supported to
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d4405b95bfaa..4ec300862466 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1316,29 +1316,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np)
 	return NULL;
 }
 
-/**
- *	of_find_all_nodes - Get next node in global list
- *	@prev:	Previous node or NULL to start iteration
- *		of_node_put() will be called on it
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_all_nodes(struct device_node *prev)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = prev ? prev->allnext : allnodes;
-	for (; np != 0; np = np->allnext)
-		if (of_node_get(np))
-			break;
-	of_node_put(prev);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_all_nodes);
-
 /**
  *	of_node_get - Increment refcount of a node
  *	@node:	Node to inc refcount, NULL is supported to
diff --git a/drivers/of/base.c b/drivers/of/base.c
index ddf224d456b2..e6627b2320f1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -9,7 +9,8 @@
  *
  *  Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net
  *
- *  Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell.
+ *  Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and
+ *  Grant Likely.
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -82,6 +83,29 @@ struct property *of_find_property(const struct device_node *np,
 }
 EXPORT_SYMBOL(of_find_property);
 
+/**
+ * of_find_all_nodes - Get next node in global list
+ * @prev:	Previous node or NULL to start iteration
+ *		of_node_put() will be called on it
+ *
+ * Returns a node pointer with refcount incremented, use
+ * of_node_put() on it when done.
+ */
+struct device_node *of_find_all_nodes(struct device_node *prev)
+{
+	struct device_node *np;
+
+	read_lock(&devtree_lock);
+	np = prev ? prev->allnext : allnodes;
+	for (; np != NULL; np = np->allnext)
+		if (of_node_get(np))
+			break;
+	of_node_put(prev);
+	read_unlock(&devtree_lock);
+	return np;
+}
+EXPORT_SYMBOL(of_find_all_nodes);
+
 /*
  * Find a property with a given name for a given node
  * and return the value.
diff --git a/include/linux/of.h b/include/linux/of.h
index 4636bba93afa..e7facd8fbce8 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -79,6 +79,8 @@ set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
 	dn->pde = de;
 }
 
+extern struct device_node *of_find_all_nodes(struct device_node *prev);
+
 #if defined(CONFIG_SPARC)
 /* Dummy ref counting routines - to be implemented later */
 static inline struct device_node *of_node_get(struct device_node *node)
@@ -90,7 +92,6 @@ static inline void of_node_put(struct device_node *node)
 }
 
 #else
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
 extern struct device_node *of_node_get(struct device_node *node);
 extern void of_node_put(struct device_node *node);
 #endif
-- 
cgit v1.2.3


From 5deab536654f95345ea11e8ec6ed5c778df348b5 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Tue, 13 Oct 2009 11:14:00 +0800
Subject: ahci / atiixp / pci quirks: rename AMD SB900 into Hudson-2

This patch renames the code name SB900 into Hudson-2

Signed-off-by: Shane Huang <shane.huang@amd.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c        | 2 +-
 drivers/ata/pata_atiixp.c | 2 +-
 drivers/ide/atiixp.c      | 2 +-
 drivers/pci/quirks.c      | 6 +++---
 include/linux/pci_ids.h   | 6 ++----
 5 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8c5c0dd4f6fe..a06f5d6375a8 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -575,7 +575,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(ATI, 0x4395), board_ahci_sb700 }, /* ATI SB700/800 */
 
 	/* AMD */
-	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD SB900 */
+	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
 	/* AMD is using RAID class only for ahci controllers */
 	{ PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
 	  PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index aa4b3f6ae771..ae4454d4e955 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -246,7 +246,7 @@ static const struct pci_device_id atiixp[] = {
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP700_IDE), },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_SB900_IDE), },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_HUDSON2_IDE), },
 
 	{ },
 };
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
index 6396c3ad3252..837322b10a4c 100644
--- a/drivers/ide/atiixp.c
+++ b/drivers/ide/atiixp.c
@@ -177,7 +177,7 @@ static const struct pci_device_id atiixp_pci_tbl[] = {
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), 0 },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), 1 },
 	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP700_IDE), 0 },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_SB900_IDE), 0 },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_HUDSON2_IDE), 0 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, atiixp_pci_tbl);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a790b1771f9f..245d2cdb4765 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1009,7 +1009,7 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82454NX,
 
 static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev)
 {
-	/* set SBX00 SATA in IDE mode to AHCI mode */
+	/* set SBX00/Hudson-2 SATA in IDE mode to AHCI mode */
 	u8 tmp;
 
 	pci_read_config_byte(pdev, PCI_CLASS_DEVICE, &tmp);
@@ -1028,8 +1028,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SATA_IDE, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
 
 /*
  *	Serverworks CSB5 IDE does not fully support native mode
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f490e7a7307a..86257a412732 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -379,9 +379,6 @@
 #define PCI_DEVICE_ID_ATI_IXP600_IDE	0x438c
 #define PCI_DEVICE_ID_ATI_IXP700_SATA	0x4390
 #define PCI_DEVICE_ID_ATI_IXP700_IDE	0x439c
-/* AMD SB Chipset */
-#define PCI_DEVICE_ID_AMD_SB900_IDE	 0x780c
-#define PCI_DEVICE_ID_AMD_SB900_SATA_IDE 0x7800
 
 #define PCI_VENDOR_ID_VLSI		0x1004
 #define PCI_DEVICE_ID_VLSI_82C592	0x0005
@@ -553,9 +550,10 @@
 #define PCI_DEVICE_ID_AMD_CS5536_UDC    0x2096
 #define PCI_DEVICE_ID_AMD_CS5536_UOC    0x2097
 #define PCI_DEVICE_ID_AMD_CS5536_IDE    0x209A
-
 #define PCI_DEVICE_ID_AMD_LX_VIDEO  0x2081
 #define PCI_DEVICE_ID_AMD_LX_AES    0x2082
+#define PCI_DEVICE_ID_AMD_HUDSON2_IDE		0x780c
+#define PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE	0x7800
 
 #define PCI_VENDOR_ID_TRIDENT		0x1023
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000
-- 
cgit v1.2.3


From b0aba1e66c38d64be2c7dbf4b08c71857031ab67 Mon Sep 17 00:00:00 2001
From: Samu Onkalo <samu.p.onkalo@nokia.com>
Date: Sun, 18 Oct 2009 00:38:57 -0700
Subject: Input: add open and close methods for polled devices

Optional open and close methods for preparing and closing
the device.

Signed-off-by: Samu Onkalo <samu.p.onkalo@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input-polldev.c     |  7 +++++--
 drivers/input/misc/wistron_btns.c |  2 +-
 include/linux/input-polldev.h     | 11 +++++++----
 3 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 0d3ce7a50fb1..910220c127cb 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -80,8 +80,8 @@ static int input_open_polled_device(struct input_dev *input)
 	if (error)
 		return error;
 
-	if (dev->flush)
-		dev->flush(dev);
+	if (dev->open)
+		dev->open(dev);
 
 	queue_delayed_work(polldev_wq, &dev->work,
 			   msecs_to_jiffies(dev->poll_interval));
@@ -95,6 +95,9 @@ static void input_close_polled_device(struct input_dev *input)
 
 	cancel_delayed_work_sync(&dev->work);
 	input_polldev_stop_workqueue();
+
+	if (dev->close)
+		dev->close(dev);
 }
 
 /**
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index a932179c4c9e..00eb9d651d97 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -1263,7 +1263,7 @@ static int __devinit setup_input_dev(void)
 	if (!wistron_idev)
 		return -ENOMEM;
 
-	wistron_idev->flush = wistron_flush;
+	wistron_idev->open = wistron_flush;
 	wistron_idev->poll = wistron_poll;
 	wistron_idev->poll_interval = POLL_INTERVAL_DEFAULT;
 
diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h
index 597a0077b3c5..5c0ec68a965e 100644
--- a/include/linux/input-polldev.h
+++ b/include/linux/input-polldev.h
@@ -14,9 +14,11 @@
 
 /**
  * struct input_polled_dev - simple polled input device
- * @private: private driver data
- * @flush: driver-supplied method that flushes device's state upon
- *	opening (optional)
+ * @private: private driver data.
+ * @open: driver-supplied method that prepares device for polling
+ *	(enabled the device and maybe flushes device state).
+ * @close: driver-supplied method that is called when device is no
+ *	longer being polled. Used to put device into low power mode.
  * @poll: driver-supplied method that polls the device and posts
  *	input events (mandatory).
  * @poll_interval: specifies how often the poll() method shoudl be called.
@@ -30,7 +32,8 @@
 struct input_polled_dev {
 	void *private;
 
-	void (*flush)(struct input_polled_dev *dev);
+	void (*open)(struct input_polled_dev *dev);
+	void (*close)(struct input_polled_dev *dev);
 	void (*poll)(struct input_polled_dev *dev);
 	unsigned int poll_interval; /* msec */
 
-- 
cgit v1.2.3


From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Oct 2009 06:30:45 +0000
Subject: inet: rename some inet_sock fields

In order to have better cache layouts of struct sock (separate zones
for rx/tx paths), we need this preliminary patch.

Goal is to transfert fields used at lookup time in the first
read-mostly cache line (inside struct sock_common) and move sk_refcnt
to a separate cache line (only written by rx path)

This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr,
sport and id fields. This allows a future patch to define these
fields as macros, like sk_refcnt, without name clashes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppol2tp.c                         | 22 ++++----
 fs/ocfs2/cluster/netdebug.c                    |  8 +--
 include/linux/ipv6.h                           |  2 +-
 include/net/inet6_hashtables.h                 |  4 +-
 include/net/inet_hashtables.h                  | 12 ++---
 include/net/inet_sock.h                        | 36 ++++++-------
 include/net/inet_timewait_sock.h               |  2 +-
 include/net/ip.h                               | 12 ++---
 net/dccp/ipv4.c                                | 40 ++++++++-------
 net/dccp/ipv6.c                                | 27 +++++-----
 net/dccp/output.c                              |  4 +-
 net/dccp/probe.c                               | 13 ++---
 net/dccp/proto.c                               |  4 +-
 net/ipv4/af_inet.c                             | 62 +++++++++++------------
 net/ipv4/datagram.c                            | 18 +++----
 net/ipv4/inet_connection_sock.c                | 20 ++++----
 net/ipv4/inet_diag.c                           | 26 +++++-----
 net/ipv4/inet_hashtables.c                     | 34 +++++++------
 net/ipv4/inet_timewait_sock.c                  | 12 ++---
 net/ipv4/ip_input.c                            |  2 +-
 net/ipv4/ip_output.c                           | 15 +++---
 net/ipv4/ip_sockglue.c                         |  8 +--
 net/ipv4/ipmr.c                                |  2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  8 +--
 net/ipv4/raw.c                                 | 30 +++++------
 net/ipv4/tcp.c                                 |  4 +-
 net/ipv4/tcp_ipv4.c                            | 70 +++++++++++++-------------
 net/ipv4/tcp_output.c                          |  4 +-
 net/ipv4/tcp_probe.c                           | 11 ++--
 net/ipv4/tcp_timer.c                           |  8 +--
 net/ipv4/udp.c                                 | 51 ++++++++++---------
 net/ipv6/af_inet6.c                            | 28 +++++------
 net/ipv6/datagram.c                            | 15 +++---
 net/ipv6/inet6_connection_sock.c               |  6 +--
 net/ipv6/inet6_hashtables.c                    | 12 ++---
 net/ipv6/ip6mr.c                               |  2 +-
 net/ipv6/ipv6_sockglue.c                       |  6 +--
 net/ipv6/raw.c                                 | 30 +++++------
 net/ipv6/syncookies.c                          |  2 +-
 net/ipv6/tcp_ipv6.c                            | 32 ++++++------
 net/ipv6/udp.c                                 | 24 ++++-----
 net/netfilter/xt_socket.c                      |  2 +-
 net/rds/tcp_listen.c                           |  8 +--
 net/sctp/protocol.c                            |  8 +--
 net/sctp/socket.c                              | 30 +++++------
 net/sunrpc/svcsock.c                           |  8 +--
 security/lsm_audit.c                           | 12 ++---
 47 files changed, 408 insertions(+), 388 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index 5910df60c93e..849cc9c62c2a 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -516,7 +516,7 @@ static inline int pppol2tp_verify_udp_checksum(struct sock *sk,
 		return 0;
 
 	inet = inet_sk(sk);
-	psum = csum_tcpudp_nofold(inet->saddr, inet->daddr, ulen,
+	psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
 				  IPPROTO_UDP, 0);
 
 	if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
@@ -949,8 +949,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	inet = inet_sk(sk_tun);
 	udp_len = hdr_len + sizeof(ppph) + total_len;
 	uh = (struct udphdr *) skb->data;
-	uh->source = inet->sport;
-	uh->dest = inet->dport;
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
 	uh->len = htons(udp_len);
 	uh->check = 0;
 	skb_put(skb, sizeof(struct udphdr));
@@ -978,7 +978,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr,
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
 					      udp_len, IPPROTO_UDP, csum);
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
@@ -986,7 +987,8 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->saddr, inet->daddr,
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
 					       udp_len, IPPROTO_UDP, 0);
 	}
 
@@ -1136,8 +1138,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	__skb_push(skb, sizeof(*uh));
 	skb_reset_transport_header(skb);
 	uh = udp_hdr(skb);
-	uh->source = inet->sport;
-	uh->dest = inet->dport;
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
 	uh->len = htons(udp_len);
 	uh->check = 0;
 
@@ -1181,7 +1183,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr,
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
 					      udp_len, IPPROTO_UDP, csum);
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
@@ -1189,7 +1192,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->saddr, inet->daddr,
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
 					       udp_len, IPPROTO_UDP, 0);
 	}
 
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index da794bc07a6c..a3f150e52b02 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v)
 		if (sc->sc_sock) {
 			inet = inet_sk(sc->sc_sock->sk);
 			/* the stack's structs aren't sparse endian clean */
-			saddr = (__force __be32)inet->saddr;
-			daddr = (__force __be32)inet->daddr;
-			sport = (__force __be16)inet->sport;
-			dport = (__force __be16)inet->dport;
+			saddr = (__force __be32)inet->inet_saddr;
+			daddr = (__force __be32)inet->inet_daddr;
+			sport = (__force __be16)inet->inet_sport;
+			dport = (__force __be16)inet->inet_dport;
 		}
 
 		/* XXX sigh, inet-> doesn't have sparse annotation so any
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 56404251248c..e0cc9a7db2b5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -505,7 +505,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 
 #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&& \
-	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
+	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 22c73a77cd99..92838d3a1ab7 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -46,8 +46,8 @@ static inline int inet6_sk_ehashfn(const struct sock *sk)
 	const struct ipv6_pinfo *np = inet6_sk(sk);
 	const struct in6_addr *laddr = &np->rcv_saddr;
 	const struct in6_addr *faddr = &np->daddr;
-	const __u16 lport = inet->num;
-	const __be16 fport = inet->dport;
+	const __u16 lport = inet->inet_num;
+	const __be16 fport = inet->inet_dport;
 	struct net *net = sock_net(sk);
 
 	return inet6_ehashfn(net, laddr, lport, faddr, fport);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5f11c4a0daca..5b698b3b463d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -241,7 +241,7 @@ static inline int inet_lhashfn(struct net *net, const unsigned short num)
 
 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
-	return inet_lhashfn(sock_net(sk), inet_sk(sk)->num);
+	return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num);
 }
 
 /* Caller must disable local BH processing. */
@@ -301,8 +301,8 @@ typedef __u64 __bitwise __addrpair;
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
-	 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
-	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+	 ((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie))  &&	\
+	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))   &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
@@ -313,9 +313,9 @@ typedef __u64 __bitwise __addrpair;
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
 #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
-	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
-	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
-	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+	 (inet_sk(__sk)->inet_daddr	== (__saddr))		&&	\
+	 (inet_sk(__sk)->inet_rcv_saddr	== (__daddr))		&&	\
+	 ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 47004f35cc7e..bd4c53f75ac0 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -93,14 +93,14 @@ struct rtable;
  *
  * @sk - ancestor class
  * @pinet6 - pointer to IPv6 control block
- * @daddr - Foreign IPv4 addr
- * @rcv_saddr - Bound local IPv4 addr
- * @dport - Destination port
- * @num - Local port
- * @saddr - Sending source
+ * @inet_daddr - Foreign IPv4 addr
+ * @inet_rcv_saddr - Bound local IPv4 addr
+ * @inet_dport - Destination port
+ * @inet_num - Local port
+ * @inet_saddr - Sending source
  * @uc_ttl - Unicast TTL
- * @sport - Source port
- * @id - ID counter for DF pkts
+ * @inet_sport - Source port
+ * @inet_id - ID counter for DF pkts
  * @tos - TOS
  * @mc_ttl - Multicasting TTL
  * @is_icsk - is this an inet_connection_sock?
@@ -115,16 +115,16 @@ struct inet_sock {
 	struct ipv6_pinfo	*pinet6;
 #endif
 	/* Socket demultiplex comparisons on incoming packets. */
-	__be32			daddr;
-	__be32			rcv_saddr;
-	__be16			dport;
-	__u16			num;
-	__be32			saddr;
+	__be32			inet_daddr;
+	__be32			inet_rcv_saddr;
+	__be16			inet_dport;
+	__u16			inet_num;
+	__be32			inet_saddr;
 	__s16			uc_ttl;
 	__u16			cmsg_flags;
 	struct ip_options	*opt;
-	__be16			sport;
-	__u16			id;
+	__be16			inet_sport;
+	__u16			inet_id;
 	__u8			tos;
 	__u8			mc_ttl;
 	__u8			pmtudisc;
@@ -190,10 +190,10 @@ static inline unsigned int inet_ehashfn(struct net *net,
 static inline int inet_sk_ehashfn(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const __be32 laddr = inet->rcv_saddr;
-	const __u16 lport = inet->num;
-	const __be32 faddr = inet->daddr;
-	const __be16 fport = inet->dport;
+	const __be32 laddr = inet->inet_rcv_saddr;
+	const __u16 lport = inet->inet_num;
+	const __be32 faddr = inet->inet_daddr;
+	const __be16 fport = inet->inet_dport;
 	struct net *net = sock_net(sk);
 
 	return inet_ehashfn(net, laddr, lport, faddr, fport);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index b63b80fac567..37f3aea074a5 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -194,7 +194,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 static inline __be32 inet_rcv_saddr(const struct sock *sk)
 {
 	return likely(sk->sk_state != TCP_TIME_WAIT) ?
-		inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
+		inet_sk(sk)->inet_rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
 }
 
 extern void inet_twsk_put(struct inet_timewait_sock *tw);
diff --git a/include/net/ip.h b/include/net/ip.h
index 2f47e5482b55..376adf47764e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -240,8 +240,8 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str
 		 * does not change, they drop every other packet in
 		 * a TCP stream using header compression.
 		 */
-		iph->id = (sk && inet_sk(sk)->daddr) ?
-					htons(inet_sk(sk)->id++) : 0;
+		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
+					htons(inet_sk(sk)->inet_id++) : 0;
 	} else
 		__ip_select_ident(iph, dst, 0);
 }
@@ -249,9 +249,9 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str
 static inline void ip_select_ident_more(struct iphdr *iph, struct dst_entry *dst, struct sock *sk, int more)
 {
 	if (iph->frag_off & htons(IP_DF)) {
-		if (sk && inet_sk(sk)->daddr) {
-			iph->id = htons(inet_sk(sk)->id);
-			inet_sk(sk)->id += 1 + more;
+		if (sk && inet_sk(sk)->inet_daddr) {
+			iph->id = htons(inet_sk(sk)->inet_id);
+			inet_sk(sk)->inet_id += 1 + more;
 		} else
 			iph->id = 0;
 	} else
@@ -317,7 +317,7 @@ static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, ch
 
 static __inline__ void inet_reset_saddr(struct sock *sk)
 {
-	inet_sk(sk)->rcv_saddr = inet_sk(sk)->saddr = 0;
+	inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	if (sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7302e1498d46..00028d4b09d9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -62,10 +62,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			       IPPROTO_DCCP,
-			       inet->sport, usin->sin_port, sk, 1);
+			       inet->inet_sport, usin->sin_port, sk, 1);
 	if (tmp < 0)
 		return tmp;
 
@@ -77,12 +77,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (inet->opt == NULL || !inet->opt->srr)
 		daddr = rt->rt_dst;
 
-	if (inet->saddr == 0)
-		inet->saddr = rt->rt_src;
-	inet->rcv_saddr = inet->saddr;
+	if (inet->inet_saddr == 0)
+		inet->inet_saddr = rt->rt_src;
+	inet->inet_rcv_saddr = inet->inet_saddr;
 
-	inet->dport = usin->sin_port;
-	inet->daddr = daddr;
+	inet->inet_dport = usin->sin_port;
+	inet->inet_daddr = daddr;
 
 	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt != NULL)
@@ -98,17 +98,19 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err != 0)
 		goto failure;
 
-	err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport,
-				sk);
+	err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport,
+				inet->inet_dport, sk);
 	if (err != 0)
 		goto failure;
 
 	/* OK, now commit destination to socket.  */
 	sk_setup_caps(sk, &rt->u.dst);
 
-	dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr,
-						    inet->sport, inet->dport);
-	inet->id = dp->dccps_iss ^ jiffies;
+	dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
+						    inet->inet_daddr,
+						    inet->inet_sport,
+						    inet->inet_dport);
+	inet->inet_id = dp->dccps_iss ^ jiffies;
 
 	err = dccp_connect(sk);
 	rt = NULL;
@@ -123,7 +125,7 @@ failure:
 	dccp_set_state(sk, DCCP_CLOSED);
 	ip_rt_put(rt);
 	sk->sk_route_caps = 0;
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	goto out;
 }
 
@@ -352,7 +354,9 @@ void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb)
 	struct dccp_hdr *dh = dccp_hdr(skb);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr);
+	dh->dccph_checksum = dccp_v4_csum_finish(skb,
+						 inet->inet_saddr,
+						 inet->inet_daddr);
 }
 
 EXPORT_SYMBOL_GPL(dccp_v4_send_check);
@@ -393,14 +397,14 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newinet		   = inet_sk(newsk);
 	ireq		   = inet_rsk(req);
-	newinet->daddr	   = ireq->rmt_addr;
-	newinet->rcv_saddr = ireq->loc_addr;
-	newinet->saddr	   = ireq->loc_addr;
+	newinet->inet_daddr	= ireq->rmt_addr;
+	newinet->inet_rcv_saddr = ireq->loc_addr;
+	newinet->inet_saddr	= ireq->loc_addr;
 	newinet->opt	   = ireq->opt;
 	ireq->opt	   = NULL;
 	newinet->mc_index  = inet_iif(skb);
 	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
-	newinet->id	   = jiffies;
+	newinet->inet_id   = jiffies;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index a2afb553d8b3..6d89f9f7d5d8 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -158,8 +158,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 			fl.oif = sk->sk_bound_dev_if;
-			fl.fl_ip_dport = inet->dport;
-			fl.fl_ip_sport = inet->sport;
+			fl.fl_ip_dport = inet->inet_dport;
+			fl.fl_ip_sport = inet->inet_sport;
 			security_sk_classify_flow(sk, &fl);
 
 			err = ip6_dst_lookup(sk, &dst, &fl);
@@ -510,9 +510,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
 
-		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
+		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
@@ -640,7 +640,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
-	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	__inet6_hash(newsk);
 	__inet_inherit_port(sk, newsk);
@@ -968,10 +969,9 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
 			goto failure;
-		} else {
-			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
-			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 		}
+		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
 
 		return err;
 	}
@@ -984,7 +984,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = usin->sin6_port;
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt != NULL && np->opt->srcrt != NULL) {
@@ -1017,7 +1017,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	/* set the source address */
 	ipv6_addr_copy(&np->saddr, saddr);
-	inet->rcv_saddr = LOOPBACK4_IPV6;
+	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	__ip6_dst_store(sk, dst, NULL, NULL);
 
@@ -1026,7 +1026,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
 					  np->opt->opt_nflen);
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	dccp_set_state(sk, DCCP_REQUESTING);
 	err = inet6_hash_connect(&dccp_death_row, sk);
@@ -1035,7 +1035,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
 						      np->daddr.s6_addr32,
-						      inet->sport, inet->dport);
+						      inet->inet_sport,
+						      inet->inet_dport);
 	err = dccp_connect(sk);
 	if (err)
 		goto late_failure;
@@ -1046,7 +1047,7 @@ late_failure:
 	dccp_set_state(sk, DCCP_CLOSED);
 	__sk_dst_reset(sk);
 failure:
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	sk->sk_route_caps = 0;
 	return err;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index c96119fda688..d6bb753bf6ad 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -99,8 +99,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		/* Build DCCP header and checksum it. */
 		dh = dccp_zeroed_hdr(skb, dccp_header_size);
 		dh->dccph_type	= dcb->dccpd_type;
-		dh->dccph_sport	= inet->sport;
-		dh->dccph_dport	= inet->dport;
+		dh->dccph_sport	= inet->inet_sport;
+		dh->dccph_dport	= inet->inet_dport;
 		dh->dccph_doff	= (dccp_header_size + dcb->dccpd_opt_len) / 4;
 		dh->dccph_ccval	= dcb->dccpd_ccval;
 		dh->dccph_cscov = dp->dccps_pcslen;
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 5e6ec8b9b7b6..dc328425fa20 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -80,19 +80,20 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
 		hc = ccid3_hc_tx_sk(sk);
 
-	if (port == 0 || ntohs(inet->dport) == port ||
-	    ntohs(inet->sport) == port) {
+	if (port == 0 || ntohs(inet->inet_dport) == port ||
+	    ntohs(inet->inet_sport) == port) {
 		if (hc)
 			printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n",
-			       &inet->saddr, ntohs(inet->sport),
-			       &inet->daddr, ntohs(inet->dport), size,
+			       &inet->inet_saddr, ntohs(inet->inet_sport),
+			       &inet->inet_daddr, ntohs(inet->inet_dport), size,
 			       hc->tx_s, hc->tx_rtt, hc->tx_p,
 			       hc->tx_x_calc, hc->tx_x_recv >> 6,
 			       hc->tx_x >> 6, hc->tx_t_ipi);
 		else
 			printl("%pI4:%u %pI4:%u %d\n",
-			       &inet->saddr, ntohs(inet->sport),
-			       &inet->daddr, ntohs(inet->dport), size);
+			       &inet->inet_saddr, ntohs(inet->inet_sport),
+			       &inet->inet_daddr, ntohs(inet->inet_dport),
+			       size);
 	}
 
 	jprobe_return();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ecb203fff501..671cd1413d59 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -278,7 +278,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_send_head = NULL;
 	}
 
-	inet->dport = 0;
+	inet->inet_dport = 0;
 
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
@@ -290,7 +290,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 	inet_csk_delack_init(sk);
 	__sk_dst_reset(sk);
 
-	WARN_ON(inet->num && !icsk->icsk_bind_hash);
+	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1deff48b122e..04a14b1600ac 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -174,12 +174,12 @@ static int inet_autobind(struct sock *sk)
 	/* We may need to bind the socket. */
 	lock_sock(sk);
 	inet = inet_sk(sk);
-	if (!inet->num) {
+	if (!inet->inet_num) {
 		if (sk->sk_prot->get_port(sk, 0)) {
 			release_sock(sk);
 			return -EAGAIN;
 		}
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 	}
 	release_sock(sk);
 	return 0;
@@ -354,7 +354,7 @@ lookup_protocol:
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
 	if (SOCK_RAW == sock->type) {
-		inet->num = protocol;
+		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
 			inet->hdrincl = 1;
 	}
@@ -364,7 +364,7 @@ lookup_protocol:
 	else
 		inet->pmtudisc = IP_PMTUDISC_WANT;
 
-	inet->id = 0;
+	inet->inet_id = 0;
 
 	sock_init_data(sock, sk);
 
@@ -381,13 +381,13 @@ lookup_protocol:
 
 	sk_refcnt_debug_inc(sk);
 
-	if (inet->num) {
+	if (inet->inet_num) {
 		/* It assumes that any protocol which allows
 		 * the user to assign a number at socket
 		 * creation time automatically
 		 * shares.
 		 */
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 		/* Add to protocol hash chains. */
 		sk->sk_prot->hash(sk);
 	}
@@ -494,27 +494,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	/* Check these errors (active socket, double bind). */
 	err = -EINVAL;
-	if (sk->sk_state != TCP_CLOSE || inet->num)
+	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
 		goto out_release_sock;
 
-	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
 	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
-		inet->saddr = 0;  /* Use device */
+		inet->inet_saddr = 0;  /* Use device */
 
 	/* Make sure we are allowed to bind here. */
 	if (sk->sk_prot->get_port(sk, snum)) {
-		inet->saddr = inet->rcv_saddr = 0;
+		inet->inet_saddr = inet->inet_rcv_saddr = 0;
 		err = -EADDRINUSE;
 		goto out_release_sock;
 	}
 
-	if (inet->rcv_saddr)
+	if (inet->inet_rcv_saddr)
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-	inet->sport = htons(inet->num);
-	inet->daddr = 0;
-	inet->dport = 0;
+	inet->inet_sport = htons(inet->inet_num);
+	inet->inet_daddr = 0;
+	inet->inet_dport = 0;
 	sk_dst_reset(sk);
 	err = 0;
 out_release_sock:
@@ -532,7 +532,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 	if (uaddr->sa_family == AF_UNSPEC)
 		return sk->sk_prot->disconnect(sk, flags);
 
-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
 	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
 }
@@ -689,17 +689,17 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	sin->sin_family = AF_INET;
 	if (peer) {
-		if (!inet->dport ||
+		if (!inet->inet_dport ||
 		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
 		     peer == 1))
 			return -ENOTCONN;
-		sin->sin_port = inet->dport;
-		sin->sin_addr.s_addr = inet->daddr;
+		sin->sin_port = inet->inet_dport;
+		sin->sin_addr.s_addr = inet->inet_daddr;
 	} else {
-		__be32 addr = inet->rcv_saddr;
+		__be32 addr = inet->inet_rcv_saddr;
 		if (!addr)
-			addr = inet->saddr;
-		sin->sin_port = inet->sport;
+			addr = inet->inet_saddr;
+		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
 	}
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -714,7 +714,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sock *sk = sock->sk;
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
@@ -728,7 +728,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 	struct sock *sk = sock->sk;
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
@@ -1059,9 +1059,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	int err;
 	struct rtable *rt;
-	__be32 old_saddr = inet->saddr;
+	__be32 old_saddr = inet->inet_saddr;
 	__be32 new_saddr;
-	__be32 daddr = inet->daddr;
+	__be32 daddr = inet->inet_daddr;
 
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
@@ -1071,7 +1071,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 			       RT_CONN_FLAGS(sk),
 			       sk->sk_bound_dev_if,
 			       sk->sk_protocol,
-			       inet->sport, inet->dport, sk, 0);
+			       inet->inet_sport, inet->inet_dport, sk, 0);
 	if (err)
 		return err;
 
@@ -1087,7 +1087,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 		       __func__, &old_saddr, &new_saddr);
 	}
 
-	inet->saddr = inet->rcv_saddr = new_saddr;
+	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
 
 	/*
 	 * XXX The only one ugly spot where we need to
@@ -1113,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		return 0;
 
 	/* Reroute. */
-	daddr = inet->daddr;
+	daddr = inet->inet_daddr;
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
 {
@@ -1123,7 +1123,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		.nl_u = {
 			.ip4_u = {
 				.daddr	= daddr,
-				.saddr	= inet->saddr,
+				.saddr	= inet->inet_saddr,
 				.tos	= RT_CONN_FLAGS(sk),
 			},
 		},
@@ -1131,8 +1131,8 @@ int inet_sk_rebuild_header(struct sock *sk)
 		.flags = inet_sk_flowi_flags(sk),
 		.uli_u = {
 			.ports = {
-				.sport = inet->sport,
-				.dport = inet->dport,
+				.sport = inet->inet_sport,
+				.dport = inet->inet_dport,
 			},
 		},
 	};
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 5e6c5a0f3fde..fb2465811b48 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_dst_reset(sk);
 
 	oif = sk->sk_bound_dev_if;
-	saddr = inet->saddr;
+	saddr = inet->inet_saddr;
 	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
 		if (!oif)
 			oif = inet->mc_index;
@@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
 			       RT_CONN_FLAGS(sk), oif,
 			       sk->sk_protocol,
-			       inet->sport, usin->sin_port, sk, 1);
+			       inet->inet_sport, usin->sin_port, sk, 1);
 	if (err) {
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		ip_rt_put(rt);
 		return -EACCES;
 	}
-	if (!inet->saddr)
-		inet->saddr = rt->rt_src;	/* Update source address */
-	if (!inet->rcv_saddr)
-		inet->rcv_saddr = rt->rt_src;
-	inet->daddr = rt->rt_dst;
-	inet->dport = usin->sin_port;
+	if (!inet->inet_saddr)
+		inet->inet_saddr = rt->rt_src;	/* Update source address */
+	if (!inet->inet_rcv_saddr)
+		inet->inet_rcv_saddr = rt->rt_src;
+	inet->inet_daddr = rt->rt_dst;
+	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
-	inet->id = jiffies;
+	inet->inet_id = jiffies;
 
 	sk_dst_set(sk, &rt->u.dst);
 	return(0);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 9139e8f6fdb1..f6a0af759932 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -368,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 			    .proto = sk->sk_protocol,
 			    .flags = inet_sk_flowi_flags(sk),
 			    .uli_u = { .ports =
-				       { .sport = inet_sk(sk)->sport,
+				       { .sport = inet_sk(sk)->inet_sport,
 					 .dport = ireq->rmt_port } } };
 	struct net *net = sock_net(sk);
 
@@ -547,9 +547,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 		newsk->sk_state = TCP_SYN_RECV;
 		newicsk->icsk_bind_hash = NULL;
 
-		inet_sk(newsk)->dport = inet_rsk(req)->rmt_port;
-		inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port);
-		inet_sk(newsk)->sport = inet_rsk(req)->loc_port;
+		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
+		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
+		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
@@ -580,8 +580,8 @@ void inet_csk_destroy_sock(struct sock *sk)
 	/* It cannot be in hash table! */
 	WARN_ON(!sk_unhashed(sk));
 
-	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash);
+	/* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
+	WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
 
 	sk->sk_prot->destroy(sk);
 
@@ -616,8 +616,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 	 * after validation is complete.
 	 */
 	sk->sk_state = TCP_LISTEN;
-	if (!sk->sk_prot->get_port(sk, inet->num)) {
-		inet->sport = htons(inet->num);
+	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
+		inet->inet_sport = htons(inet->inet_num);
 
 		sk_dst_reset(sk);
 		sk->sk_prot->hash(sk);
@@ -693,8 +693,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
 	const struct inet_sock *inet = inet_sk(sk);
 
 	sin->sin_family		= AF_INET;
-	sin->sin_addr.s_addr	= inet->daddr;
-	sin->sin_port		= inet->dport;
+	sin->sin_addr.s_addr	= inet->inet_daddr;
+	sin->sin_port		= inet->inet_dport;
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cb73fdefba91..bdb78dd180ce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -116,10 +116,10 @@ static int inet_csk_diag_fill(struct sock *sk,
 	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
 	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
 
-	r->id.idiag_sport = inet->sport;
-	r->id.idiag_dport = inet->dport;
-	r->id.idiag_src[0] = inet->rcv_saddr;
-	r->id.idiag_dst[0] = inet->daddr;
+	r->id.idiag_sport = inet->inet_sport;
+	r->id.idiag_dport = inet->inet_dport;
+	r->id.idiag_src[0] = inet->inet_rcv_saddr;
+	r->id.idiag_dst[0] = inet->inet_daddr;
 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
@@ -504,11 +504,11 @@ static int inet_csk_diag_dump(struct sock *sk,
 		} else
 #endif
 		{
-			entry.saddr = &inet->rcv_saddr;
-			entry.daddr = &inet->daddr;
+			entry.saddr = &inet->inet_rcv_saddr;
+			entry.daddr = &inet->inet_daddr;
 		}
-		entry.sport = inet->num;
-		entry.dport = ntohs(inet->dport);
+		entry.sport = inet->inet_num;
+		entry.dport = ntohs(inet->inet_dport);
 		entry.userlocks = sk->sk_userlocks;
 
 		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
@@ -584,7 +584,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	if (tmo < 0)
 		tmo = 0;
 
-	r->id.idiag_sport = inet->sport;
+	r->id.idiag_sport = inet->inet_sport;
 	r->id.idiag_dport = ireq->rmt_port;
 	r->id.idiag_src[0] = ireq->loc_addr;
 	r->id.idiag_dst[0] = ireq->rmt_addr;
@@ -639,7 +639,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
 		bc = (struct rtattr *)(r + 1);
-		entry.sport = inet->num;
+		entry.sport = inet->inet_num;
 		entry.userlocks = sk->sk_userlocks;
 	}
 
@@ -732,7 +732,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 					continue;
 				}
 
-				if (r->id.idiag_sport != inet->sport &&
+				if (r->id.idiag_sport != inet->inet_sport &&
 				    r->id.idiag_sport)
 					goto next_listen;
 
@@ -797,10 +797,10 @@ skip_listen_ht:
 				goto next_normal;
 			if (!(r->idiag_states & (1 << sk->sk_state)))
 				goto next_normal;
-			if (r->id.idiag_sport != inet->sport &&
+			if (r->id.idiag_sport != inet->inet_sport &&
 			    r->id.idiag_sport)
 				goto next_normal;
-			if (r->id.idiag_dport != inet->dport &&
+			if (r->id.idiag_dport != inet->inet_dport &&
 			    r->id.idiag_dport)
 				goto next_normal;
 			if (inet_csk_diag_dump(sk, skb, cb) < 0) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a45aaf3d48b1..47ad7aab51e3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -64,7 +64,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 
 	atomic_inc(&hashinfo->bsockets);
 
-	inet_sk(sk)->num = snum;
+	inet_sk(sk)->inet_num = snum;
 	sk_add_bind_node(sk, &tb->owners);
 	tb->num_owners++;
 	inet_csk(sk)->icsk_bind_hash = tb;
@@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 static void __inet_put_port(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
 			hashinfo->bhash_size);
 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
 	struct inet_bind_bucket *tb;
@@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk)
 	__sk_del_bind_node(sk);
 	tb->num_owners--;
 	inet_csk(sk)->icsk_bind_hash = NULL;
-	inet_sk(sk)->num = 0;
+	inet_sk(sk)->inet_num = 0;
 	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
 	spin_unlock(&head->lock);
 }
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port);
 void __inet_inherit_port(struct sock *sk, struct sock *child)
 {
 	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num,
 			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;
@@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	int score = -1;
 	struct inet_sock *inet = inet_sk(sk);
 
-	if (net_eq(sock_net(sk), net) && inet->num == hnum &&
+	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
 			!ipv6_only_sock(sk)) {
-		__be32 rcv_saddr = inet->rcv_saddr;
+		__be32 rcv_saddr = inet->inet_rcv_saddr;
 		score = sk->sk_family == PF_INET ? 1 : 0;
 		if (rcv_saddr) {
 			if (rcv_saddr != daddr)
@@ -273,13 +273,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
-	__be32 daddr = inet->rcv_saddr;
-	__be32 saddr = inet->daddr;
+	__be32 daddr = inet->inet_rcv_saddr;
+	__be32 saddr = inet->inet_daddr;
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
-	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
+	unsigned int hash = inet_ehashfn(net, daddr, lport,
+					 saddr, inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
@@ -312,8 +313,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 unique:
 	/* Must record num and sport now. Otherwise we will see
 	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
+	inet->inet_num = lport;
+	inet->inet_sport = htons(lport);
 	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
@@ -341,8 +342,9 @@ not_unique:
 static inline u32 inet_sk_port_offset(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
-					  inet->dport);
+	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
+					  inet->inet_daddr,
+					  inet->inet_dport);
 }
 
 void __inet_hash_nolisten(struct sock *sk)
@@ -424,7 +426,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		void (*hash)(struct sock *sk))
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
-	const unsigned short snum = inet_sk(sk)->num;
+	const unsigned short snum = inet_sk(sk)->inet_num;
 	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 	int ret;
@@ -485,7 +487,7 @@ ok:
 		/* Head lock still held and bh's disabled */
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
-			inet_sk(sk)->sport = htons(port);
+			inet_sk(sk)->inet_sport = htons(port);
 			hash(sk);
 		}
 		spin_unlock(&head->lock);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2fe571155b22..1f5d508bb18b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
 			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
@@ -124,14 +124,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		kmemcheck_annotate_bitfield(tw, flags);
 
 		/* Give us an identity. */
-		tw->tw_daddr	    = inet->daddr;
-		tw->tw_rcv_saddr    = inet->rcv_saddr;
+		tw->tw_daddr	    = inet->inet_daddr;
+		tw->tw_rcv_saddr    = inet->inet_rcv_saddr;
 		tw->tw_bound_dev_if = sk->sk_bound_dev_if;
-		tw->tw_num	    = inet->num;
+		tw->tw_num	    = inet->inet_num;
 		tw->tw_state	    = TCP_TIME_WAIT;
 		tw->tw_substate	    = state;
-		tw->tw_sport	    = inet->sport;
-		tw->tw_dport	    = inet->dport;
+		tw->tw_sport	    = inet->inet_sport;
+		tw->tw_dport	    = inet->inet_dport;
 		tw->tw_family	    = sk->sk_family;
 		tw->tw_reuse	    = sk->sk_reuse;
 		tw->tw_hash	    = sk->sk_hash;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 6c98b43badf4..fdf51badc8e5 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -161,7 +161,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		/* If socket is bound to an interface, only report
 		 * the packet if it came  from that interface.
 		 */
-		if (sk && inet_sk(sk)->num == protocol &&
+		if (sk && inet_sk(sk)->inet_num == protocol &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    sock_net(sk) == dev_net(dev)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f9895180f481..322b40864ac0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -329,7 +329,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 		__be32 daddr;
 
 		/* Use correct destination address if we have options. */
-		daddr = inet->daddr;
+		daddr = inet->inet_daddr;
 		if(opt && opt->srr)
 			daddr = opt->faddr;
 
@@ -338,13 +338,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 					    .mark = sk->sk_mark,
 					    .nl_u = { .ip4_u =
 						      { .daddr = daddr,
-							.saddr = inet->saddr,
+							.saddr = inet->inet_saddr,
 							.tos = RT_CONN_FLAGS(sk) } },
 					    .proto = sk->sk_protocol,
 					    .flags = inet_sk_flowi_flags(sk),
 					    .uli_u = { .ports =
-						       { .sport = inet->sport,
-							 .dport = inet->dport } } };
+						       { .sport = inet->inet_sport,
+							 .dport = inet->inet_dport } } };
 
 			/* If this fails, retransmit mechanism of transport layer will
 			 * keep trying until route appears or the connection times
@@ -379,7 +379,7 @@ packet_routed:
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen >> 2;
-		ip_options_build(skb, opt, inet->daddr, rt, 0);
+		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
 	}
 
 	ip_select_ident_more(iph, &rt->u.dst, sk,
@@ -846,7 +846,8 @@ int ip_append_data(struct sock *sk,
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
 
 	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
+			       mtu-exthdrlen);
 		return -EMSGSIZE;
 	}
 
@@ -1100,7 +1101,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
 
 	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
 		return -EMSGSIZE;
 	}
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0c0b6e363a20..c602d985fe14 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -245,7 +245,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 {
 	struct ip_ra_chain *ra, *new_ra, **rap;
 
-	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW)
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 		return -EINVAL;
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -492,7 +492,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			if (sk->sk_family == PF_INET ||
 			    (!((1 << sk->sk_state) &
 			       (TCPF_LISTEN | TCPF_CLOSE)) &&
-			     inet->daddr != LOOPBACK4_IPV6)) {
+			     inet->inet_daddr != LOOPBACK4_IPV6)) {
 #endif
 				if (inet->opt)
 					icsk->icsk_ext_hdr_len -= inet->opt->optlen;
@@ -1181,8 +1181,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
 			struct in_pktinfo info;
 
-			info.ipi_addr.s_addr = inet->rcv_saddr;
-			info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+			info.ipi_addr.s_addr = inet->inet_rcv_saddr;
+			info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
 			info.ipi_ifindex = inet->mc_index;
 			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c757f0b4b74c..694974502ea6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -956,7 +956,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 	switch (optname) {
 	case MRT_INIT:
 		if (sk->sk_type != SOCK_RAW ||
-		    inet_sk(sk)->num != IPPROTO_IGMP)
+		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
 			return -EOPNOTSUPP;
 		if (optlen != sizeof(int))
 			return -ENOPROTOOPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index aa95bb82ee6c..9cd423ffafa8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -255,10 +255,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	struct nf_conntrack_tuple tuple;
 
 	memset(&tuple, 0, sizeof(tuple));
-	tuple.src.u3.ip = inet->rcv_saddr;
-	tuple.src.u.tcp.port = inet->sport;
-	tuple.dst.u3.ip = inet->daddr;
-	tuple.dst.u.tcp.port = inet->dport;
+	tuple.src.u3.ip = inet->inet_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.ip = inet->inet_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
 	tuple.src.l3num = PF_INET;
 	tuple.dst.protonum = sk->sk_protocol;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 39e2a6b8752c..9ef8c0829a77 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk)
 	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
 	struct hlist_head *head;
 
-	head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
+	head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
 
 	write_lock_bh(&h->lock);
 	sk_add_node(sk, head);
@@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 	sk_for_each_from(sk, node) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (net_eq(sock_net(sk), net) && inet->num == num	&&
-		    !(inet->daddr && inet->daddr != raddr) 		&&
-		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
+		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
+		    !(inet->inet_daddr && inet->inet_daddr != raddr) 	&&
+		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
 		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
 			goto found; /* gotcha */
 	}
@@ -326,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	int err;
 
 	if (length > rt->u.dst.dev->mtu) {
-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
 			       rt->u.dst.dev->mtu);
 		return -EMSGSIZE;
 	}
@@ -489,10 +489,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		err = -EDESTADDRREQ;
 		if (sk->sk_state != TCP_ESTABLISHED)
 			goto out;
-		daddr = inet->daddr;
+		daddr = inet->inet_daddr;
 	}
 
-	ipc.addr = inet->saddr;
+	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
 	ipc.shtx.flags = 0;
 	ipc.oif = sk->sk_bound_dev_if;
@@ -634,9 +634,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
 		goto out;
-	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
 	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
-		inet->saddr = 0;  /* Use device */
+		inet->inet_saddr = 0;  /* Use device */
 	sk_dst_reset(sk);
 	ret = 0;
 out:	return ret;
@@ -706,7 +706,7 @@ static int raw_init(struct sock *sk)
 {
 	struct raw_sock *rp = raw_sk(sk);
 
-	if (inet_sk(sk)->num == IPPROTO_ICMP)
+	if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
 		memset(&rp->filter, 0, sizeof(rp->filter));
 	return 0;
 }
@@ -743,7 +743,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, unsigned int optlen)
 {
 	if (optname == ICMP_FILTER) {
-		if (inet_sk(sk)->num != IPPROTO_ICMP)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
 			return -EOPNOTSUPP;
 		else
 			return raw_seticmpfilter(sk, optval, optlen);
@@ -773,7 +773,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
 	if (optname == ICMP_FILTER) {
-		if (inet_sk(sk)->num != IPPROTO_ICMP)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
 			return -EOPNOTSUPP;
 		else
 			return raw_geticmpfilter(sk, optval, optlen);
@@ -932,10 +932,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop);
 static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 {
 	struct inet_sock *inet = inet_sk(sp);
-	__be32 dest = inet->daddr,
-	       src = inet->rcv_saddr;
+	__be32 dest = inet->inet_daddr,
+	       src = inet->inet_rcv_saddr;
 	__u16 destp = 0,
-	      srcp  = inet->num;
+	      srcp  = inet->inet_num;
 
 	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf13726259cd..206a291dff03 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1998,7 +1998,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&sk->sk_async_wait_queue);
 #endif
 
-	inet->dport = 0;
+	inet->inet_dport = 0;
 
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
@@ -2022,7 +2022,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 
-	WARN_ON(inet->num && !icsk->icsk_bind_hash);
+	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 99718703d040..a4a3390a5287 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			       IPPROTO_TCP,
-			       inet->sport, usin->sin_port, sk, 1);
+			       inet->inet_sport, usin->sin_port, sk, 1);
 	if (tmp < 0) {
 		if (tmp == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (!inet->opt || !inet->opt->srr)
 		daddr = rt->rt_dst;
 
-	if (!inet->saddr)
-		inet->saddr = rt->rt_src;
-	inet->rcv_saddr = inet->saddr;
+	if (!inet->inet_saddr)
+		inet->inet_saddr = rt->rt_src;
+	inet->inet_rcv_saddr = inet->inet_saddr;
 
-	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
+	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 		/* Reset inherited state */
 		tp->rx_opt.ts_recent	   = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
@@ -210,8 +210,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		}
 	}
 
-	inet->dport = usin->sin_port;
-	inet->daddr = daddr;
+	inet->inet_dport = usin->sin_port;
+	inet->inet_daddr = daddr;
 
 	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt)
@@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		goto failure;
 
 	err = ip_route_newports(&rt, IPPROTO_TCP,
-				inet->sport, inet->dport, sk);
+				inet->inet_sport, inet->inet_dport, sk);
 	if (err)
 		goto failure;
 
@@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_setup_caps(sk, &rt->u.dst);
 
 	if (!tp->write_seq)
-		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
-							   inet->daddr,
-							   inet->sport,
+		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
+							   inet->inet_daddr,
+							   inet->inet_sport,
 							   usin->sin_port);
 
-	inet->id = tp->write_seq ^ jiffies;
+	inet->inet_id = tp->write_seq ^ jiffies;
 
 	err = tcp_connect(sk);
 	rt = NULL;
@@ -261,7 +261,7 @@ failure:
 	tcp_set_state(sk, TCP_CLOSE);
 	ip_rt_put(rt);
 	sk->sk_route_caps = 0;
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	return err;
 }
 
@@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(len, inet->saddr,
-					  inet->daddr, 0);
+		th->check = ~tcp_v4_check(len, inet->inet_saddr,
+					  inet->inet_daddr, 0);
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
-		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
+		th->check = tcp_v4_check(len, inet->inet_saddr,
+					 inet->inet_daddr,
 					 csum_partial(th,
 						      th->doff << 2,
 						      skb->csum));
@@ -848,7 +849,7 @@ static struct tcp_md5sig_key *
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 					 struct sock *addr_sk)
 {
-	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
+	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 }
 
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
@@ -923,7 +924,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add);
 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 			       u8 *newkey, u8 newkeylen)
 {
-	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
+	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
 				 newkey, newkeylen);
 }
 
@@ -1089,8 +1090,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 	__be32 saddr, daddr;
 
 	if (sk) {
-		saddr = inet_sk(sk)->saddr;
-		daddr = inet_sk(sk)->daddr;
+		saddr = inet_sk(sk)->inet_saddr;
+		daddr = inet_sk(sk)->inet_daddr;
 	} else if (req) {
 		saddr = inet_rsk(req)->loc_addr;
 		daddr = inet_rsk(req)->rmt_addr;
@@ -1380,9 +1381,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
 	ireq		      = inet_rsk(req);
-	newinet->daddr	      = ireq->rmt_addr;
-	newinet->rcv_saddr    = ireq->loc_addr;
-	newinet->saddr	      = ireq->loc_addr;
+	newinet->inet_daddr   = ireq->rmt_addr;
+	newinet->inet_rcv_saddr = ireq->loc_addr;
+	newinet->inet_saddr	      = ireq->loc_addr;
 	newinet->opt	      = ireq->opt;
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
@@ -1390,7 +1391,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
-	newinet->id = newtp->write_seq ^ jiffies;
+	newinet->inet_id = newtp->write_seq ^ jiffies;
 
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1403,7 +1404,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
-	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
+	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
+	if (key != NULL) {
 		/*
 		 * We're using one, so create a matching key
 		 * on the newsk structure. If we fail to get
@@ -1412,7 +1414,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 */
 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
 		if (newkey != NULL)
-			tcp_v4_md5_do_add(newsk, newinet->daddr,
+			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
 					  newkey, key->keylen);
 		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 	}
@@ -1711,8 +1713,8 @@ int tcp_v4_remember_stamp(struct sock *sk)
 	struct inet_peer *peer = NULL;
 	int release_it = 0;
 
-	if (!rt || rt->rt_dst != inet->daddr) {
-		peer = inet_getpeer(inet->daddr, 1);
+	if (!rt || rt->rt_dst != inet->inet_daddr) {
+		peer = inet_getpeer(inet->inet_daddr, 1);
 		release_it = 1;
 	} else {
 		if (!rt->peer)
@@ -2225,7 +2227,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
 		i,
 		ireq->loc_addr,
-		ntohs(inet_sk(sk)->sport),
+		ntohs(inet_sk(sk)->inet_sport),
 		ireq->rmt_addr,
 		ntohs(ireq->rmt_port),
 		TCP_SYN_RECV,
@@ -2248,10 +2250,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	__be32 dest = inet->daddr;
-	__be32 src = inet->rcv_saddr;
-	__u16 destp = ntohs(inet->dport);
-	__u16 srcp = ntohs(inet->sport);
+	__be32 dest = inet->inet_daddr;
+	__be32 src = inet->inet_rcv_saddr;
+	__u16 destp = ntohs(inet->inet_dport);
+	__u16 srcp = ntohs(inet->inet_sport);
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
 		timer_active	= 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a7080e..2e2eb74ac4cc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -661,8 +661,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	/* Build TCP header and checksum it. */
 	th = tcp_hdr(skb);
-	th->source		= inet->sport;
-	th->dest		= inet->dport;
+	th->source		= inet->inet_sport;
+	th->dest		= inet->inet_dport;
 	th->seq			= htonl(tcb->seq);
 	th->ack_seq		= htonl(tp->rcv_nxt);
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 59f5b5e7c566..7a3cc2ffad84 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -94,7 +94,8 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	const struct inet_sock *inet = inet_sk(sk);
 
 	/* Only update if port matches */
-	if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
+	if ((port == 0 || ntohs(inet->inet_dport) == port ||
+	     ntohs(inet->inet_sport) == port)
 	    && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
 
 		spin_lock(&tcp_probe.lock);
@@ -103,10 +104,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			struct tcp_log *p = tcp_probe.log + tcp_probe.head;
 
 			p->tstamp = ktime_get();
-			p->saddr = inet->saddr;
-			p->sport = inet->sport;
-			p->daddr = inet->daddr;
-			p->dport = inet->dport;
+			p->saddr = inet->inet_saddr;
+			p->sport = inet->inet_sport;
+			p->daddr = inet->inet_daddr;
+			p->dport = inet->inet_dport;
 			p->length = skb->len;
 			p->snd_nxt = tp->snd_nxt;
 			p->snd_una = tp->snd_una;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cdb2ca7684d4..6e8996cb79d0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -303,15 +303,15 @@ void tcp_retransmit_timer(struct sock *sk)
 		struct inet_sock *inet = inet_sk(sk);
 		if (sk->sk_family == AF_INET) {
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &inet->daddr, ntohs(inet->dport),
-			       inet->num, tp->snd_una, tp->snd_nxt);
+			       &inet->inet_daddr, ntohs(inet->inet_dport),
+			       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &np->daddr, ntohs(inet->dport),
-			       inet->num, tp->snd_una, tp->snd_nxt);
+			       &np->daddr, ntohs(inet->inet_dport),
+			       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
 #endif
 #endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 45a8a7e374d8..ec5c7a720c0c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -214,7 +214,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 			goto fail_unlock;
 	}
 found:
-	inet_sk(sk)->num = snum;
+	inet_sk(sk)->inet_num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
 		sk_nulls_add_node_rcu(sk, &hslot->head);
@@ -233,8 +233,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
 	return 	(!ipv6_only_sock(sk2)  &&
-		 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
-		   inet1->rcv_saddr == inet2->rcv_saddr));
+		 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
+		   inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
 }
 
 int udp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -253,18 +253,18 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
 		struct inet_sock *inet = inet_sk(sk);
 
 		score = (sk->sk_family == PF_INET ? 1 : 0);
-		if (inet->rcv_saddr) {
-			if (inet->rcv_saddr != daddr)
+		if (inet->inet_rcv_saddr) {
+			if (inet->inet_rcv_saddr != daddr)
 				return -1;
 			score += 2;
 		}
-		if (inet->daddr) {
-			if (inet->daddr != saddr)
+		if (inet->inet_daddr) {
+			if (inet->inet_daddr != saddr)
 				return -1;
 			score += 2;
 		}
-		if (inet->dport) {
-			if (inet->dport != sport)
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
 				return -1;
 			score += 2;
 		}
@@ -360,9 +360,10 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
 
 		if (!net_eq(sock_net(s), net)				||
 		    s->sk_hash != hnum					||
-		    (inet->daddr && inet->daddr != rmt_addr)		||
-		    (inet->dport != rmt_port && inet->dport)		||
-		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)	||
+		    (inet->inet_daddr && inet->inet_daddr != rmt_addr)	||
+		    (inet->inet_dport != rmt_port && inet->inet_dport)	||
+		    (inet->inet_rcv_saddr	&&
+		     inet->inet_rcv_saddr != loc_addr)			||
 		    ipv6_only_sock(s)					||
 		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
 			continue;
@@ -646,14 +647,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = inet->daddr;
-		dport = inet->dport;
+		daddr = inet->inet_daddr;
+		dport = inet->inet_dport;
 		/* Open fast path for connected socket.
 		   Route will not be used, if at least one option is set.
 		 */
 		connected = 1;
 	}
-	ipc.addr = inet->saddr;
+	ipc.addr = inet->inet_saddr;
 
 	ipc.oif = sk->sk_bound_dev_if;
 	err = sock_tx_timestamp(msg, sk, &ipc.shtx);
@@ -708,7 +709,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .proto = sk->sk_protocol,
 				    .flags = inet_sk_flowi_flags(sk),
 				    .uli_u = { .ports =
-					       { .sport = inet->sport,
+					       { .sport = inet->inet_sport,
 						 .dport = dport } } };
 		struct net *net = sock_net(sk);
 
@@ -752,7 +753,7 @@ back_from_confirm:
 	inet->cork.fl.fl4_dst = daddr;
 	inet->cork.fl.fl_ip_dport = dport;
 	inet->cork.fl.fl4_src = saddr;
-	inet->cork.fl.fl_ip_sport = inet->sport;
+	inet->cork.fl.fl_ip_sport = inet->inet_sport;
 	up->pending = AF_INET;
 
 do_append_data:
@@ -1029,15 +1030,15 @@ int udp_disconnect(struct sock *sk, int flags)
 	 */
 
 	sk->sk_state = TCP_CLOSE;
-	inet->daddr = 0;
-	inet->dport = 0;
+	inet->inet_daddr = 0;
+	inet->inet_dport = 0;
 	sk->sk_bound_dev_if = 0;
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
 
 	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
 		sk->sk_prot->unhash(sk);
-		inet->sport = 0;
+		inet->inet_sport = 0;
 	}
 	sk_dst_reset(sk);
 	return 0;
@@ -1053,7 +1054,7 @@ void udp_lib_unhash(struct sock *sk)
 
 		spin_lock_bh(&hslot->lock);
 		if (sk_nulls_del_node_init_rcu(sk)) {
-			inet_sk(sk)->num = 0;
+			inet_sk(sk)->inet_num = 0;
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 		}
 		spin_unlock_bh(&hslot->lock);
@@ -1752,10 +1753,10 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 		int bucket, int *len)
 {
 	struct inet_sock *inet = inet_sk(sp);
-	__be32 dest = inet->daddr;
-	__be32 src  = inet->rcv_saddr;
-	__u16 destp	  = ntohs(inet->dport);
-	__u16 srcp	  = ntohs(inet->sport);
+	__be32 dest = inet->inet_daddr;
+	__be32 src  = inet->inet_rcv_saddr;
+	__u16 destp	  = ntohs(inet->inet_dport);
+	__u16 srcp	  = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94216519873c..b6d058818673 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -185,7 +185,7 @@ lookup_protocol:
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
 	if (SOCK_RAW == sock->type) {
-		inet->num = protocol;
+		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
 			inet->hdrincl = 1;
 	}
@@ -228,12 +228,12 @@ lookup_protocol:
 	 */
 	sk_refcnt_debug_inc(sk);
 
-	if (inet->num) {
+	if (inet->inet_num) {
 		/* It assumes that any protocol which allows
 		 * the user to assign a number at socket
 		 * creation time automatically shares.
 		 */
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 		sk->sk_prot->hash(sk);
 	}
 	if (sk->sk_prot->init) {
@@ -281,7 +281,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
-	if (sk->sk_state != TCP_CLOSE || inet->num) {
+	if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -353,8 +353,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		}
 	}
 
-	inet->rcv_saddr = v4addr;
-	inet->saddr = v4addr;
+	inet->inet_rcv_saddr = v4addr;
+	inet->inet_saddr = v4addr;
 
 	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
 
@@ -375,9 +375,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	}
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-	inet->sport = htons(inet->num);
-	inet->dport = 0;
-	inet->daddr = 0;
+	inet->inet_sport = htons(inet->inet_num);
+	inet->inet_dport = 0;
+	inet->inet_daddr = 0;
 out:
 	release_sock(sk);
 	return err;
@@ -441,12 +441,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	sin->sin6_flowinfo = 0;
 	sin->sin6_scope_id = 0;
 	if (peer) {
-		if (!inet->dport)
+		if (!inet->inet_dport)
 			return -ENOTCONN;
 		if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
 		    peer == 1)
 			return -ENOTCONN;
-		sin->sin6_port = inet->dport;
+		sin->sin6_port = inet->inet_dport;
 		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
@@ -456,7 +456,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		else
 			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
 
-		sin->sin6_port = inet->sport;
+		sin->sin6_port = inet->inet_sport;
 	}
 	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 		sin->sin6_scope_id = sk->sk_bound_dev_if;
@@ -655,8 +655,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.fl6_flowlabel = np->flow_label;
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
-		fl.fl_ip_dport = inet->dport;
-		fl.fl_ip_sport = inet->sport;
+		fl.fl_ip_dport = inet->inet_dport;
+		fl.fl_ip_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, &fl);
 
 		if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index dbfec7147aa5..9f70452a69e7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -98,13 +98,14 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set_v4mapped(inet->daddr, &np->daddr);
+		ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
 
 		if (ipv6_addr_any(&np->saddr))
-			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
 		if (ipv6_addr_any(&np->rcv_saddr))
-			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &np->rcv_saddr);
 
 		goto out;
 	}
@@ -133,7 +134,7 @@ ipv4_connected:
 	ipv6_addr_copy(&np->daddr, daddr);
 	np->flow_label = fl.fl6_flowlabel;
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	/*
 	 *	Check for a route to destination an obtain the
@@ -145,8 +146,8 @@ ipv4_connected:
 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
-	fl.fl_ip_dport = inet->dport;
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->inet_dport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
@@ -188,7 +189,7 @@ ipv4_connected:
 
 	if (ipv6_addr_any(&np->rcv_saddr)) {
 		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
-		inet->rcv_saddr = LOOPBACK4_IPV6;
+		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 	}
 
 	ip6_dst_store(sk, dst,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index a9f4a21b31ea..19dceef4fcca 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -132,7 +132,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 
 	sin6->sin6_family = AF_INET6;
 	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
-	sin6->sin6_port	= inet_sk(sk)->dport;
+	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
 	sin6->sin6_scope_id = 0;
@@ -195,8 +195,8 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
-	fl.fl_ip_sport = inet->sport;
-	fl.fl_ip_dport = inet->dport;
+	fl.fl_ip_sport = inet->inet_sport;
+	fl.fl_ip_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 874aed86e1a2..00c6a3e6cddf 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,7 +125,7 @@ static int inline compute_score(struct sock *sk, struct net *net,
 {
 	int score = -1;
 
-	if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
+	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -214,10 +214,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *daddr = &np->rcv_saddr;
 	const struct in6_addr *saddr = &np->daddr;
 	const int dif = sk->sk_bound_dev_if;
-	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
-						inet->dport);
+						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
@@ -248,8 +248,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 unique:
 	/* Must record num and sport now. Otherwise we will see
 	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
+	inet->inet_num = lport;
+	inet->inet_sport = htons(lport);
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sk->sk_hash = hash;
@@ -279,7 +279,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
 	const struct ipv6_pinfo *np = inet6_sk(sk);
 	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
 					  np->daddr.s6_addr32,
-					  inet->dport);
+					  inet->inet_dport);
 }
 
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 716153941fc4..85849b4f5a36 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1297,7 +1297,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	switch (optname) {
 	case MRT6_INIT:
 		if (sk->sk_type != SOCK_RAW ||
-		    inet_sk(sk)->num != IPPROTO_ICMPV6)
+		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		if (optlen < sizeof(int))
 			return -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index dc0f7366073d..39e10ac88019 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -64,7 +64,7 @@ int ip6_ra_control(struct sock *sk, int sel)
 	struct ip6_ra_chain *ra, *new_ra, **rap;
 
 	/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
-	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
 		return -ENOPROTOOPT;
 
 	new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -106,7 +106,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 	if (inet_sk(sk)->is_icsk) {
 		if (opt &&
 		    !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
-		    inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+		    inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
 			icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
 			icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -234,7 +234,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
 	case IPV6_V6ONLY:
 		if (optlen < sizeof(int) ||
-		    inet_sk(sk)->num)
+		    inet_sk(sk)->inet_num)
 			goto e_inval;
 		np->ipv6only = valbool;
 		retv = 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fd737efed96c..52ed7d7f9dab 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
 
 	sk_for_each_from(sk, node)
-		if (inet_sk(sk)->num == num) {
+		if (inet_sk(sk)->inet_num == num) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			if (!net_eq(sock_net(sk), net))
@@ -298,7 +298,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			dev_put(dev);
 	}
 
-	inet->rcv_saddr = inet->saddr = v4addr;
+	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
 	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
@@ -415,14 +415,14 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 				   skb_network_header_len(skb));
 		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 				     &ipv6_hdr(skb)->daddr,
-				     skb->len, inet->num, skb->csum))
+				     skb->len, inet->inet_num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len,
-							 inet->num, 0));
+							 inet->inet_num, 0));
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
@@ -765,8 +765,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		proto = ntohs(sin6->sin6_port);
 
 		if (!proto)
-			proto = inet->num;
-		else if (proto != inet->num)
+			proto = inet->inet_num;
+		else if (proto != inet->inet_num)
 			return(-EINVAL);
 
 		if (proto > 255)
@@ -799,7 +799,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		proto = inet->num;
+		proto = inet->inet_num;
 		daddr = &np->daddr;
 		fl.fl6_flowlabel = np->flow_label;
 	}
@@ -966,7 +966,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
 
 	switch (optname) {
 		case IPV6_CHECKSUM:
-			if (inet_sk(sk)->num == IPPROTO_ICMPV6 &&
+			if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
 			    level == IPPROTO_IPV6) {
 				/*
 				 * RFC3542 tells that IPV6_CHECKSUM socket
@@ -1006,7 +1006,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 			break;
 
 		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 				return -EOPNOTSUPP;
 			return rawv6_seticmpfilter(sk, level, optname, optval,
 						   optlen);
@@ -1029,7 +1029,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 	case SOL_RAW:
 		break;
 	case SOL_ICMPV6:
-		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
 	case SOL_IPV6:
@@ -1086,7 +1086,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 			break;
 
 		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 				return -EOPNOTSUPP;
 			return rawv6_geticmpfilter(sk, level, optname, optval,
 						   optlen);
@@ -1109,7 +1109,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 	case SOL_RAW:
 		break;
 	case SOL_ICMPV6:
-		if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
 			return -EOPNOTSUPP;
 		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
 	case SOL_IPV6:
@@ -1156,7 +1156,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static void rawv6_close(struct sock *sk, long timeout)
 {
-	if (inet_sk(sk)->num == IPPROTO_RAW)
+	if (inet_sk(sk)->inet_num == IPPROTO_RAW)
 		ip6_ra_control(sk, -1);
 	ip6mr_sk_done(sk);
 	sk_common_release(sk);
@@ -1175,7 +1175,7 @@ static int rawv6_init_sk(struct sock *sk)
 {
 	struct raw6_sock *rp = raw6_sk(sk);
 
-	switch (inet_sk(sk)->num) {
+	switch (inet_sk(sk)->inet_num) {
 	case IPPROTO_ICMPV6:
 		rp->checksum = 1;
 		rp->offset   = 2;
@@ -1225,7 +1225,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
 	destp = 0;
-	srcp  = inet_sk(sp)->num;
+	srcp  = inet_sk(sp)->inet_num;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index cbe55e5d9f96..c46da533888a 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -254,7 +254,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, &fl);
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 451763059142..c54ec3615ded 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -226,8 +226,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 #endif
 			goto failure;
 		} else {
-			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
-			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &np->rcv_saddr);
 		}
 
 		return err;
@@ -243,7 +244,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.oif = sk->sk_bound_dev_if;
 	fl.mark = sk->sk_mark;
 	fl.fl_ip_dport = usin->sin6_port;
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -275,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	/* set the source address */
 	ipv6_addr_copy(&np->saddr, saddr);
-	inet->rcv_saddr = LOOPBACK4_IPV6;
+	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(sk, dst, NULL, NULL);
@@ -287,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
 	err = inet6_hash_connect(&tcp_death_row, sk);
@@ -297,8 +298,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 							     np->daddr.s6_addr32,
-							     inet->sport,
-							     inet->dport);
+							     inet->inet_sport,
+							     inet->inet_dport);
 
 	err = tcp_connect(sk);
 	if (err)
@@ -310,7 +311,7 @@ late_failure:
 	tcp_set_state(sk, TCP_CLOSE);
 	__sk_dst_reset(sk);
 failure:
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	sk->sk_route_caps = 0;
 	return err;
 }
@@ -383,8 +384,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 			fl.oif = sk->sk_bound_dev_if;
 			fl.mark = sk->sk_mark;
-			fl.fl_ip_dport = inet->dport;
-			fl.fl_ip_sport = inet->sport;
+			fl.fl_ip_dport = inet->inet_dport;
+			fl.fl_ip_sport = inet->inet_sport;
 			security_skb_classify_flow(skb, &fl);
 
 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
@@ -1291,9 +1292,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
 
-		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
+		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
@@ -1431,7 +1432,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
-	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
@@ -1931,8 +1933,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
+	destp = ntohs(inet->inet_dport);
+	srcp  = ntohs(inet->inet_sport);
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
 		timer_active	= 1;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b86425b7ea22..829d300a6f35 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-	__be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+	__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
 	__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
@@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	/* if both are mapped, treat as IPv4 */
 	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
 		return (!sk2_ipv6only &&
-			(!sk_rcv_saddr || !sk2_rcv_saddr ||
-			  sk_rcv_saddr == sk2_rcv_saddr));
+			(!sk1_rcv_saddr || !sk2_rcv_saddr ||
+			  sk1_rcv_saddr == sk2_rcv_saddr));
 
 	if (addr_type2 == IPV6_ADDR_ANY &&
 	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -100,8 +100,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
 		struct inet_sock *inet = inet_sk(sk);
 
 		score = 0;
-		if (inet->dport) {
-			if (inet->dport != sport)
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
 				return -1;
 			score++;
 		}
@@ -417,8 +417,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 
 		if (s->sk_hash == num && s->sk_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(s);
-			if (inet->dport) {
-				if (inet->dport != rmt_port)
+			if (inet->inet_dport) {
+				if (inet->inet_dport != rmt_port)
 					continue;
 			}
 			if (!ipv6_addr_any(&np->daddr) &&
@@ -792,7 +792,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (ipv6_addr_v4mapped(daddr)) {
 			struct sockaddr_in sin;
 			sin.sin_family = AF_INET;
-			sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
+			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
 			sin.sin_addr.s_addr = daddr->s6_addr32[3];
 			msg->msg_name = &sin;
 			msg->msg_namelen = sizeof(sin);
@@ -865,7 +865,7 @@ do_udp_sendmsg:
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_dport = inet->inet_dport;
 		daddr = &np->daddr;
 		fl.fl6_flowlabel = np->flow_label;
 		connected = 1;
@@ -911,7 +911,7 @@ do_udp_sendmsg:
 		fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_sport = inet->inet_sport;
 
 	/* merge ip6_build_xmit from ip6_output */
 	if (opt && opt->srcrt) {
@@ -1192,8 +1192,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
+	destp = ntohs(inet->inet_dport);
+	srcp  = ntohs(inet->inet_sport);
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index ebf00ad5b194..362afbd60a96 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -149,7 +149,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
 
 		/* Ignore sockets listening on INADDR_ANY */
 		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
-			    inet_sk(sk)->rcv_saddr == 0);
+			    inet_sk(sk)->inet_rcv_saddr == 0);
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 24b743eb0b1b..45474a436862 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -67,11 +67,11 @@ static int rds_tcp_accept_one(struct socket *sock)
 	inet = inet_sk(new_sock->sk);
 
 	rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n",
-		  NIPQUAD(inet->saddr), ntohs(inet->sport),
-		  NIPQUAD(inet->daddr), ntohs(inet->dport));
+		  NIPQUAD(inet->inet_saddr), ntohs(inet->inet_sport),
+		  NIPQUAD(inet->inet_daddr), ntohs(inet->inet_dport));
 
-	conn = rds_conn_create(inet->saddr, inet->daddr, &rds_tcp_transport,
-			       GFP_KERNEL);
+	conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr,
+			       &rds_tcp_transport, GFP_KERNEL);
 	if (IS_ERR(conn)) {
 		ret = PTR_ERR(conn);
 		goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 612dc878e05c..d9f4cc2c7869 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -296,19 +296,19 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v4.sin_family = AF_INET;
 	addr->v4.sin_port = 0;
-	addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr;
+	addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr;
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
 static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 {
-	inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr;
+	inet_sk(sk)->inet_rcv_saddr = addr->v4.sin_addr.s_addr;
 }
 
 /* Initialize sk->sk_daddr from sctp_addr. */
 static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 {
-	inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr;
+	inet_sk(sk)->inet_daddr = addr->v4.sin_addr.s_addr;
 }
 
 /* Initialize a sctp_addr from an address parameter. */
@@ -598,7 +598,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
 
 	newinet = inet_sk(newsk);
 
-	newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
+	newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
 
 	sk_refcnt_debug_inc(newsk);
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0970e92c6acd..4085db99033d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -394,7 +394,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 
 	/* Refresh ephemeral port.  */
 	if (!bp->port)
-		bp->port = inet_sk(sk)->num;
+		bp->port = inet_sk(sk)->inet_num;
 
 	/* Add the address to the bind address list.
 	 * Use GFP_ATOMIC since BHs will be disabled.
@@ -403,7 +403,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 
 	/* Copy back into socket for getsockname() use. */
 	if (!ret) {
-		inet_sk(sk)->sport = htons(inet_sk(sk)->num);
+		inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num);
 		af->to_sk_saddr(addr, sk);
 	}
 
@@ -1115,7 +1115,7 @@ static int __sctp_connect(struct sock* sk,
 	}
 
 	/* Initialize sk's dport and daddr for getpeername() */
-	inet_sk(sk)->dport = htons(asoc->peer.port);
+	inet_sk(sk)->inet_dport = htons(asoc->peer.port);
 	af = sctp_get_af_specific(sa_addr->sa.sa_family);
 	af->to_sk_daddr(sa_addr, sk);
 	sk->sk_err = 0;
@@ -5851,7 +5851,7 @@ pp_not_found:
 	 */
 success:
 	if (!sctp_sk(sk)->bind_hash) {
-		inet_sk(sk)->num = snum;
+		inet_sk(sk)->inet_num = snum;
 		sk_add_bind_node(sk, &pp->owner);
 		sctp_sk(sk)->bind_hash = pp;
 	}
@@ -5923,7 +5923,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
 		if (sctp_autobind(sk))
 			return -EAGAIN;
 	} else {
-		if (sctp_get_port(sk, inet_sk(sk)->num)) {
+		if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
 			sk->sk_state = SCTP_SS_CLOSED;
 			return -EADDRINUSE;
 		}
@@ -6094,14 +6094,14 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
 static inline void __sctp_put_port(struct sock *sk)
 {
 	struct sctp_bind_hashbucket *head =
-		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)];
+		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)];
 	struct sctp_bind_bucket *pp;
 
 	sctp_spin_lock(&head->lock);
 	pp = sctp_sk(sk)->bind_hash;
 	__sk_del_bind_node(sk);
 	sctp_sk(sk)->bind_hash = NULL;
-	inet_sk(sk)->num = 0;
+	inet_sk(sk)->inet_num = 0;
 	sctp_bucket_destroy(pp);
 	sctp_spin_unlock(&head->lock);
 }
@@ -6128,7 +6128,7 @@ static int sctp_autobind(struct sock *sk)
 	/* Initialize a local sockaddr structure to INADDR_ANY. */
 	af = sctp_sk(sk)->pf->af;
 
-	port = htons(inet_sk(sk)->num);
+	port = htons(inet_sk(sk)->inet_num);
 	af->inaddr_any(&autoaddr, port);
 
 	return sctp_do_bind(sk, &autoaddr, af->sockaddr_len);
@@ -6697,12 +6697,12 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	/* Initialize sk's sport, dport, rcv_saddr and daddr for
 	 * getsockname() and getpeername()
 	 */
-	newinet->sport = inet->sport;
-	newinet->saddr = inet->saddr;
-	newinet->rcv_saddr = inet->rcv_saddr;
-	newinet->dport = htons(asoc->peer.port);
+	newinet->inet_sport = inet->inet_sport;
+	newinet->inet_saddr = inet->inet_saddr;
+	newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+	newinet->inet_dport = htons(asoc->peer.port);
 	newinet->pmtudisc = inet->pmtudisc;
-	newinet->id = asoc->next_tsn ^ jiffies;
+	newinet->inet_id = asoc->next_tsn ^ jiffies;
 
 	newinet->uc_ttl = inet->uc_ttl;
 	newinet->mc_loop = 1;
@@ -6741,13 +6741,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	newsp->hmac = NULL;
 
 	/* Hook this new socket in to the bind_hash list. */
-	head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)];
+	head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)];
 	sctp_local_bh_disable();
 	sctp_spin_lock(&head->lock);
 	pp = sctp_sk(oldsk)->bind_hash;
 	sk_add_bind_node(newsk, &pp->owner);
 	sctp_sk(newsk)->bind_hash = pp;
-	inet_sk(newsk)->num = inet_sk(oldsk)->num;
+	inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num;
 	sctp_spin_unlock(&head->lock);
 	sctp_local_bh_enable();
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ccc5e83cae5d..c2a17876defd 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -272,14 +272,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 	case PF_INET:
 		len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
 				proto_name,
-				&inet_sk(sk)->rcv_saddr,
-				inet_sk(sk)->num);
+				&inet_sk(sk)->inet_rcv_saddr,
+				inet_sk(sk)->inet_num);
 		break;
 	case PF_INET6:
 		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
 				proto_name,
 				&inet6_sk(sk)->rcv_saddr,
-				inet_sk(sk)->num);
+				inet_sk(sk)->inet_num);
 		break;
 	default:
 		len = snprintf(buf, remaining, "*unknown-%d*\n",
@@ -1311,7 +1311,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	/* Register socket with portmapper */
 	if (*errp >= 0 && pmap_register)
 		*errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
-				     ntohs(inet_sk(inet)->sport));
+				     ntohs(inet_sk(inet)->inet_sport));
 
 	if (*errp < 0) {
 		kfree(svsk);
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 3bb90b6f1dd3..e04566a2c4e5 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -273,11 +273,11 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 			case AF_INET: {
 				struct inet_sock *inet = inet_sk(sk);
 
-				print_ipv4_addr(ab, inet->rcv_saddr,
-						inet->sport,
+				print_ipv4_addr(ab, inet->inet_rcv_saddr,
+						inet->inet_sport,
 						"laddr", "lport");
-				print_ipv4_addr(ab, inet->daddr,
-						inet->dport,
+				print_ipv4_addr(ab, inet->inet_daddr,
+						inet->inet_dport,
 						"faddr", "fport");
 				break;
 			}
@@ -286,10 +286,10 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 				struct ipv6_pinfo *inet6 = inet6_sk(sk);
 
 				print_ipv6_addr(ab, &inet6->rcv_saddr,
-						inet->sport,
+						inet->inet_sport,
 						"laddr", "lport");
 				print_ipv6_addr(ab, &inet6->daddr,
-						inet->dport,
+						inet->inet_dport,
 						"faddr", "fport");
 				break;
 			}
-- 
cgit v1.2.3


From 8ffd1be6779c86ebc2a1013f43fdcee8bdbba2b7 Mon Sep 17 00:00:00 2001
From: Benjamin Gilbert <bgilbert@cs.cmu.edu>
Date: Mon, 19 Oct 2009 12:58:55 +0900
Subject: crypto: hash - Remove cra_u.{digest,hash}

Remove unused digest_alg and hash_alg structs from crypto_alg union and
kill their definitions.  This also ensures that old-style digest/hash
algorithms maintained out of tree will break at build time rather than
oopsing at runtime.

Signed-off-by: Benjamin Gilbert <bgilbert@cs.cmu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 27 ---------------------------
 1 file changed, 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index fd929889e8dc..24d2e30f1b46 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -250,29 +250,6 @@ struct cipher_alg {
 	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
 
-struct digest_alg {
-	unsigned int dia_digestsize;
-	void (*dia_init)(struct crypto_tfm *tfm);
-	void (*dia_update)(struct crypto_tfm *tfm, const u8 *data,
-			   unsigned int len);
-	void (*dia_final)(struct crypto_tfm *tfm, u8 *out);
-	int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen);
-};
-
-struct hash_alg {
-	int (*init)(struct hash_desc *desc);
-	int (*update)(struct hash_desc *desc, struct scatterlist *sg,
-		      unsigned int nbytes);
-	int (*final)(struct hash_desc *desc, u8 *out);
-	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
-		      unsigned int nbytes, u8 *out);
-	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
-		      unsigned int keylen);
-
-	unsigned int digestsize;
-};
-
 struct compress_alg {
 	int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
 			    unsigned int slen, u8 *dst, unsigned int *dlen);
@@ -293,8 +270,6 @@ struct rng_alg {
 #define cra_aead	cra_u.aead
 #define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
-#define cra_digest	cra_u.digest
-#define cra_hash	cra_u.hash
 #define cra_compress	cra_u.compress
 #define cra_rng		cra_u.rng
 
@@ -320,8 +295,6 @@ struct crypto_alg {
 		struct aead_alg aead;
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
-		struct digest_alg digest;
-		struct hash_alg hash;
 		struct compress_alg compress;
 		struct rng_alg rng;
 	} cra_u;
-- 
cgit v1.2.3


From 4c2b1a11646bf74e2926ce8b13a21884adc1e05c Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Wed, 2 Sep 2009 15:36:05 -0700
Subject: wimax: allow specifying debug levels as command line option

Add "debug" module options to all the wimax modules (including
drivers) so that the debug levels can be set upon kernel boot or
module load time.

This is needed as currently there was a limitation where the debug
levels could only be set when a device was succesfully
enumerated. This made it difficult to debug issues that made a device
not probe properly.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/driver.c | 10 ++++++
 drivers/net/wimax/i2400m/sdio.c   | 10 ++++++
 drivers/net/wimax/i2400m/usb.c    |  9 +++++
 include/linux/wimax/debug.h       | 72 +++++++++++++++++++++++++++++++++++++++
 net/wimax/stack.c                 | 11 ++++++
 5 files changed, 112 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 7ba00de5dd9b..e3b2c246cad7 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -90,6 +90,14 @@ MODULE_PARM_DESC(power_save_disabled,
 		 "False by default (so the device is told to do power "
 		 "saving).");
 
+static char i2400m_debug_params[128];
+module_param_string(debug, i2400m_debug_params, sizeof(i2400m_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
 /**
  * i2400m_queue_work - schedule work on a i2400m's queue
  *
@@ -794,6 +802,8 @@ size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
 static
 int __init i2400m_driver_init(void)
 {
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400m_debug_params,
+		       "i2400m.debug");
 	return 0;
 }
 module_init(i2400m_driver_init);
diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c
index 7c1b843b63e9..2d2cc5ac6d86 100644
--- a/drivers/net/wimax/i2400m/sdio.c
+++ b/drivers/net/wimax/i2400m/sdio.c
@@ -71,6 +71,14 @@
 static int ioe_timeout = 2;
 module_param(ioe_timeout, int, 0);
 
+static char i2400ms_debug_params[128];
+module_param_string(debug, i2400ms_debug_params, sizeof(i2400ms_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
 /* Our firmware file name list */
 static const char *i2400ms_bus_fw_names[] = {
 #define I2400MS_FW_FILE_NAME "i2400m-fw-sdio-1.3.sbcf"
@@ -559,6 +567,8 @@ struct sdio_driver i2400m_sdio_driver = {
 static
 int __init i2400ms_driver_init(void)
 {
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400ms_debug_params,
+		       "i2400m_sdio.debug");
 	return sdio_register_driver(&i2400m_sdio_driver);
 }
 module_init(i2400ms_driver_init);
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index a5879e21bbf3..063422290a43 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -71,6 +71,13 @@
 #define D_SUBMODULE usb
 #include "usb-debug-levels.h"
 
+static char i2400mu_debug_params[128];
+module_param_string(debug, i2400mu_debug_params, sizeof(i2400mu_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
 
 /* Our firmware file name */
 static const char *i2400mu_bus_fw_names[] = {
@@ -633,6 +640,8 @@ struct usb_driver i2400mu_driver = {
 static
 int __init i2400mu_driver_init(void)
 {
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400mu_debug_params,
+		       "i2400m_usb.debug");
 	return usb_register(&i2400mu_driver);
 }
 module_init(i2400mu_driver_init);
diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
index c703e0340423..db8096e88533 100644
--- a/include/linux/wimax/debug.h
+++ b/include/linux/wimax/debug.h
@@ -450,4 +450,76 @@ do {							\
 })
 
 
+static inline
+void d_submodule_set(struct d_level *d_level, size_t d_level_size,
+		     const char *submodule, u8 level, const char *tag)
+{
+	struct d_level *itr, *top;
+	int index = -1;
+
+	for (itr = d_level, top = itr + d_level_size; itr < top; itr++) {
+		index++;
+		if (itr->name == NULL) {
+			printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n",
+			       tag, itr, index);
+			continue;
+		}
+		if (!strcmp(itr->name, submodule)) {
+			itr->level = level;
+			return;
+		}
+	}
+	printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule);
+}
+
+
+/**
+ * d_parse_params - Parse a string with debug parameters from the
+ * command line
+ *
+ * @d_level: level structure (D_LEVEL)
+ * @d_level_size: number of items in the level structure
+ *     (D_LEVEL_SIZE).
+ * @_params: string with the parameters; this is a space (not tab!)
+ *     separated list of NAME:VALUE, where value is the debug level
+ *     and NAME is the name of the submodule.
+ * @tag: string for error messages (example: MODULE.ARGNAME).
+ */
+static inline
+void d_parse_params(struct d_level *d_level, size_t d_level_size,
+		    const char *_params, const char *tag)
+{
+	char submodule[130], *params, *params_orig, *token, *colon;
+	unsigned level, tokens;
+
+	if (_params == NULL)
+		return;
+	params_orig = kstrdup(_params, GFP_KERNEL);
+	params = params_orig;
+	while (1) {
+		token = strsep(&params, " ");
+		if (token == NULL)
+			break;
+		if (*token == '\0')	/* eat joint spaces */
+			continue;
+		/* kernel's sscanf %s eats until whitespace, so we
+		 * replace : by \n so it doesn't get eaten later by
+		 * strsep */
+		colon = strchr(token, ':');
+		if (colon != NULL)
+			*colon = '\n';
+		tokens = sscanf(token, "%s\n%u", submodule, &level);
+		if (colon != NULL)
+			*colon = ':';	/* set back, for error messages */
+		if (tokens == 2)
+			d_submodule_set(d_level, d_level_size,
+					submodule, level, tag);
+		else
+			printk(KERN_ERR "%s: can't parse '%s' as a "
+			       "SUBMODULE:LEVEL (%d tokens)\n",
+			       tag, token, tokens);
+	}
+	kfree(params_orig);
+}
+
 #endif /* #ifndef __debug__h__ */
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 79fb7d7c640f..c8866412f830 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -60,6 +60,14 @@
 #define D_SUBMODULE stack
 #include "debug-levels.h"
 
+static char wimax_debug_params[128];
+module_param_string(debug, wimax_debug_params, sizeof(wimax_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
 /*
  * Authoritative source for the RE_STATE_CHANGE attribute policy
  *
@@ -562,6 +570,9 @@ int __init wimax_subsys_init(void)
 	int result, cnt;
 
 	d_fnstart(4, NULL, "()\n");
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
+		       "wimax.debug");
+
 	snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
 		 "WiMAX");
 	result = genl_register_family(&wimax_gnl_family);
-- 
cgit v1.2.3


From 32742e6158657f19ad31653705bef56d983508e7 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Thu, 3 Sep 2009 15:56:40 -0700
Subject: wimax/i2400m: decide properly if using signed vs non-signed firmware
 loading

The i2400m based devices can boot two main types of firmware images:
signed and non-signed. Signed images have signature data included that
must match that of a certificate stored in the device.

Currently the code is making the decission on what type of firmware
load (signed vs non-signed) is going to be loaded based on a hardcoded
decission in __i2400m_ack_verify(), based on the barker the device
sent upon boot.

This is not flexible enough as future hardware will emit more barkers;
thus the bit has to be set in a place where there is better knowledge
of what is going on. This will be done in follow-up commits -- however
this patch paves the way for it.

So the querying of the mode is packed into i2400m_boot_is_signed();
the main changes are just using i2400m_boot_is_signed() to determine
the method to follow and setting i2400m->sboot in
i2400m_is_boot_barker(). The modifications in i2400m_dnload_init() and
i2400m_dnload_finalize() are just reorganizing the order of the if
blocks and thus look larger than they really are.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/fw.c | 35 +++++++++++++++++++++--------------
 include/linux/wimax/i2400m.h  | 10 ----------
 2 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 92d4d605dc2d..c962a8d8df7e 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -508,6 +508,17 @@ error_send:
 }
 
 
+/*
+ * Indicate if the device emitted a reboot barker that indicates
+ * "signed boot"
+ */
+static
+unsigned i2400m_boot_is_signed(struct i2400m *i2400m)
+{
+	return likely(i2400m->sboot);
+}
+
+
 /*
  * Do the final steps of uploading firmware
  *
@@ -529,7 +540,7 @@ int i2400m_dnload_finalize(struct i2400m *i2400m,
 
 	d_fnstart(3, dev, "offset %zu\n", offset);
 	cmd = (void *) bcf + offset;
-	if (i2400m->sboot == 0) {
+	if (i2400m_boot_is_signed(i2400m) == 0) {
 		struct i2400m_bootrom_header jump_ack;
 		d_printf(1, dev, "unsecure boot, jumping to 0x%08x\n",
 			le32_to_cpu(cmd->target_addr));
@@ -846,28 +857,24 @@ int i2400m_dnload_init(struct i2400m *i2400m, const struct i2400m_bcf_hdr *bcf)
 {
 	int result;
 	struct device *dev = i2400m_dev(i2400m);
-	u32 module_id = le32_to_cpu(bcf->module_id);
 
-	if (i2400m->sboot == 0
-	    && (module_id & I2400M_BCF_MOD_ID_POKES) == 0) {
-		/* non-signed boot process without pokes */
-		result = i2400m_dnload_init_nonsigned(i2400m);
+	if (i2400m_boot_is_signed(i2400m)) {
+		d_printf(1, dev, "signed boot\n");
+		result = i2400m_dnload_init_signed(i2400m, bcf);
 		if (result == -ERESTARTSYS)
 			return result;
 		if (result < 0)
-			dev_err(dev, "fw %s: non-signed download "
+			dev_err(dev, "firmware %s: signed boot download "
 				"initialization failed: %d\n",
 				i2400m->fw_name, result);
-	} else if (i2400m->sboot == 0
-		 && (module_id & I2400M_BCF_MOD_ID_POKES)) {
-		/* non-signed boot process with pokes, nothing to do */
-		result = 0;
-	} else {		 /* signed boot process */
-		result = i2400m_dnload_init_signed(i2400m, bcf);
+	} else {
+		/* non-signed boot process without pokes */
+		d_printf(1, dev, "non-signed boot\n");
+		result = i2400m_dnload_init_nonsigned(i2400m);
 		if (result == -ERESTARTSYS)
 			return result;
 		if (result < 0)
-			dev_err(dev, "fw %s: signed boot download "
+			dev_err(dev, "firmware %s: non-signed download "
 				"initialization failed: %d\n",
 				i2400m->fw_name, result);
 	}
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index 433693ef2bb0..d6e2a3595682 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -168,16 +168,6 @@ enum i2400m_brh {
 };
 
 
-/* Constants for bcf->module_id */
-enum i2400m_bcf_mod_id {
-	/* Firmware file carries its own pokes -- pokes are a set of
-	 * magical values that have to be written in certain memory
-	 * addresses to get the device up and ready for firmware
-	 * download when it is in non-signed boot mode. */
-	I2400M_BCF_MOD_ID_POKES = 0x000000001,
-};
-
-
 /**
  * i2400m_bootrom_header - Header for a boot-mode command
  *
-- 
cgit v1.2.3


From 923d708fed9d47c7b4d67694500d766337663e29 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Fri, 4 Sep 2009 14:50:59 -0700
Subject: wimax/i2400m: fix reboot echo/ack barker deadlock

The i2400m based devices can get in a sort of a deadlock some times;
when they boot, they send a reboot "barker" (a magic number) and then
the driver has to echo that same barker to ack reception
(echo/ack). Then the device does a final ack by sending an ACK barker.

The first time this happens, we don't know ahead of time with barker
the device is going to send, as different device models and SKUs will
send different barker depending on the EEPROM programming.

If the device has sent the barker before the driver has been able to
read it, the driver looses, as it doesn't know which barker it has to
echo/ack back. With older devices, we tried a couple of combinations
and that always worked; but now, with adding support for more, in
which we have an unlimited number of new barkers, that is not an
option.

So we rework said case so that when the device gets stuck, we just
cycle through all the known types until one forces the device to send
an ack. Otherwise, the driver gives up and aborts.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/fw.c | 63 ++++++++++++++++++++++++++++++++++---------
 include/linux/wimax/i2400m.h  |  2 +-
 2 files changed, 52 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 55fe011a9633..eef236d85af3 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -812,7 +812,7 @@ int i2400m_dnload_finalize(struct i2400m *i2400m,
  *
  * @i2400m: device descriptor
  * @flags:
- *      I2400M_BRI_SOFT: a reboot notification has been seen
+ *      I2400M_BRI_SOFT: a reboot barker has been seen
  *          already, so don't wait for it.
  *
  *      I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait
@@ -829,8 +829,9 @@ int i2400m_dnload_finalize(struct i2400m *i2400m,
  * main phases to this:
  *
  * a. (1) send a reboot command and (2) get a reboot barker
- * b. (1) ack the reboot sending a reboot barker and (2) getting an
- *        ack barker in return
+ *
+ * b. (1) echo/ack the reboot sending the reboot barker back and (2)
+ *        getting an ack barker in return
  *
  * We want to skip (a) in some cases [soft]. The state machine is
  * horrible, but it is basically: on each phase, send what has to be
@@ -838,6 +839,16 @@ int i2400m_dnload_finalize(struct i2400m *i2400m,
  * have to backtrack and retry, so we keep a max tries counter for
  * that.
  *
+ * It sucks because we don't know ahead of time which is going to be
+ * the reboot barker (the device might send different ones depending
+ * on its EEPROM config) and once the device reboots and waits for the
+ * echo/ack reboot barker being sent back, it doesn't understand
+ * anything else. So we can be left at the point where we don't know
+ * what to send to it -- cold reset and bus reset seem to have little
+ * effect. So the function iterates (in this case) through all the
+ * known barkers and tries them all until an ACK is
+ * received. Otherwise, it gives up.
+ *
  * If we get a timeout after sending a warm reset, we do it again.
  */
 int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags)
@@ -848,6 +859,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags)
 	struct i2400m_bootrom_header ack;
 	int count = i2400m->bus_bm_retries;
 	int ack_timeout_cnt = 1;
+	unsigned i;
 
 	BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data));
 	BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER));
@@ -858,6 +870,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags)
 	if (flags & I2400M_BRI_SOFT)
 		goto do_reboot_ack;
 do_reboot:
+	ack_timeout_cnt = 1;
 	if (--count < 0)
 		goto error_timeout;
 	d_printf(4, dev, "device reboot: reboot command [%d # left]\n",
@@ -869,22 +882,47 @@ do_reboot:
 	flags &= ~I2400M_BRI_NO_REBOOT;
 	switch (result) {
 	case -ERESTARTSYS:
+		/*
+		 * at this point, i2400m_bm_cmd(), through
+		 * __i2400m_bm_ack_process(), has updated
+		 * i2400m->barker and we are good to go.
+		 */
 		d_printf(4, dev, "device reboot: got reboot barker\n");
 		break;
 	case -EISCONN:	/* we don't know how it got here...but we follow it */
 		d_printf(4, dev, "device reboot: got ack barker - whatever\n");
 		goto do_reboot;
-	case -ETIMEDOUT:	/* device has timed out, we might be in boot
-				 * mode already and expecting an ack, let's try
-				 * that */
-		if (i2400m->barker == NULL) {
-			dev_info(dev, "warm reset timed out, unknown barker "
-				 "type, rebooting\n");
-			goto do_reboot;
-		} else {
-			dev_info(dev, "warm reset timed out, trying an ack\n");
+	case -ETIMEDOUT:
+		/*
+		 * Device has timed out, we might be in boot mode
+		 * already and expecting an ack; if we don't know what
+		 * the barker is, we just send them all. Cold reset
+		 * and bus reset don't work. Beats me.
+		 */
+		if (i2400m->barker != NULL) {
+			dev_err(dev, "device boot: reboot barker timed out, "
+				"trying (set) %08x echo/ack\n",
+				le32_to_cpu(i2400m->barker->data[0]));
 			goto do_reboot_ack;
 		}
+		for (i = 0; i < i2400m_barker_db_used; i++) {
+			struct i2400m_barker_db *barker = &i2400m_barker_db[i];
+			memcpy(cmd, barker->data, sizeof(barker->data));
+			result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
+					       &ack, sizeof(ack),
+					       I2400M_BM_CMD_RAW);
+			if (result == -EISCONN) {
+				dev_warn(dev, "device boot: got ack barker "
+					 "after sending echo/ack barker "
+					 "#%d/%08x; rebooting j.i.c.\n",
+					 i, le32_to_cpu(barker->data[0]));
+				flags &= ~I2400M_BRI_NO_REBOOT;
+				goto do_reboot;
+			}
+		}
+		dev_err(dev, "device boot: tried all the echo/acks, could "
+			"not get device to respond; giving up");
+		result = -ESHUTDOWN;
 	case -EPROTO:
 	case -ESHUTDOWN:	/* dev is gone */
 	case -EINTR:		/* user cancelled */
@@ -892,6 +930,7 @@ do_reboot:
 	default:
 		dev_err(dev, "device reboot: error %d while waiting "
 			"for reboot barker - rebooting\n", result);
+		d_dump(1, dev, &ack, result);
 		goto do_reboot;
 	}
 	/* At this point we ack back with 4 REBOOT barkers and expect
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index d6e2a3595682..fd5af05083cb 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -138,7 +138,7 @@ struct i2400m_bcf_hdr {
 	__le32 module_id;
 	__le32 module_vendor;
 	__le32 date;		/* BCD YYYMMDD */
-	__le32 size;
+	__le32 size;            /* in dwords */
 	__le32 key_size;	/* in dwords */
 	__le32 modulus_size;	/* in dwords */
 	__le32 exponent_size;	/* in dwords */
-- 
cgit v1.2.3


From f8fc3295570115267ce1ce901f362d13d194aefc Mon Sep 17 00:00:00 2001
From: Cindy H Kao <cindy.h.kao@intel.com>
Date: Fri, 4 Sep 2009 17:38:46 -0700
Subject: wimax/iwmc3200: add new sdio device ID to support iwmc3200 2.5GHz sku

Different sdio device IDs are designated to support different intel
wimax silicon sku. The new macro SDIO_DEVICE_ID_IWMC3200_WIMAX_2G5(0x1407)
is added to support iwmc3200 2.5GHz sku.  The existing
SDIO_DEVICE_ID_IWMC3200_WIMAX(0x1402) is for iwmc3200 general sku.

Signed-off-by: Cindy H Kao <cindy.h.kao@intel.com>
Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/sdio.c | 2 ++
 include/linux/mmc/sdio_ids.h    | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c
index 2d2cc5ac6d86..de158ee0c32c 100644
--- a/drivers/net/wimax/i2400m/sdio.c
+++ b/drivers/net/wimax/i2400m/sdio.c
@@ -550,6 +550,8 @@ const struct sdio_device_id i2400ms_sdio_ids[] = {
 	/* Intel: i2400m WiMAX (iwmc3200) over SDIO */
 	{ SDIO_DEVICE(SDIO_VENDOR_ID_INTEL,
 		      SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX) },
+	{ SDIO_DEVICE(SDIO_VENDOR_ID_INTEL,
+		      SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5) },
 	{ /* end: all zeroes */ },
 };
 MODULE_DEVICE_TABLE(sdio, i2400ms_sdio_ids);
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 2dbfb5a05994..33b2ea09a4ad 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -28,6 +28,7 @@
 #define SDIO_DEVICE_ID_INTEL_IWMC3200TOP	0x1404
 #define SDIO_DEVICE_ID_INTEL_IWMC3200GPS	0x1405
 #define SDIO_DEVICE_ID_INTEL_IWMC3200BT		0x1406
+#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5	0x1407
 
 #define SDIO_VENDOR_ID_MARVELL			0x02df
 #define SDIO_DEVICE_ID_MARVELL_LIBERTAS		0x9103
-- 
cgit v1.2.3


From 7329012e673231dee9a21567cfb9881f5ea462ba Mon Sep 17 00:00:00 2001
From: Dirk Brandewie <dirk.j.brandewie@intel.com>
Date: Wed, 12 Aug 2009 11:29:46 -0700
Subject: wimax/i6x50: add Intel WiFi/WiMAX Link 6050 Series support

Add support for the WiMAX device in the Intel WiFi/WiMAX Link 6050
Series; this involves:

 - adding the device ID to bind to and an endpoint mapping for the
   driver to use.

 - at probe() time, some things are set depending on the device id:

   + the list of firmware names to try

   + mapping of endpoints

Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/fw.c         |  3 +++
 drivers/net/wimax/i2400m/i2400m-usb.h |  3 +++
 drivers/net/wimax/i2400m/usb.c        | 26 +++++++++++++++++++-------
 include/linux/wimax/i2400m.h          |  1 +
 4 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 84a39c30c3d3..5719f4a4080f 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -277,6 +277,9 @@ int i2400m_barker_db_known_barkers(void)
 	result = i2400m_barker_db_add(I2400M_SBOOT_BARKER);
 	if (result < 0)
 		goto error_add;
+	result = i2400m_barker_db_add(I2400M_SBOOT_BARKER_6050);
+	if (result < 0)
+		goto error_add;
 error_add:
        return result;
 }
diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h
index f73a067e0668..5cc0f279417e 100644
--- a/drivers/net/wimax/i2400m/i2400m-usb.h
+++ b/drivers/net/wimax/i2400m/i2400m-usb.h
@@ -148,6 +148,9 @@ enum {
 	I2400MU_MAX_NOTIFICATION_LEN = 256,
 	I2400MU_BLK_SIZE = 16,
 	I2400MU_PL_SIZE_MAX = 0x3EFF,
+
+	/* Device IDs */
+	USB_DEVICE_ID_I6050 = 0x0186,
 };
 
 
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index 063422290a43..77d08d928274 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -80,11 +80,16 @@ MODULE_PARM_DESC(debug,
 		 "initial debug value to set.");
 
 /* Our firmware file name */
-static const char *i2400mu_bus_fw_names[] = {
+static const char *i2400mu_bus_fw_names_5x50[] = {
 #define I2400MU_FW_FILE_NAME_v1_4 "i2400m-fw-usb-1.4.sbcf"
 	I2400MU_FW_FILE_NAME_v1_4,
-#define I2400MU_FW_FILE_NAME_v1_3 "i2400m-fw-usb-1.3.sbcf"
-	I2400MU_FW_FILE_NAME_v1_3,
+	NULL,
+};
+
+
+static const char *i2400mu_bus_fw_names_6050[] = {
+#define I6050U_FW_FILE_NAME_v1_5 "i6050-fw-usb-1.5.sbcf"
+	I6050U_FW_FILE_NAME_v1_5,
 	NULL,
 };
 
@@ -418,10 +423,16 @@ int i2400mu_probe(struct usb_interface *iface,
 	i2400m->bus_bm_retries = I2400M_USB_BOOT_RETRIES;
 	i2400m->bus_bm_cmd_send = i2400mu_bus_bm_cmd_send;
 	i2400m->bus_bm_wait_for_ack = i2400mu_bus_bm_wait_for_ack;
-	i2400m->bus_fw_names = i2400mu_bus_fw_names;
 	i2400m->bus_bm_mac_addr_impaired = 0;
 
-	{
+	if (id->idProduct == USB_DEVICE_ID_I6050) {
+		i2400m->bus_fw_names = i2400mu_bus_fw_names_6050;
+		i2400mu->endpoint_cfg.bulk_out = 0;
+		i2400mu->endpoint_cfg.notification = 3;
+		i2400mu->endpoint_cfg.reset_cold = 2;
+		i2400mu->endpoint_cfg.bulk_in = 1;
+	} else {
+		i2400m->bus_fw_names = i2400mu_bus_fw_names_5x50;
 		i2400mu->endpoint_cfg.bulk_out = 0;
 		i2400mu->endpoint_cfg.notification = 1;
 		i2400mu->endpoint_cfg.reset_cold = 2;
@@ -614,6 +625,7 @@ out:
 
 static
 struct usb_device_id i2400mu_id_table[] = {
+	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) },
 	{ USB_DEVICE(0x8086, 0x0181) },
 	{ USB_DEVICE(0x8086, 0x1403) },
 	{ USB_DEVICE(0x8086, 0x1405) },
@@ -656,7 +668,7 @@ void __exit i2400mu_driver_exit(void)
 module_exit(i2400mu_driver_exit);
 
 MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
-MODULE_DESCRIPTION("Intel 2400M WiMAX networking for USB");
+MODULE_DESCRIPTION("Driver for USB based Intel Wireless WiMAX Connection 2400M "
+		   "(5x50 & 6050)");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_4);
-MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_3);
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index fd5af05083cb..62d356153565 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -266,6 +266,7 @@ enum {
 	I2400M_WARM_RESET_BARKER = 0x50f750f7,
 	I2400M_NBOOT_BARKER = 0xdeadbeef,
 	I2400M_SBOOT_BARKER = 0x0ff1c1a1,
+	I2400M_SBOOT_BARKER_6050 = 0x80000001,
 	I2400M_ACK_BARKER = 0xfeedbabe,
 	I2400M_D2H_MSG_BARKER = 0xbeefbabe,
 };
-- 
cgit v1.2.3


From 7e75f93eda027d9f9e0203ee6ffd210ea92e98f3 Mon Sep 17 00:00:00 2001
From: jamal <hadi@cyberus.ca>
Date: Mon, 19 Oct 2009 02:17:56 +0000
Subject: pkt_sched: ingress socket filter by mark

Allow bpf to set a filter to drop packets that dont
match a specific mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 3 ++-
 net/core/filter.c      | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1354aaf6abbe..909193e25395 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -123,7 +123,8 @@ struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_IFINDEX 	8
 #define SKF_AD_NLATTR	12
 #define SKF_AD_NLATTR_NEST	16
-#define SKF_AD_MAX	20
+#define SKF_AD_MARK 	20
+#define SKF_AD_MAX	24
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index d1d779ca096d..e3987e1d4839 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -303,6 +303,9 @@ load_b:
 		case SKF_AD_IFINDEX:
 			A = skb->dev->ifindex;
 			continue;
+		case SKF_AD_MARK:
+			A = skb->mark;
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3


From 7b6856a0296a8f187bb88ba31fa83a08abba7966 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Tue, 20 Oct 2009 00:08:01 -0700
Subject: can: provide library functions for skb allocation

This patch makes the private functions alloc_can_skb() and
alloc_can_err_skb() of the at91_can driver public and adapts all
drivers to use these. While making the patch I realized, that
the skb's are *not* setup consistently. It's now done as shown
below:

  skb->protocol = htons(ETH_P_CAN);
  skb->pkt_type = PACKET_BROADCAST;
  skb->ip_summed = CHECKSUM_UNNECESSARY;
  *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
  memset(*cf, 0, sizeof(struct can_frame));

The frame is zeroed out to avoid uninitialized data to be passed to
user space. Some drivers or library code did not set "pkt_type" or
"ip_summed". Also,  "__constant_htons()" should not be used for
runtime invocations, as pointed out by David Miller.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        | 32 -----------------------------
 drivers/net/can/dev.c             | 42 ++++++++++++++++++++++++++++++++-------
 drivers/net/can/sja1000/sja1000.c | 12 ++---------
 drivers/net/can/ti_hecc.c         | 17 ++++------------
 drivers/net/can/usb/ems_usb.c     | 16 ++-------------
 include/linux/can/dev.h           |  4 ++++
 6 files changed, 47 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index b13fd9114130..cbe3fce53e3b 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -221,38 +221,6 @@ static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb,
 	set_mb_mode_prio(priv, mb, mode, 0);
 }
 
-static struct sk_buff *alloc_can_skb(struct net_device *dev,
-		struct can_frame **cf)
-{
-	struct sk_buff *skb;
-
-	skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
-	if (unlikely(!skb))
-		return NULL;
-
-	skb->protocol = htons(ETH_P_CAN);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-
-	return skb;
-}
-
-static struct sk_buff *alloc_can_err_skb(struct net_device *dev,
-		struct can_frame **cf)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_can_skb(dev, cf);
-	if (unlikely(!skb))
-		return NULL;
-
-	memset(*cf, 0, sizeof(struct can_frame));
-	(*cf)->can_id = CAN_ERR_FLAG;
-	(*cf)->can_dlc = CAN_ERR_DLC;
-
-	return skb;
-}
-
 /*
  * Swtich transceiver on or off
  */
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 39b99f57c265..c3db111d2ff5 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -366,17 +366,12 @@ void can_restart(unsigned long data)
 	can_flush_echo_skb(dev);
 
 	/* send restart message upstream */
-	skb = dev_alloc_skb(sizeof(struct can_frame));
+	skb = alloc_can_err_skb(dev, &cf);
 	if (skb == NULL) {
 		err = -ENOMEM;
 		goto restart;
 	}
-	skb->dev = dev;
-	skb->protocol = htons(ETH_P_CAN);
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-	memset(cf, 0, sizeof(struct can_frame));
-	cf->can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED;
-	cf->can_dlc = CAN_ERR_DLC;
+	cf->can_id |= CAN_ERR_RESTARTED;
 
 	netif_rx(skb);
 
@@ -449,6 +444,39 @@ static void can_setup(struct net_device *dev)
 	dev->features = NETIF_F_NO_CSUM;
 }
 
+struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
+{
+	struct sk_buff *skb;
+
+	skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
+	if (unlikely(!skb))
+		return NULL;
+
+	skb->protocol = htons(ETH_P_CAN);
+	skb->pkt_type = PACKET_BROADCAST;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	memset(*cf, 0, sizeof(struct can_frame));
+
+	return skb;
+}
+EXPORT_SYMBOL_GPL(alloc_can_skb);
+
+struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_can_skb(dev, cf);
+	if (unlikely(!skb))
+		return NULL;
+
+	(*cf)->can_id = CAN_ERR_FLAG;
+	(*cf)->can_dlc = CAN_ERR_DLC;
+
+	return skb;
+}
+EXPORT_SYMBOL_GPL(alloc_can_err_skb);
+
 /*
  * Allocate and setup space for the CAN network device
  */
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 96d8be4253f8..1a9c5958bbd7 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -296,11 +296,9 @@ static void sja1000_rx(struct net_device *dev)
 	uint8_t dlc;
 	int i;
 
-	skb = dev_alloc_skb(sizeof(struct can_frame));
+	skb = alloc_can_skb(dev, &cf);
 	if (skb == NULL)
 		return;
-	skb->dev = dev;
-	skb->protocol = htons(ETH_P_CAN);
 
 	fi = priv->read_reg(priv, REG_FI);
 	dlc = fi & 0x0F;
@@ -351,15 +349,9 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 	enum can_state state = priv->can.state;
 	uint8_t ecc, alc;
 
-	skb = dev_alloc_skb(sizeof(struct can_frame));
+	skb = alloc_can_err_skb(dev, &cf);
 	if (skb == NULL)
 		return -ENOMEM;
-	skb->dev = dev;
-	skb->protocol = htons(ETH_P_CAN);
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-	memset(cf, 0, sizeof(struct can_frame));
-	cf->can_id = CAN_ERR_FLAG;
-	cf->can_dlc = CAN_ERR_DLC;
 
 	if (isrc & IRQ_DOI) {
 		/* data overrun interrupt */
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 23a7128e4eb7..07e8016b17ec 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -535,18 +535,15 @@ static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno)
 	u32 data, mbx_mask;
 	unsigned long flags;
 
-	skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame));
+	skb = alloc_can_skb(priv->ndev, &cf);
 	if (!skb) {
 		if (printk_ratelimit())
 			dev_err(priv->ndev->dev.parent,
-				"ti_hecc_rx_pkt: netdev_alloc_skb() failed\n");
+				"ti_hecc_rx_pkt: alloc_can_skb() failed\n");
 		return -ENOMEM;
 	}
-	skb->protocol = __constant_htons(ETH_P_CAN);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	mbx_mask = BIT(mbxno);
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
 	data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
 	if (data & HECC_CANMID_IDE)
 		cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
@@ -656,19 +653,13 @@ static int ti_hecc_error(struct net_device *ndev, int int_status,
 	struct sk_buff *skb;
 
 	/* propogate the error condition to the can stack */
-	skb = netdev_alloc_skb(ndev, sizeof(struct can_frame));
+	skb = alloc_can_err_skb(ndev, &cf);
 	if (!skb) {
 		if (printk_ratelimit())
 			dev_err(priv->ndev->dev.parent,
-				"ti_hecc_error: netdev_alloc_skb() failed\n");
+				"ti_hecc_error: alloc_can_err_skb() failed\n");
 		return -ENOMEM;
 	}
-	skb->protocol = __constant_htons(ETH_P_CAN);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-	memset(cf, 0, sizeof(struct can_frame));
-	cf->can_id = CAN_ERR_FLAG;
-	cf->can_dlc = CAN_ERR_DLC;
 
 	if (int_status & HECC_CANGIF_WLIF) { /* warning level int */
 		if ((int_status & HECC_CANGIF_BOIF) == 0) {
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index a65f56a9cd3d..3685f3e42d12 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -311,14 +311,10 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
 	int i;
 	struct net_device_stats *stats = &dev->netdev->stats;
 
-	skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+	skb = alloc_can_skb(dev->netdev, &cf);
 	if (skb == NULL)
 		return;
 
-	skb->protocol = htons(ETH_P_CAN);
-
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-
 	cf->can_id = msg->msg.can_msg.id;
 	cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
 
@@ -346,18 +342,10 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
 	struct sk_buff *skb;
 	struct net_device_stats *stats = &dev->netdev->stats;
 
-	skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+	skb = alloc_can_err_skb(dev->netdev, &cf);
 	if (skb == NULL)
 		return;
 
-	skb->protocol = htons(ETH_P_CAN);
-
-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-	memset(cf, 0, sizeof(struct can_frame));
-
-	cf->can_id = CAN_ERR_FLAG;
-	cf->can_dlc = CAN_ERR_DLC;
-
 	if (msg->type == CPC_MSG_TYPE_CAN_STATE) {
 		u8 state = msg->msg.can_state;
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 1d3f7f00e3af..1ed2a5cc03f5 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -68,4 +68,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 void can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
+struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
+struct sk_buff *alloc_can_err_skb(struct net_device *dev,
+				  struct can_frame **cf);
+
 #endif /* CAN_DEV_H */
-- 
cgit v1.2.3


From d19742fb1c68e6db83b76e06dea5a374c99e104f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Oct 2009 01:06:22 -0700
Subject: filter: Add SKF_AD_QUEUE instruction

It can help being able to filter packets on their queue_mapping.

If filter performance is not good, we could add a "numqueue" field
in struct packet_type, so that netif_nit_deliver() and other functions
can directly ignore packets with not expected queue number.

Lets experiment this simple filter extension first.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 3 ++-
 net/core/filter.c      | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 909193e25395..bb3b4352978a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -124,7 +124,8 @@ struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_NLATTR	12
 #define SKF_AD_NLATTR_NEST	16
 #define SKF_AD_MARK 	20
-#define SKF_AD_MAX	24
+#define SKF_AD_QUEUE	24
+#define SKF_AD_MAX	28
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index e3987e1d4839..08db7b9143a3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -306,6 +306,9 @@ load_b:
 		case SKF_AD_MARK:
 			A = skb->mark;
 			continue;
+		case SKF_AD_QUEUE:
+			A = skb->queue_mapping;
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3


From e95646c3ec33c8ec0693992da4332a6b32eb7e31 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 30 Sep 2009 11:17:21 +0200
Subject: virtio: let header files include virtio_ids.h

Rusty,

commit 3ca4f5ca73057a617f9444a91022d7127041970a
    virtio: add virtio IDs file
moved all device IDs into a single file. While the change itself is
a very good one, it can break userspace applications. For example
if a userspace tool wanted to get the ID of virtio_net it used to
include virtio_net.h. This does no longer work, since virtio_net.h
does not include virtio_ids.h.
This patch moves all "#include <linux/virtio_ids.h>" from the C
files into the header files, making the header files compatible with
the old ones.

In addition, this patch exports virtio_ids.h to userspace.

CC: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c       | 1 -
 drivers/block/virtio_blk.c          | 1 -
 drivers/char/hw_random/virtio-rng.c | 1 -
 drivers/char/virtio_console.c       | 1 -
 drivers/net/virtio_net.c            | 1 -
 drivers/virtio/virtio_balloon.c     | 1 -
 include/linux/Kbuild                | 1 +
 include/linux/virtio_9p.h           | 1 +
 include/linux/virtio_balloon.h      | 1 +
 include/linux/virtio_blk.h          | 1 +
 include/linux/virtio_console.h      | 1 +
 include/linux/virtio_net.h          | 1 +
 include/linux/virtio_rng.h          | 1 +
 net/9p/trans_virtio.c               | 1 -
 14 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index ba9373f82ab5..098de5bce00a 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -42,7 +42,6 @@
 #include <signal.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
-#include <linux/virtio_ids.h>
 #include "linux/virtio_net.h"
 #include "linux/virtio_blk.h"
 #include "linux/virtio_console.h"
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 348befaaec73..55635d1f697d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -3,7 +3,6 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 962968f05b94..b6c24dcc987d 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -21,7 +21,6 @@
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_rng.h>
 
 /* The host will fill any buffer we give it with sweet, sweet randomness.  We
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 0d328b59568d..a035ae39a359 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -31,7 +31,6 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_console.h>
 #include "hvc_console.h"
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8d009760277c..50ac94ce9c16 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,7 +22,6 @@
 #include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f55130..39789232646d 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -19,7 +19,6 @@
  */
 //#define DEBUG
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
 #include <linux/kthread.h>
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f384d4b163a..1feed71551c9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -364,6 +364,7 @@ unifdef-y += utsname.h
 unifdef-y += videodev2.h
 unifdef-y += videodev.h
 unifdef-y += virtio_config.h
+unifdef-y += virtio_ids.h
 unifdef-y += virtio_blk.h
 unifdef-y += virtio_net.h
 unifdef-y += virtio_9p.h
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index ea7226a45acb..095e10d148b4 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_9P_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* Maximum number of virtio channels per partition (1 for now) */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d730085060..1418f048cb34 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_BALLOON_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* The feature bitmap for virtio balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 15cb666581d7..1e19470d2da2 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -3,6 +3,7 @@
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 /* Feature bits */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index b5f519806014..fe885174cc1f 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_VIRTIO_CONSOLE_H
 #define _LINUX_VIRTIO_CONSOLE_H
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
  * anyone can use the definitions to implement compatible drivers/servers. */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 1f41734bbb77..085e42298ce5 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -3,6 +3,7 @@
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
 #include <linux/types.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 #include <linux/if_ether.h>
 
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 48121c3c434b..c4d5de896f0c 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -2,6 +2,7 @@
 #define _LINUX_VIRTIO_RNG_H
 /* This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers. */
+#include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
 #endif /* _LINUX_VIRTIO_RNG_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b2e07f0dd298..ea1e3daabefe 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,7 +43,6 @@
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
-#include <linux/virtio_ids.h>
 #include <linux/virtio_9p.h>
 
 #define VIRTQUEUE_NUM	128
-- 
cgit v1.2.3


From 3225beaba05d4f06087593f5e903ce867b6e118a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Oct 2009 16:39:28 -0600
Subject: virtio_blk: Revert serial number support

This reverts "Add serial number support for virtio_blk, V4a".

Turns out that virtio_pci, lguest and s/390 all have an 8 bit limit
on virtio config space, so noone could ever use this.

This is coming back later in a cleaner form.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: john cooper <john.cooper@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/virtio_blk.c | 37 +++----------------------------------
 include/linux/virtio_blk.h |  4 ----
 2 files changed, 3 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 55635d1f697d..51042f0ba7e1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -182,34 +182,6 @@ static void do_virtblk_request(struct request_queue *q)
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
-/* return ATA identify data
- */
-static int virtblk_identify(struct gendisk *disk, void *argp)
-{
-	struct virtio_blk *vblk = disk->private_data;
-	void *opaque;
-	int err = -ENOMEM;
-
-	opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
-	if (!opaque)
-		goto out;
-
-	err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY,
-		offsetof(struct virtio_blk_config, identify), opaque,
-		VIRTIO_BLK_ID_BYTES);
-
-	if (err)
-		goto out_kfree;
-
-	if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES))
-		err = -EFAULT;
-
-out_kfree:
-	kfree(opaque);
-out:
-	return err;
-}
-
 static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
 {
 	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
@@ -221,10 +193,6 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct virtio_blk *vblk = disk->private_data;
-	void __user *argp = (void __user *)data;
-
-	if (cmd == HDIO_GET_IDENTITY)
-		return virtblk_identify(disk, argp);
 
 	/*
 	 * Only allow the generic SCSI ioctls if the host can support it.
@@ -232,7 +200,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOTTY;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
+			      (void __user *)data);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -443,7 +412,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH
 };
 
 /*
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 1e19470d2da2..fd294c56d571 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -14,11 +14,8 @@
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
-#define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
 #define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
 
-#define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
-
 struct virtio_blk_config {
 	/* The capacity (in 512-byte sectors). */
 	__u64 capacity;
@@ -34,7 +31,6 @@ struct virtio_blk_config {
 	} geometry;
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
-	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
 /*
-- 
cgit v1.2.3


From 1c55d62e77fa16cdace417834fc7b8a421a1877f Mon Sep 17 00:00:00 2001
From: jamal <hadi@cyberus.ca>
Date: Thu, 15 Oct 2009 03:09:18 +0000
Subject: pkt_sched: skbedit add support for setting mark

This adds support for setting the skb mark.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tc_act/tc_skbedit.h |  2 ++
 include/net/tc_act/tc_skbedit.h   |  2 ++
 net/sched/act_skbedit.c           | 17 ++++++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h
index a14e461a7af7..7a2e910a5f08 100644
--- a/include/linux/tc_act/tc_skbedit.h
+++ b/include/linux/tc_act/tc_skbedit.h
@@ -26,6 +26,7 @@
 
 #define SKBEDIT_F_PRIORITY		0x1
 #define SKBEDIT_F_QUEUE_MAPPING		0x2
+#define SKBEDIT_F_MARK			0x4
 
 struct tc_skbedit {
 	tc_gen;
@@ -37,6 +38,7 @@ enum {
 	TCA_SKBEDIT_PARMS,
 	TCA_SKBEDIT_PRIORITY,
 	TCA_SKBEDIT_QUEUE_MAPPING,
+	TCA_SKBEDIT_MARK,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 6abb3ed3ebf7..e103fe02f375 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -26,7 +26,9 @@ struct tcf_skbedit {
 	struct tcf_common	common;
 	u32			flags;
 	u32     		priority;
+	u32     		mark;
 	u16			queue_mapping;
+	/* XXX: 16-bit pad here? */
 };
 #define to_skbedit(pc) \
 	container_of(pc, struct tcf_skbedit, common)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 4ab916b8074b..e9607fe55b58 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -54,6 +54,8 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
 	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
 	    skb->dev->real_num_tx_queues > d->queue_mapping)
 		skb_set_queue_mapping(skb, d->queue_mapping);
+	if (d->flags & SKBEDIT_F_MARK)
+		skb->mark = d->mark;
 
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
@@ -63,6 +65,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_PARMS]		= { .len = sizeof(struct tc_skbedit) },
 	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
+	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
 };
 
 static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
@@ -72,7 +75,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
 	struct tcf_common *pc;
-	u32 flags = 0, *priority = NULL;
+	u32 flags = 0, *priority = NULL, *mark = NULL;
 	u16 *queue_mapping = NULL;
 	int ret = 0, err;
 
@@ -95,6 +98,12 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 		flags |= SKBEDIT_F_QUEUE_MAPPING;
 		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
 	}
+
+	if (tb[TCA_SKBEDIT_MARK] != NULL) {
+		flags |= SKBEDIT_F_MARK;
+		mark = nla_data(tb[TCA_SKBEDIT_MARK]);
+	}
+
 	if (!flags)
 		return -EINVAL;
 
@@ -124,6 +133,9 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 		d->priority = *priority;
 	if (flags & SKBEDIT_F_QUEUE_MAPPING)
 		d->queue_mapping = *queue_mapping;
+	if (flags & SKBEDIT_F_MARK)
+		d->mark = *mark;
+
 	d->tcf_action = parm->action;
 
 	spin_unlock_bh(&d->tcf_lock);
@@ -161,6 +173,9 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
 		NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
 			sizeof(d->queue_mapping), &d->queue_mapping);
+	if (d->flags & SKBEDIT_F_MARK)
+		NLA_PUT(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
+			&d->mark);
 	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
-- 
cgit v1.2.3


From 40b1f4e5113eafc5e84f2ba86822df66087fcb25 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 22 Oct 2009 14:39:28 +1100
Subject: irq: trivial: Fix typo in comment for #endif

The comment suggests this #endif is CONFIG_X86 but it's really
CONFIG_TRACE_IRQFLAGS_SUPPORT

Signed-off-by: Michael Neuling <mikey@neuling.org>
Cc: michael@ellerman.id.au
LKML-Reference: <18191.1256182768@neuling.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index b02a3f1d46a0..006bf45eae30 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -124,6 +124,6 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
-#endif		/* CONFIG_X86 */
+#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #endif
-- 
cgit v1.2.3


From 721a669b7225edeeb0ca8e2bf71b83882326a71b Mon Sep 17 00:00:00 2001
From: Soeren Sandmann <sandmann@daimi.au.dk>
Date: Tue, 15 Sep 2009 14:33:08 +0200
Subject: perf events: Fix swevent hrtimer sampling by keeping track of
 remaining time when enabling/disabling swevent hrtimers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the hrtimer based events work for sysprof.

Whenever a swevent is scheduled out, the hrtimer is canceled.
When it is scheduled back in, the timer is restarted. This
happens every scheduler tick, which means the timer never
expired because it was getting repeatedly restarted over and
over with the same period.

To fix that, save the remaining time when disabling; when
reenabling, use that saved time as the period instead of the
user-specified sampling period.

Also, move the starting and stopping of the hrtimers to helper
functions instead of duplicating the code.

Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
LKML-Reference: <ye8vdi7mluz.fsf@camel16.daimi.au.dk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  4 +--
 kernel/perf_event.c        | 61 +++++++++++++++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e6d95f97419..9e7012689a84 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -471,8 +471,8 @@ struct hw_perf_event {
 			unsigned long	event_base;
 			int		idx;
 		};
-		union { /* software */
-			atomic64_t	count;
+		struct { /* software */
+			s64		remaining;
 			struct hrtimer	hrtimer;
 		};
 	};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index afb7ef3dbc44..33ff019f9aa6 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3969,6 +3969,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	return ret;
 }
 
+static void perf_swevent_start_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swevent_hrtimer;
+	if (hwc->sample_period) {
+		u64 period;
+
+		if (hwc->remaining) {
+			if (hwc->remaining < 0)
+				period = 10000;
+			else
+				period = hwc->remaining;
+			hwc->remaining = 0;
+		} else {
+			period = max_t(u64, 10000, hwc->sample_period);
+		}
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+}
+
+static void perf_swevent_cancel_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->sample_period) {
+		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
+		hwc->remaining = ktime_to_ns(remaining);
+
+		hrtimer_cancel(&hwc->hrtimer);
+	}
+}
+
 /*
  * Software event: cpu wall time clock
  */
@@ -3991,22 +4027,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
 	int cpu = raw_smp_processor_id();
 
 	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		u64 period = max_t(u64, 10000, hwc->sample_period);
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL, 0);
-	}
+	perf_swevent_start_hrtimer(event);
 
 	return 0;
 }
 
 static void cpu_clock_perf_event_disable(struct perf_event *event)
 {
-	if (event->hw.sample_period)
-		hrtimer_cancel(&event->hw.hrtimer);
+	perf_swevent_cancel_hrtimer(event);
 	cpu_clock_perf_event_update(event);
 }
 
@@ -4043,22 +4071,15 @@ static int task_clock_perf_event_enable(struct perf_event *event)
 	now = event->ctx->time;
 
 	atomic64_set(&hwc->prev_count, now);
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		u64 period = max_t(u64, 10000, hwc->sample_period);
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL, 0);
-	}
+
+	perf_swevent_start_hrtimer(event);
 
 	return 0;
 }
 
 static void task_clock_perf_event_disable(struct perf_event *event)
 {
-	if (event->hw.sample_period)
-		hrtimer_cancel(&event->hw.hrtimer);
+	perf_swevent_cancel_hrtimer(event);
 	task_clock_perf_event_update(event, event->ctx->time);
 
 }
-- 
cgit v1.2.3


From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 23 Oct 2009 14:58:11 +0200
Subject: ratelimit: Make suppressed output messages more useful

Today I got:

  [39648.224782] Registered led device: iwl-phy0::TX
  [40676.545099] __ratelimit: 246 callbacks suppressed
  [40676.545103] abcdef[23675]: segfault at 0 ...

as you can see the ratelimit message contains a function prefix.
Since this is always __ratelimit, this wont help much.

This patch changes __ratelimit and printk_ratelimit to print the
function name that calls ratelimit.

This will pinpoint the responsible function, as long as not several
different places call ratelimit with the same ratelimit state at
the same time. In that case we catch only one random function that
calls ratelimit after the wait period.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
CC: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h    | 3 ++-
 include/linux/ratelimit.h | 3 ++-
 kernel/printk.c           | 6 +++---
 lib/ratelimit.c           | 6 +++---
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3305f33201be..21d0d822c716 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -240,7 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern int printk_ratelimit(void);
+extern int __printk_ratelimit(const char *func);
+#define printk_ratelimit() __printk_ratelimit(__func__)
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 187bc16c1f15..668cf1bef030 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -25,6 +25,7 @@ struct ratelimit_state {
 		.burst		= burst_init,				\
 	}
 
-extern int __ratelimit(struct ratelimit_state *rs);
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
 
 #endif /* _LINUX_RATELIMIT_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index b997c893cdcf..8283dbe15af4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1364,11 +1364,11 @@ late_initcall(disable_boot_consoles);
  */
 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
 
-int printk_ratelimit(void)
+int __printk_ratelimit(const char *func)
 {
-	return __ratelimit(&printk_ratelimit_state);
+	return ___ratelimit(&printk_ratelimit_state, func);
 }
-EXPORT_SYMBOL(printk_ratelimit);
+EXPORT_SYMBOL(__printk_ratelimit);
 
 /**
  * printk_timed_ratelimit - caller-controlled printk ratelimiting
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 5551731ae1d4..09f5ce1810dc 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -20,7 +20,7 @@
  * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
  * in every @rs->ratelimit_jiffies
  */
-int __ratelimit(struct ratelimit_state *rs)
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
 	unsigned long flags;
 	int ret;
@@ -43,7 +43,7 @@ int __ratelimit(struct ratelimit_state *rs)
 	if (time_is_before_jiffies(rs->begin + rs->interval)) {
 		if (rs->missed)
 			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
-				__func__, rs->missed);
+				func, rs->missed);
 		rs->begin   = 0;
 		rs->printed = 0;
 		rs->missed  = 0;
@@ -59,4 +59,4 @@ int __ratelimit(struct ratelimit_state *rs)
 
 	return ret;
 }
-EXPORT_SYMBOL(__ratelimit);
+EXPORT_SYMBOL(___ratelimit);
-- 
cgit v1.2.3


From 6d3f1e12f46a2f9a1bb7e7aa433df8dd31ce5647 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 23 Oct 2009 19:36:19 -0400
Subject: tracing: Remove cpu arg from the rb_time_stamp() function

The cpu argument is not used inside the rb_time_stamp() function.
Plus fix a typo.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20091023233647.118547500@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/trace_seq.h  |  2 +-
 kernel/trace/ring_buffer.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index c134dd1fe6b6..09077f6ed128 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -7,7 +7,7 @@
 
 /*
  * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE.
+ * to create a string of data to use (up to a max of PAGE_SIZE).
  */
 
 struct trace_seq {
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 217f6991184f..3ffa502fb243 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -483,7 +483,7 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+static inline u64 rb_time_stamp(struct ring_buffer *buffer)
 {
 	/* shift to debug/test normalization and TIME_EXTENTS */
 	return buffer->clock() << DEBUG_SHIFT;
@@ -494,7 +494,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 	u64 time;
 
 	preempt_disable_notrace();
-	time = rb_time_stamp(buffer, cpu);
+	time = rb_time_stamp(buffer);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -599,7 +599,7 @@ static struct list_head *rb_list_head(struct list_head *list)
 }
 
 /*
- * rb_is_head_page - test if the give page is the head page
+ * rb_is_head_page - test if the given page is the head page
  *
  * Because the reader may move the head_page pointer, we can
  * not trust what the head page is (it may be pointing to
@@ -1868,7 +1868,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		 * Nested commits always have zero deltas, so
 		 * just reread the time stamp
 		 */
-		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+		*ts = rb_time_stamp(buffer);
 		next_page->page->time_stamp = *ts;
 	}
 
@@ -2111,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		goto out_fail;
 
-	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer);
 
 	/*
 	 * Only the first commit can update the timestamp.
-- 
cgit v1.2.3


From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Mon, 19 Oct 2009 11:43:32 +0200
Subject: pcmcia: drop already defined PCI_IDs

Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in
pci_ids.h. Move them and drop the duplicates. Successfully build-tested.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cirrus.h  | 10 ----------
 drivers/pcmcia/o2micro.h | 22 ----------------------
 include/linux/pci_ids.h  |  2 ++
 3 files changed, 2 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pcmcia/cirrus.h b/drivers/pcmcia/cirrus.h
index ecd4fc7f666f..446a4576e73e 100644
--- a/drivers/pcmcia/cirrus.h
+++ b/drivers/pcmcia/cirrus.h
@@ -30,16 +30,6 @@
 #ifndef _LINUX_CIRRUS_H
 #define _LINUX_CIRRUS_H
 
-#ifndef PCI_VENDOR_ID_CIRRUS
-#define PCI_VENDOR_ID_CIRRUS		0x1013
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_6729
-#define PCI_DEVICE_ID_CIRRUS_6729	0x1100
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_6832
-#define PCI_DEVICE_ID_CIRRUS_6832	0x1110
-#endif
-
 #define PD67_MISC_CTL_1		0x16	/* Misc control 1 */
 #define PD67_FIFO_CTL		0x17	/* FIFO control */
 #define PD67_MISC_CTL_2		0x1E	/* Misc control 2 */
diff --git a/drivers/pcmcia/o2micro.h b/drivers/pcmcia/o2micro.h
index 72188c462c9c..624442fc0d35 100644
--- a/drivers/pcmcia/o2micro.h
+++ b/drivers/pcmcia/o2micro.h
@@ -30,28 +30,6 @@
 #ifndef _LINUX_O2MICRO_H
 #define _LINUX_O2MICRO_H
 
-#ifndef PCI_VENDOR_ID_O2
-#define PCI_VENDOR_ID_O2		0x1217
-#endif
-#ifndef PCI_DEVICE_ID_O2_6729
-#define PCI_DEVICE_ID_O2_6729		0x6729
-#endif
-#ifndef PCI_DEVICE_ID_O2_6730
-#define PCI_DEVICE_ID_O2_6730		0x673a
-#endif
-#ifndef PCI_DEVICE_ID_O2_6832
-#define PCI_DEVICE_ID_O2_6832		0x6832
-#endif
-#ifndef PCI_DEVICE_ID_O2_6836
-#define PCI_DEVICE_ID_O2_6836		0x6836
-#endif
-#ifndef PCI_DEVICE_ID_O2_6812
-#define PCI_DEVICE_ID_O2_6812		0x6872
-#endif
-#ifndef PCI_DEVICE_ID_O2_6933
-#define PCI_DEVICE_ID_O2_6933           0x6933
-#endif
-
 /* Additional PCI configuration registers */
 
 #define O2_MUX_CONTROL		0x90	/* 32 bit */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f490e7a7307a..857cc349bf71 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1632,6 +1632,8 @@
 #define PCI_DEVICE_ID_O2_6730		0x673a
 #define PCI_DEVICE_ID_O2_6832		0x6832
 #define PCI_DEVICE_ID_O2_6836		0x6836
+#define PCI_DEVICE_ID_O2_6812		0x6872
+#define PCI_DEVICE_ID_O2_6933		0x6933
 
 #define PCI_VENDOR_ID_3DFX		0x121a
 #define PCI_DEVICE_ID_3DFX_VOODOO	0x0001
-- 
cgit v1.2.3


From ce0e7b28fb75cb003cfc8d0238613aaf1c55e797 Mon Sep 17 00:00:00 2001
From: Ryota Ozaki <ozaki.ryota@gmail.com>
Date: Sat, 24 Oct 2009 01:20:10 +0900
Subject: sched, cpuacct: Fix niced guest time accounting

CPU time of a guest is always accounted in 'user' time
without concern for the nice value of its counterpart
process although the guest is scheduled under the nice
value.

This patch fixes the defect and accounts cpu time of
a niced guest in 'nice' time as same as a niced process.

And also the patch adds 'guest_nice' to cpuacct. The
value provides niced guest cpu time which is like 'nice'
to 'user'.

The original discussions can be found here:

  http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html
  http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html

Signed-off-by: Ryota Ozaki <ozaki.ryota@gmail.com>
Acked-by: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1256314810-7897-1-git-send-email-ozaki.ryota@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/filesystems/proc.txt |  3 ++-
 fs/proc/stat.c                     | 19 +++++++++++++------
 include/linux/kernel_stat.h        |  1 +
 kernel/sched.c                     |  9 +++++++--
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2c48f945546b..4af0018533f2 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1072,7 +1072,8 @@ second).  The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
-- guest: running a guest
+- guest: running a normal guest
+- guest_nice: running a niced guest
 
 The "intr" line gives counts of interrupts  serviced since boot time, for each
 of the  possible system interrupts.   The first  column  is the  total of  all
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c6d70a..b9b7aad2003d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
 	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest;
+	cputime64_t guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	guest = cputime64_zero;
+	guest = guest_nice = cputime64_zero;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+		guest_nice = cputime64_add(guest_nice,
+			kstat_cpu(i).cpustat.guest_nice);
 		for_each_irq_nr(j) {
 			sum += kstat_irqs_cpu(j, i);
 		}
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	sum += arch_irq_stat();
 
-	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+		"%llu\n",
 		(unsigned long long)cputime64_to_clock_t(user),
 		(unsigned long long)cputime64_to_clock_t(nice),
 		(unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(irq),
 		(unsigned long long)cputime64_to_clock_t(softirq),
 		(unsigned long long)cputime64_to_clock_t(steal),
-		(unsigned long long)cputime64_to_clock_t(guest));
+		(unsigned long long)cputime64_to_clock_t(guest),
+		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = kstat_cpu(i).cpustat.softirq;
 		steal = kstat_cpu(i).cpustat.steal;
 		guest = kstat_cpu(i).cpustat.guest;
+		guest_nice = kstat_cpu(i).cpustat.guest_nice;
 		seq_printf(p,
-			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+			"%llu\n",
 			i,
 			(unsigned long long)cputime64_to_clock_t(user),
 			(unsigned long long)cputime64_to_clock_t(nice),
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
 			(unsigned long long)cputime64_to_clock_t(irq),
 			(unsigned long long)cputime64_to_clock_t(softirq),
 			(unsigned long long)cputime64_to_clock_t(steal),
-			(unsigned long long)cputime64_to_clock_t(guest));
+			(unsigned long long)cputime64_to_clock_t(guest),
+			(unsigned long long)cputime64_to_clock_t(guest_nice));
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 348fa8874b52..c059044bc6dc 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -25,6 +25,7 @@ struct cpu_usage_stat {
 	cputime64_t iowait;
 	cputime64_t steal;
 	cputime64_t guest;
+	cputime64_t guest_nice;
 };
 
 struct kernel_stat {
diff --git a/kernel/sched.c b/kernel/sched.c
index e5205811c19e..67be4d0dddaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5017,8 +5017,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	/* Add guest time to cpustat. */
-	cpustat->user = cputime64_add(cpustat->user, tmp);
-	cpustat->guest = cputime64_add(cpustat->guest, tmp);
+	if (TASK_NICE(p) > 0) {
+		cpustat->nice = cputime64_add(cpustat->nice, tmp);
+		cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
+	} else {
+		cpustat->user = cputime64_add(cpustat->user, tmp);
+		cpustat->guest = cputime64_add(cpustat->guest, tmp);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 0b83ddebc6e884dc0221358cf68c461520fbdd8e Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 22 Oct 2009 13:26:45 +0300
Subject: MFD: twl4030: add twl4030_codec MFD as a new child to the core

New MFD child to twl4030 MFD device.

Reason for the twl4030_codec MFD: the vibra control is actually in the codec
part of the twl4030. If both the vibra and the audio functionality is needed
from the twl4030 at the same time, than they need to control the codec power
and APLL at the same time without breaking the other driver.
Also these two has to be able to work without the need for the other driver.

This MFD device will be used by the drivers, which needs resources
from the twl4030 codec like audio and vibra.

The platform specific configuration data is passed along to the
child drivers (audio, vibra).

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/Kconfig               |   6 +
 drivers/mfd/Makefile              |   1 +
 drivers/mfd/twl4030-codec.c       | 241 +++++++++++++++++++++++++++++++++
 drivers/mfd/twl4030-core.c        |  14 ++
 include/linux/i2c/twl4030.h       |  18 +++
 include/linux/mfd/twl4030-codec.h | 271 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 551 insertions(+)
 create mode 100644 drivers/mfd/twl4030-codec.c
 create mode 100644 include/linux/mfd/twl4030-codec.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 570be139f9df..08f2d07bf56a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -121,6 +121,12 @@ config TWL4030_POWER
 	  and load scripts controling which resources are switched off/on
 	  or reset when a sleep, wakeup or warm reset event occurs.
 
+config TWL4030_CODEC
+	bool
+	depends on TWL4030_CORE
+	select MFD_CORE
+	default n
+
 config MFD_TMIO
 	bool
 	default n
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f3b277b90d40..af0fc903cec8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
+obj-$(CONFIG_TWL4030_CODEC)	+= twl4030-codec.o
 
 obj-$(CONFIG_MFD_MC13783)	+= mc13783-core.o
 
diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c
new file mode 100644
index 000000000000..97103078dc2a
--- /dev/null
+++ b/drivers/mfd/twl4030-codec.c
@@ -0,0 +1,241 @@
+/*
+ * MFD driver for twl4030 codec submodule
+ *
+ * Author:	Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * Copyright:   (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+#include <linux/i2c/twl4030.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/twl4030-codec.h>
+
+#define TWL4030_CODEC_CELLS	2
+
+static struct platform_device *twl4030_codec_dev;
+
+struct twl4030_codec_resource {
+	int request_count;
+	u8 reg;
+	u8 mask;
+};
+
+struct twl4030_codec {
+	struct mutex mutex;
+	struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX];
+	struct mfd_cell cells[TWL4030_CODEC_CELLS];
+};
+
+/*
+ * Modify the resource, the function returns the content of the register
+ * after the modification.
+ */
+static int twl4030_codec_set_resource(enum twl4030_codec_res id, int enable)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	u8 val;
+
+	twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+			codec->resource[id].reg);
+
+	if (enable)
+		val |= codec->resource[id].mask;
+	else
+		val &= ~codec->resource[id].mask;
+
+	twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+					val, codec->resource[id].reg);
+
+	return val;
+}
+
+static inline int twl4030_codec_get_resource(enum twl4030_codec_res id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	u8 val;
+
+	twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+			codec->resource[id].reg);
+
+	return val;
+}
+
+/*
+ * Enable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_enable_resource(enum twl4030_codec_res id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	int val;
+
+	if (id >= TWL4030_CODEC_RES_MAX) {
+		dev_err(&twl4030_codec_dev->dev,
+				"Invalid resource ID (%u)\n", id);
+		return -EINVAL;
+	}
+
+	mutex_lock(&codec->mutex);
+	if (!codec->resource[id].request_count)
+		/* Resource was disabled, enable it */
+		val = twl4030_codec_set_resource(id, 1);
+	else
+		val = twl4030_codec_get_resource(id);
+
+	codec->resource[id].request_count++;
+	mutex_unlock(&codec->mutex);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_enable_resource);
+
+/*
+ * Disable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_disable_resource(unsigned id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	int val;
+
+	if (id >= TWL4030_CODEC_RES_MAX) {
+		dev_err(&twl4030_codec_dev->dev,
+				"Invalid resource ID (%u)\n", id);
+		return -EINVAL;
+	}
+
+	mutex_lock(&codec->mutex);
+	if (!codec->resource[id].request_count) {
+		dev_err(&twl4030_codec_dev->dev,
+			"Resource has been disabled already (%u)\n", id);
+		mutex_unlock(&codec->mutex);
+		return -EPERM;
+	}
+	codec->resource[id].request_count--;
+
+	if (!codec->resource[id].request_count)
+		/* Resource can be disabled now */
+		val = twl4030_codec_set_resource(id, 0);
+	else
+		val = twl4030_codec_get_resource(id);
+
+	mutex_unlock(&codec->mutex);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource);
+
+static int __devinit twl4030_codec_probe(struct platform_device *pdev)
+{
+	struct twl4030_codec *codec;
+	struct twl4030_codec_data *pdata = pdev->dev.platform_data;
+	struct mfd_cell *cell = NULL;
+	int ret, childs = 0;
+
+	codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL);
+	if (!codec)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, codec);
+
+	twl4030_codec_dev = pdev;
+	mutex_init(&codec->mutex);
+
+	/* Codec power */
+	codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE;
+	codec->resource[TWL4030_CODEC_RES_POWER].mask = TWL4030_CODECPDZ;
+
+	/* PLL */
+	codec->resource[TWL4030_CODEC_RES_APLL].reg = TWL4030_REG_APLL_CTL;
+	codec->resource[TWL4030_CODEC_RES_APLL].mask = TWL4030_APLL_EN;
+
+	if (pdata->audio) {
+		cell = &codec->cells[childs];
+		cell->name = "twl4030_codec_audio";
+		cell->platform_data = pdata->audio;
+		cell->data_size = sizeof(*pdata->audio);
+		childs++;
+	}
+	if (pdata->vibra) {
+		cell = &codec->cells[childs];
+		cell->name = "twl4030_codec_vibra";
+		cell->platform_data = pdata->vibra;
+		cell->data_size = sizeof(*pdata->vibra);
+		childs++;
+	}
+
+	if (childs)
+		ret = mfd_add_devices(&pdev->dev, pdev->id, codec->cells,
+				      childs, NULL, 0);
+	else {
+		dev_err(&pdev->dev, "No platform data found for childs\n");
+		ret = -ENODEV;
+	}
+
+	if (!ret)
+		return 0;
+
+	platform_set_drvdata(pdev, NULL);
+	kfree(codec);
+	twl4030_codec_dev = NULL;
+	return ret;
+}
+
+static int __devexit twl4030_codec_remove(struct platform_device *pdev)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(pdev);
+
+	mfd_remove_devices(&pdev->dev);
+	platform_set_drvdata(pdev, NULL);
+	kfree(codec);
+	twl4030_codec_dev = NULL;
+
+	return 0;
+}
+
+MODULE_ALIAS("platform:twl4030_codec");
+
+static struct platform_driver twl4030_codec_driver = {
+	.probe		= twl4030_codec_probe,
+	.remove		= __devexit_p(twl4030_codec_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "twl4030_codec",
+	},
+};
+
+static int __devinit twl4030_codec_init(void)
+{
+	return platform_driver_register(&twl4030_codec_driver);
+}
+module_init(twl4030_codec_init);
+
+static void __devexit twl4030_codec_exit(void)
+{
+	platform_driver_unregister(&twl4030_codec_driver);
+}
+module_exit(twl4030_codec_exit);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@nokia.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index e424cf6d8e9e..0ee81e46bfbd 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -114,6 +114,12 @@
 #define twl_has_watchdog()        false
 #endif
 
+#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
+#define twl_has_codec()	true
+#else
+#define twl_has_codec()	false
+#endif
+
 /* Triton Core internal information (BEGIN) */
 
 /* Last - for index max*/
@@ -557,6 +563,14 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
+	if (twl_has_codec() && pdata->codec) {
+		child = add_child(1, "twl4030_codec",
+				pdata->codec, sizeof(*pdata->codec),
+				false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
 	if (twl_has_regulator()) {
 		/*
 		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 2d02dfd7076c..42d6c722bd82 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -401,6 +401,23 @@ struct twl4030_power_data {
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
 
+struct twl4030_codec_audio_data {
+	unsigned int	audio_mclk;
+	unsigned int ramp_delay_value;
+	unsigned int hs_extmute:1;
+	void (*set_hs_extmute)(int mute);
+};
+
+struct twl4030_codec_vibra_data {
+	unsigned int	audio_mclk;
+	unsigned int	coexist;
+};
+
+struct twl4030_codec_data {
+	struct twl4030_codec_audio_data		*audio;
+	struct twl4030_codec_vibra_data		*vibra;
+};
+
 struct twl4030_platform_data {
 	unsigned				irq_base, irq_end;
 	struct twl4030_bci_platform_data	*bci;
@@ -409,6 +426,7 @@ struct twl4030_platform_data {
 	struct twl4030_keypad_data		*keypad;
 	struct twl4030_usb_data			*usb;
 	struct twl4030_power_data		*power;
+	struct twl4030_codec_data		*codec;
 
 	/* LDO regulators */
 	struct regulator_init_data		*vdac;
diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h
new file mode 100644
index 000000000000..ef0a3041d759
--- /dev/null
+++ b/include/linux/mfd/twl4030-codec.h
@@ -0,0 +1,271 @@
+/*
+ * MFD driver for twl4030 codec submodule
+ *
+ * Author:	Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * Copyright:   (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __TWL4030_CODEC_H__
+#define __TWL4030_CODEC_H__
+
+/* Codec registers */
+#define TWL4030_REG_CODEC_MODE		0x01
+#define TWL4030_REG_OPTION		0x02
+#define TWL4030_REG_UNKNOWN		0x03
+#define TWL4030_REG_MICBIAS_CTL		0x04
+#define TWL4030_REG_ANAMICL		0x05
+#define TWL4030_REG_ANAMICR		0x06
+#define TWL4030_REG_AVADC_CTL		0x07
+#define TWL4030_REG_ADCMICSEL		0x08
+#define TWL4030_REG_DIGMIXING		0x09
+#define TWL4030_REG_ATXL1PGA		0x0A
+#define TWL4030_REG_ATXR1PGA		0x0B
+#define TWL4030_REG_AVTXL2PGA		0x0C
+#define TWL4030_REG_AVTXR2PGA		0x0D
+#define TWL4030_REG_AUDIO_IF		0x0E
+#define TWL4030_REG_VOICE_IF		0x0F
+#define TWL4030_REG_ARXR1PGA		0x10
+#define TWL4030_REG_ARXL1PGA		0x11
+#define TWL4030_REG_ARXR2PGA		0x12
+#define TWL4030_REG_ARXL2PGA		0x13
+#define TWL4030_REG_VRXPGA		0x14
+#define TWL4030_REG_VSTPGA		0x15
+#define TWL4030_REG_VRX2ARXPGA		0x16
+#define TWL4030_REG_AVDAC_CTL		0x17
+#define TWL4030_REG_ARX2VTXPGA		0x18
+#define TWL4030_REG_ARXL1_APGA_CTL	0x19
+#define TWL4030_REG_ARXR1_APGA_CTL	0x1A
+#define TWL4030_REG_ARXL2_APGA_CTL	0x1B
+#define TWL4030_REG_ARXR2_APGA_CTL	0x1C
+#define TWL4030_REG_ATX2ARXPGA		0x1D
+#define TWL4030_REG_BT_IF		0x1E
+#define TWL4030_REG_BTPGA		0x1F
+#define TWL4030_REG_BTSTPGA		0x20
+#define TWL4030_REG_EAR_CTL		0x21
+#define TWL4030_REG_HS_SEL		0x22
+#define TWL4030_REG_HS_GAIN_SET		0x23
+#define TWL4030_REG_HS_POPN_SET		0x24
+#define TWL4030_REG_PREDL_CTL		0x25
+#define TWL4030_REG_PREDR_CTL		0x26
+#define TWL4030_REG_PRECKL_CTL		0x27
+#define TWL4030_REG_PRECKR_CTL		0x28
+#define TWL4030_REG_HFL_CTL		0x29
+#define TWL4030_REG_HFR_CTL		0x2A
+#define TWL4030_REG_ALC_CTL		0x2B
+#define TWL4030_REG_ALC_SET1		0x2C
+#define TWL4030_REG_ALC_SET2		0x2D
+#define TWL4030_REG_BOOST_CTL		0x2E
+#define TWL4030_REG_SOFTVOL_CTL		0x2F
+#define TWL4030_REG_DTMF_FREQSEL	0x30
+#define TWL4030_REG_DTMF_TONEXT1H	0x31
+#define TWL4030_REG_DTMF_TONEXT1L	0x32
+#define TWL4030_REG_DTMF_TONEXT2H	0x33
+#define TWL4030_REG_DTMF_TONEXT2L	0x34
+#define TWL4030_REG_DTMF_TONOFF		0x35
+#define TWL4030_REG_DTMF_WANONOFF	0x36
+#define TWL4030_REG_I2S_RX_SCRAMBLE_H	0x37
+#define TWL4030_REG_I2S_RX_SCRAMBLE_M	0x38
+#define TWL4030_REG_I2S_RX_SCRAMBLE_L	0x39
+#define TWL4030_REG_APLL_CTL		0x3A
+#define TWL4030_REG_DTMF_CTL		0x3B
+#define TWL4030_REG_DTMF_PGA_CTL2	0x3C
+#define TWL4030_REG_DTMF_PGA_CTL1	0x3D
+#define TWL4030_REG_MISC_SET_1		0x3E
+#define TWL4030_REG_PCMBTMUX		0x3F
+#define TWL4030_REG_RX_PATH_SEL		0x43
+#define TWL4030_REG_VDL_APGA_CTL	0x44
+#define TWL4030_REG_VIBRA_CTL		0x45
+#define TWL4030_REG_VIBRA_SET		0x46
+#define TWL4030_REG_VIBRA_PWM_SET	0x47
+#define TWL4030_REG_ANAMIC_GAIN		0x48
+#define TWL4030_REG_MISC_SET_2		0x49
+
+/* Bitfield Definitions */
+
+/* TWL4030_CODEC_MODE (0x01) Fields */
+#define TWL4030_APLL_RATE		0xF0
+#define TWL4030_APLL_RATE_8000		0x00
+#define TWL4030_APLL_RATE_11025		0x10
+#define TWL4030_APLL_RATE_12000		0x20
+#define TWL4030_APLL_RATE_16000		0x40
+#define TWL4030_APLL_RATE_22050		0x50
+#define TWL4030_APLL_RATE_24000		0x60
+#define TWL4030_APLL_RATE_32000		0x80
+#define TWL4030_APLL_RATE_44100		0x90
+#define TWL4030_APLL_RATE_48000		0xA0
+#define TWL4030_APLL_RATE_96000		0xE0
+#define TWL4030_SEL_16K			0x08
+#define TWL4030_CODECPDZ		0x02
+#define TWL4030_OPT_MODE		0x01
+#define TWL4030_OPTION_1		(1 << 0)
+#define TWL4030_OPTION_2		(0 << 0)
+
+/* TWL4030_OPTION (0x02) Fields */
+#define TWL4030_ATXL1_EN		(1 << 0)
+#define TWL4030_ATXR1_EN		(1 << 1)
+#define TWL4030_ATXL2_VTXL_EN		(1 << 2)
+#define TWL4030_ATXR2_VTXR_EN		(1 << 3)
+#define TWL4030_ARXL1_VRX_EN		(1 << 4)
+#define TWL4030_ARXR1_EN		(1 << 5)
+#define TWL4030_ARXL2_EN		(1 << 6)
+#define TWL4030_ARXR2_EN		(1 << 7)
+
+/* TWL4030_REG_MICBIAS_CTL (0x04) Fields */
+#define TWL4030_MICBIAS2_CTL		0x40
+#define TWL4030_MICBIAS1_CTL		0x20
+#define TWL4030_HSMICBIAS_EN		0x04
+#define TWL4030_MICBIAS2_EN		0x02
+#define TWL4030_MICBIAS1_EN		0x01
+
+/* ANAMICL (0x05) Fields */
+#define TWL4030_CNCL_OFFSET_START	0x80
+#define TWL4030_OFFSET_CNCL_SEL		0x60
+#define TWL4030_OFFSET_CNCL_SEL_ARX1	0x00
+#define TWL4030_OFFSET_CNCL_SEL_ARX2	0x20
+#define TWL4030_OFFSET_CNCL_SEL_VRX	0x40
+#define TWL4030_OFFSET_CNCL_SEL_ALL	0x60
+#define TWL4030_MICAMPL_EN		0x10
+#define TWL4030_CKMIC_EN		0x08
+#define TWL4030_AUXL_EN			0x04
+#define TWL4030_HSMIC_EN		0x02
+#define TWL4030_MAINMIC_EN		0x01
+
+/* ANAMICR (0x06) Fields */
+#define TWL4030_MICAMPR_EN		0x10
+#define TWL4030_AUXR_EN			0x04
+#define TWL4030_SUBMIC_EN		0x01
+
+/* AVADC_CTL (0x07) Fields */
+#define TWL4030_ADCL_EN			0x08
+#define TWL4030_AVADC_CLK_PRIORITY	0x04
+#define TWL4030_ADCR_EN			0x02
+
+/* TWL4030_REG_ADCMICSEL (0x08) Fields */
+#define TWL4030_DIGMIC1_EN		0x08
+#define TWL4030_TX2IN_SEL		0x04
+#define TWL4030_DIGMIC0_EN		0x02
+#define TWL4030_TX1IN_SEL		0x01
+
+/* AUDIO_IF (0x0E) Fields */
+#define TWL4030_AIF_SLAVE_EN		0x80
+#define TWL4030_DATA_WIDTH		0x60
+#define TWL4030_DATA_WIDTH_16S_16W	0x00
+#define TWL4030_DATA_WIDTH_32S_16W	0x40
+#define TWL4030_DATA_WIDTH_32S_24W	0x60
+#define TWL4030_AIF_FORMAT		0x18
+#define TWL4030_AIF_FORMAT_CODEC	0x00
+#define TWL4030_AIF_FORMAT_LEFT		0x08
+#define TWL4030_AIF_FORMAT_RIGHT	0x10
+#define TWL4030_AIF_FORMAT_TDM		0x18
+#define TWL4030_AIF_TRI_EN		0x04
+#define TWL4030_CLK256FS_EN		0x02
+#define TWL4030_AIF_EN			0x01
+
+/* VOICE_IF (0x0F) Fields */
+#define TWL4030_VIF_SLAVE_EN		0x80
+#define TWL4030_VIF_DIN_EN		0x40
+#define TWL4030_VIF_DOUT_EN		0x20
+#define TWL4030_VIF_SWAP		0x10
+#define TWL4030_VIF_FORMAT		0x08
+#define TWL4030_VIF_TRI_EN		0x04
+#define TWL4030_VIF_SUB_EN		0x02
+#define TWL4030_VIF_EN			0x01
+
+/* EAR_CTL (0x21) */
+#define TWL4030_EAR_GAIN		0x30
+
+/* HS_GAIN_SET (0x23) Fields */
+#define TWL4030_HSR_GAIN		0x0C
+#define TWL4030_HSR_GAIN_PWR_DOWN	0x00
+#define TWL4030_HSR_GAIN_PLUS_6DB	0x04
+#define TWL4030_HSR_GAIN_0DB		0x08
+#define TWL4030_HSR_GAIN_MINUS_6DB	0x0C
+#define TWL4030_HSL_GAIN		0x03
+#define TWL4030_HSL_GAIN_PWR_DOWN	0x00
+#define TWL4030_HSL_GAIN_PLUS_6DB	0x01
+#define TWL4030_HSL_GAIN_0DB		0x02
+#define TWL4030_HSL_GAIN_MINUS_6DB	0x03
+
+/* HS_POPN_SET (0x24) Fields */
+#define TWL4030_VMID_EN			0x40
+#define	TWL4030_EXTMUTE			0x20
+#define TWL4030_RAMP_DELAY		0x1C
+#define TWL4030_RAMP_DELAY_20MS		0x00
+#define TWL4030_RAMP_DELAY_40MS		0x04
+#define TWL4030_RAMP_DELAY_81MS		0x08
+#define TWL4030_RAMP_DELAY_161MS	0x0C
+#define TWL4030_RAMP_DELAY_323MS	0x10
+#define TWL4030_RAMP_DELAY_645MS	0x14
+#define TWL4030_RAMP_DELAY_1291MS	0x18
+#define TWL4030_RAMP_DELAY_2581MS	0x1C
+#define TWL4030_RAMP_EN			0x02
+
+/* PREDL_CTL (0x25) */
+#define TWL4030_PREDL_GAIN		0x30
+
+/* PREDR_CTL (0x26) */
+#define TWL4030_PREDR_GAIN		0x30
+
+/* PRECKL_CTL (0x27) */
+#define TWL4030_PRECKL_GAIN		0x30
+
+/* PRECKR_CTL (0x28) */
+#define TWL4030_PRECKR_GAIN		0x30
+
+/* HFL_CTL (0x29, 0x2A) Fields */
+#define TWL4030_HF_CTL_HB_EN		0x04
+#define TWL4030_HF_CTL_LOOP_EN		0x08
+#define TWL4030_HF_CTL_RAMP_EN		0x10
+#define TWL4030_HF_CTL_REF_EN		0x20
+
+/* APLL_CTL (0x3A) Fields */
+#define TWL4030_APLL_EN			0x10
+#define TWL4030_APLL_INFREQ		0x0F
+#define TWL4030_APLL_INFREQ_19200KHZ	0x05
+#define TWL4030_APLL_INFREQ_26000KHZ	0x06
+#define TWL4030_APLL_INFREQ_38400KHZ	0x0F
+
+/* REG_MISC_SET_1 (0x3E) Fields */
+#define TWL4030_CLK64_EN		0x80
+#define TWL4030_SCRAMBLE_EN		0x40
+#define TWL4030_FMLOOP_EN		0x20
+#define TWL4030_SMOOTH_ANAVOL_EN	0x02
+#define TWL4030_DIGMIC_LR_SWAP_EN	0x01
+
+/* VIBRA_CTL (0x45) */
+#define TWL4030_VIBRA_EN		0x01
+#define TWL4030_VIBRA_DIR		0x02
+#define TWL4030_VIBRA_AUDIO_SEL_L1	(0x00 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_R1	(0x01 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_L2	(0x02 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_R2	(0x03 << 2)
+#define TWL4030_VIBRA_SEL		0x10
+#define TWL4030_VIBRA_DIR_SEL		0x20
+
+/* TWL4030 codec resource IDs */
+enum twl4030_codec_res {
+	TWL4030_CODEC_RES_POWER = 0,
+	TWL4030_CODEC_RES_APLL,
+	TWL4030_CODEC_RES_MAX,
+};
+
+int twl4030_codec_disable_resource(enum twl4030_codec_res id);
+int twl4030_codec_enable_resource(enum twl4030_codec_res id);
+
+#endif	/* End of __TWL4030_CODEC_H__ */
-- 
cgit v1.2.3


From 9b1d82fa1611706fa7ee1505f290160a18caf95d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 25 Oct 2009 19:03:50 -0700
Subject: rcu: "Tiny RCU", The Bloatwatch Edition

This patch is a version of RCU designed for !SMP provided for a
small-footprint RCU implementation.  In particular, the
implementation of synchronize_rcu() is extremely lightweight and
high performance. It passes rcutorture testing in each of the
four relevant configurations (combinations of NO_HZ and PREEMPT)
on x86.  This saves about 1K bytes compared to old Classic RCU
(which is no longer in mainline), and more than three kilobytes
compared to Hierarchical RCU (updated to 2.6.30):

	CONFIG_TREE_RCU:

	   text	   data	    bss	    dec	    filename
	    183       4       0     187     kernel/rcupdate.o
	   2783     520      36    3339     kernel/rcutree.o
				   3526 Total (vs 4565 for v7)

	CONFIG_TREE_PREEMPT_RCU:

	   text	   data	    bss	    dec	    filename
	    263       4       0     267     kernel/rcupdate.o
	   4594     776      52    5422     kernel/rcutree.o
	   			   5689 Total (6155 for v7)

	CONFIG_TINY_RCU:

	   text	   data	    bss	    dec	    filename
	     96       4       0     100     kernel/rcupdate.o
	    734      24       0     758     kernel/rcutiny.o
	    			    858 Total (vs 848 for v7)

The above is for x86.  Your mileage may vary on other platforms.
Further compression is possible, but is being procrastinated.

Changes from v7 (http://lkml.org/lkml/2009/10/9/388)

o	Apply Lai Jiangshan's review comments (aside from
might_sleep() 	in synchronize_sched(), which is covered by SMP builds).

o	Fix up expedited primitives.

Changes from v6 (http://lkml.org/lkml/2009/9/23/293).

o	Forward ported to put it into the 2.6.33 stream.

o	Added lockdep support.

o	Make lightweight rcu_barrier.

Changes from v5 (http://lkml.org/lkml/2009/6/23/12).

o	Ported to latest pre-2.6.32 merge window kernel.

	- Renamed rcu_qsctr_inc() to rcu_sched_qs().
	- Renamed rcu_bh_qsctr_inc() to rcu_bh_qs().
	- Provided trivial rcu_cpu_notify().
	- Provided trivial exit_rcu().
	- Provided trivial rcu_needs_cpu().
	- Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h.

o	Removed the dependence on EMBEDDED, with a view to making
	TINY_RCU default for !SMP at some time in the future.

o	Added (trivial) support for expedited grace periods.

Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include:

o	Squeeze the size down a bit further by removing the
	->completed field from struct rcu_ctrlblk.

o	This permits synchronize_rcu() to become the empty function.
	Previous concerns about rcutorture were unfounded, as
	rcutorture correctly handles a constant value from
	rcu_batches_completed() and rcu_batches_completed_bh().

Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include:

o	Changed rcu_batches_completed(), rcu_batches_completed_bh()
	rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and
	rcu_nmi_exit(), to be static inlines, as suggested by David
	Howells.  Doing this saves about 100 bytes from rcutiny.o.
	(The numbers between v3 and this v4 of the patch are not directly
	comparable, since they are against different versions of Linux.)

Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include:

o	Fix whitespace issues.

o	Change short-circuit "||" operator to instead be "+" in order
to 	fix performance bug noted by "kraai" on LWN.

		(http://lwn.net/Articles/324348/)

Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include:

o	This version depends on EMBEDDED as well as !SMP, as suggested
	by Ingo.

o	Updated rcu_needs_cpu() to unconditionally return zero,
	permitting the CPU to enter dynticks-idle mode at any time.
	This works because callbacks can be invoked upon entry to
	dynticks-idle mode.

o	Paul is now OK with this being included, based on a poll at
the 	Kernel Miniconf at linux.conf.au, where about ten people said
	that they cared about saving 900 bytes on single-CPU systems.

o	Applies to both mainline and tip/core/rcu.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
LKML-Reference: <12565226351355-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hardirq.h  |  24 ++++
 include/linux/rcupdate.h |   6 +
 include/linux/rcutiny.h  |  97 ++++++++++++++++
 init/Kconfig             |   9 ++
 kernel/Makefile          |   1 +
 kernel/rcupdate.c        |   4 +
 kernel/rcutiny.c         | 294 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 435 insertions(+)
 create mode 100644 include/linux/rcutiny.h
 create mode 100644 kernel/rcutiny.c

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 6d527ee82b2b..d5b387669dab 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk)
 #endif
 
 #if defined(CONFIG_NO_HZ)
+#if defined(CONFIG_TINY_RCU)
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+static inline void rcu_irq_enter(void)
+{
+	rcu_exit_nohz();
+}
+
+static inline void rcu_irq_exit(void)
+{
+	rcu_enter_nohz();
+}
+
+static inline void rcu_nmi_enter(void)
+{
+}
+
+static inline void rcu_nmi_exit(void)
+{
+}
+
+#else
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
 extern void rcu_nmi_enter(void);
 extern void rcu_nmi_exit(void);
+#endif
 #else
 # define rcu_irq_enter() do { } while (0)
 # define rcu_irq_exit() do { } while (0)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3ebd0b7bcb08..6dd71fa48429 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -68,11 +68,17 @@ extern int sched_expedited_torture_stats(char *page);
 /* Internal to kernel */
 extern void rcu_init(void);
 extern void rcu_scheduler_starting(void);
+#ifndef CONFIG_TINY_RCU
 extern int rcu_needs_cpu(int cpu);
+#else
+static inline int rcu_needs_cpu(int cpu) { return 0; }
+#endif
 extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
+#elif CONFIG_TINY_RCU
+#include <linux/rcutiny.h>
 #else
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
new file mode 100644
index 000000000000..891073c264dc
--- /dev/null
+++ b/include/linux/rcutiny.h
@@ -0,0 +1,97 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ */
+
+#ifndef __LINUX_TINY_H
+#define __LINUX_TINY_H
+
+#include <linux/cache.h>
+
+void rcu_sched_qs(int cpu);
+void rcu_bh_qs(int cpu);
+
+#define __rcu_read_lock()	preempt_disable()
+#define __rcu_read_unlock()	preempt_enable()
+#define __rcu_read_lock_bh()	local_bh_disable()
+#define __rcu_read_unlock_bh()	local_bh_enable()
+#define call_rcu_sched		call_rcu
+
+#define rcu_init_sched()	do { } while (0)
+extern void rcu_check_callbacks(int cpu, int user);
+extern void __rcu_init(void);
+
+/*
+ * Return the number of grace periods.
+ */
+static inline long rcu_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods.
+ */
+static inline long rcu_batches_completed_bh(void)
+{
+	return 0;
+}
+
+extern int rcu_expedited_torture_stats(char *page);
+
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_sched();
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
+{
+	synchronize_sched();
+}
+
+struct notifier_block;
+extern int rcu_cpu_notify(struct notifier_block *self,
+			  unsigned long action, void *hcpu);
+
+#ifdef CONFIG_NO_HZ
+
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static inline void rcu_enter_nohz(void)
+{
+}
+
+static inline void rcu_exit_nohz(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+static inline void exit_rcu(void)
+{
+}
+
+#endif /* __LINUX_RCUTINY_H */
diff --git a/init/Kconfig b/init/Kconfig
index 09c5c6431f42..d367bf6bcc34 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -334,6 +334,15 @@ config TREE_PREEMPT_RCU
 	  is also required.  It also scales down nicely to
 	  smaller systems.
 
+config TINY_RCU
+	bool "UP-only small-memory-footprint RCU"
+	depends on !SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for UP systems from which real-time response
+	  is not required.  This option greatly reduces the
+	  memory footprint of RCU.
+
 endchoice
 
 config RCU_TRACE
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..e9a0e6ed25cc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += rcutree.o
 obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
 obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+obj-$(CONFIG_TINY_RCU) += rcutiny.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 400183346ad2..7625f207f65e 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,6 +67,8 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	complete(&rcu->completion);
 }
 
+#ifndef CONFIG_TINY_RCU
+
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 /**
@@ -157,6 +159,8 @@ void synchronize_rcu_bh(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
+#endif /* #ifndef CONFIG_TINY_RCU */
+
 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
 		unsigned long action, void *hcpu)
 {
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
new file mode 100644
index 000000000000..0b54efde66ac
--- /dev/null
+++ b/kernel/rcutiny.c
@@ -0,0 +1,294 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
+	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
+	struct rcu_head **curtail;	/* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_ctrlblk = {
+	.rcucblist = NULL,
+	.donetail = &rcu_ctrlblk.rcucblist,
+	.curtail = &rcu_ctrlblk.rcucblist,
+};
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.rcucblist = NULL,
+	.donetail = &rcu_bh_ctrlblk.rcucblist,
+	.curtail = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_NO_HZ
+
+static long rcu_dynticks_nesting = 1;
+
+/*
+ * Enter dynticks-idle mode, which is an extended quiescent state
+ * if we have fully entered that mode (i.e., if the new value of
+ * dynticks_nesting is zero).
+ */
+void rcu_enter_nohz(void)
+{
+	if (--rcu_dynticks_nesting == 0)
+		rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+}
+
+/*
+ * Exit dynticks-idle mode, so that we are no longer in an extended
+ * quiescent state.
+ */
+void rcu_exit_nohz(void)
+{
+	rcu_dynticks_nesting++;
+}
+
+#endif /* #ifdef CONFIG_NO_HZ */
+
+/*
+ * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
+ * Also disable irqs to avoid confusion due to interrupt handlers invoking
+ * call_rcu().
+ */
+static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (rcp->rcucblist != NULL &&
+	    rcp->donetail != rcp->curtail) {
+		rcp->donetail = rcp->curtail;
+		local_irq_restore(flags);
+		return 1;
+	}
+	local_irq_restore(flags);
+	return 0;
+}
+
+/*
+ * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
+ * are at it, given that any rcu quiescent state is also an rcu_bh
+ * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
+ */
+void rcu_sched_qs(int cpu)
+{
+	if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Record an rcu_bh quiescent state.
+ */
+void rcu_bh_qs(int cpu)
+{
+	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Check to see if the scheduling-clock interrupt came from an extended
+ * quiescent state, and, if so, tell RCU about it.
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||
+	    (idle_cpu(cpu) &&
+	     !in_softirq() &&
+	     hardirq_count() <= (1 << HARDIRQ_SHIFT)))
+		rcu_sched_qs(cpu);
+	else if (!in_softirq())
+		rcu_bh_qs(cpu);
+}
+
+/*
+ * Helper function for rcu_process_callbacks() that operates on the
+ * specified rcu_ctrlkblk structure.
+ */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+	struct rcu_head *next, *list;
+
+	/* If no RCU callbacks ready to invoke, just return. */
+	if (&rcp->rcucblist == rcp->donetail)
+		return;
+
+	/* Move the ready-to-invoke callbacks to a local list. */
+	local_irq_save(flags);
+	list = rcp->rcucblist;
+	rcp->rcucblist = *rcp->donetail;
+	*rcp->donetail = NULL;
+	if (rcp->curtail == rcp->donetail)
+		rcp->curtail = &rcp->rcucblist;
+	rcp->donetail = &rcp->rcucblist;
+	local_irq_restore(flags);
+
+	/* Invoke the callbacks on the local list. */
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+	}
+}
+
+/*
+ * Invoke any callbacks whose grace period has completed.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+	__rcu_process_callbacks(&rcu_ctrlblk);
+	__rcu_process_callbacks(&rcu_bh_ctrlblk);
+}
+
+/*
+ * Null function to handle CPU being onlined.  Longer term, we want to
+ * make TINY_RCU avoid using rcupdate.c, but later...
+ */
+int rcu_cpu_notify(struct notifier_block *self,
+		   unsigned long action, void *hcpu)
+{
+	return NOTIFY_OK;
+}
+
+/*
+ * Wait for a grace period to elapse.  But it is illegal to invoke
+ * synchronize_sched() from within an RCU read-side critical section.
+ * Therefore, any legal call to synchronize_sched() is a quiescent
+ * state, and so on a UP system, synchronize_sched() need do nothing.
+ * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
+ * benefits of doing might_sleep() to reduce latency.)
+ *
+ * Cool, huh?  (Due to Josh Triplett.)
+ *
+ * But we want to make this a static inline later.
+ */
+void synchronize_sched(void)
+{
+	cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+void synchronize_rcu_bh(void)
+{
+	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
+/*
+ * Helper function for call_rcu() and call_rcu_bh().
+ */
+static void __call_rcu(struct rcu_head *head,
+		       void (*func)(struct rcu_head *rcu),
+		       struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	head->func = func;
+	head->next = NULL;
+	local_irq_save(flags);
+	*rcp->curtail = head;
+	rcp->curtail = &head->next;
+	local_irq_restore(flags);
+}
+
+/*
+ * Post an RCU callback to be invoked after the end of an RCU grace
+ * period.  But since we have but one CPU, that would be after any
+ * quiescent state.
+ */
+void call_rcu(struct rcu_head *head,
+	      void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Post an RCU bottom-half callback to be invoked after any subsequent
+ * quiescent state.
+ */
+void call_rcu_bh(struct rcu_head *head,
+		 void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_bh_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+void rcu_barrier(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+void rcu_barrier_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+void rcu_barrier_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
+void __rcu_init(void)
+{
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
-- 
cgit v1.2.3


From 0cd397d33608ae6c97d2ee6c8c43462b419b7e26 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 25 Oct 2009 19:03:51 -0700
Subject: rcu: Add synchronize_srcu_expedited()

This patch creates a synchronize_srcu_expedited() that uses
synchronize_sched_expedited() where synchronize_srcu()
uses synchronize_sched().  The synchronize_srcu() and
synchronize_srcu_expedited() functions become one-liners that
pass synchronize_sched() or synchronize_sched_expedited(),
repectively, to a new __synchronize_srcu() function.

While in the file, move the EXPORT_SYMBOL_GPL()s to immediately
follow the corresponding functions.

Requested-by: Avi Kivity <avi@redhat.com>
Tested-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: avi@redhat.com
LKML-Reference: <12565226354038-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/srcu.h |  1 +
 kernel/srcu.c        | 74 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index aca0eee53930..4765d97dcafb 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp);
 int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
+void synchronize_srcu_expedited(struct srcu_struct *sp);
 long srcu_batches_completed(struct srcu_struct *sp);
 
 #endif
diff --git a/kernel/srcu.c b/kernel/srcu.c
index b0aeeaf22ce4..818d7d9aa03c 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp)
 	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
 	return (sp->per_cpu_ref ? 0 : -ENOMEM);
 }
+EXPORT_SYMBOL_GPL(init_srcu_struct);
 
 /*
  * srcu_readers_active_idx -- returns approximate number of readers
@@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
@@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp)
 	preempt_enable();
 	return idx;
 }
+EXPORT_SYMBOL_GPL(srcu_read_lock);
 
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
@@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(srcu_read_unlock);
 
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Flip the completed counter, and wait for the old count to drain to zero.
- * As with classic RCU, the updater must use some separate means of
- * synchronizing concurrent updates.  Can block; must be called from
- * process context.
- *
- * Note that it is illegal to call synchornize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section.
+/*
+ * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-void synchronize_srcu(struct srcu_struct *sp)
+void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
 {
 	int idx;
 
@@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 		return;
 	}
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * The preceding synchronize_sched() ensures that any CPU that
@@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	idx = sp->completed & 0x1;
 	sp->completed++;
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * At this point, because of the preceding synchronize_sched(),
@@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	while (srcu_readers_active_idx(sp, idx))
 		schedule_timeout_interruptible(1);
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * The preceding synchronize_sched() forces all srcu_read_unlock()
@@ -236,6 +229,47 @@ void synchronize_srcu(struct srcu_struct *sp)
 	mutex_unlock(&sp->mutex);
 }
 
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, synchronize_sched);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+
+/**
+ * synchronize_srcu_expedited - like synchronize_srcu, but less patient
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu_expedited()
+ * from the corresponding SRCU read-side critical section; doing so
+ * will result in deadlock.  However, it is perfectly legal to call
+ * synchronize_srcu_expedited() on one srcu_struct from some other
+ * srcu_struct's read-side critical section.
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, synchronize_sched_expedited);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
 /**
  * srcu_batches_completed - return batches completed.
  * @sp: srcu_struct on which to report batch completion.
@@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp)
 {
 	return sp->completed;
 }
-
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-EXPORT_SYMBOL_GPL(srcu_read_lock);
-EXPORT_SYMBOL_GPL(srcu_read_unlock);
-EXPORT_SYMBOL_GPL(synchronize_srcu);
 EXPORT_SYMBOL_GPL(srcu_batches_completed);
-- 
cgit v1.2.3


From 4ce5b90340879ce93d169b7b523c2cbbe7c45843 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Oct 2009 07:55:55 +0100
Subject: rcu: Do tiny cleanups in rcutiny

No change in functionality - just straighten out a few small
stylistic details.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
LKML-Reference: <12565226351355-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  6 ++----
 kernel/rcutiny.c        | 49 +++++++++++++++++++++++--------------------------
 2 files changed, 25 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 891073c264dc..2c1fe8373e71 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -20,9 +20,8 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU
+ *		Documentation/RCU
  */
-
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
@@ -70,8 +69,7 @@ static inline void synchronize_rcu_bh_expedited(void)
 }
 
 struct notifier_block;
-extern int rcu_cpu_notify(struct notifier_block *self,
-			  unsigned long action, void *hcpu);
+extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu);
 
 #ifdef CONFIG_NO_HZ
 
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0b54efde66ac..b33ec3aa377a 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -20,22 +20,21 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU
+ *		Documentation/RCU
  */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/completion.h>
 #include <linux/moduleparam.h>
+#include <linux/completion.h>
+#include <linux/interrupt.h>
 #include <linux/notifier.h>
-#include <linux/cpu.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/time.h>
+#include <linux/cpu.h>
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
@@ -46,14 +45,13 @@ struct rcu_ctrlblk {
 
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_ctrlblk = {
-	.rcucblist = NULL,
-	.donetail = &rcu_ctrlblk.rcucblist,
-	.curtail = &rcu_ctrlblk.rcucblist,
+	.donetail	= &rcu_ctrlblk.rcucblist,
+	.curtail	= &rcu_ctrlblk.rcucblist,
 };
+
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.rcucblist = NULL,
-	.donetail = &rcu_bh_ctrlblk.rcucblist,
-	.curtail = &rcu_bh_ctrlblk.rcucblist,
+	.donetail	= &rcu_bh_ctrlblk.rcucblist,
+	.curtail	= &rcu_bh_ctrlblk.rcucblist,
 };
 
 #ifdef CONFIG_NO_HZ
@@ -84,8 +82,8 @@ void rcu_exit_nohz(void)
 
 /*
  * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
- * Also disable irqs to avoid confusion due to interrupt handlers invoking
- * call_rcu().
+ * Also disable irqs to avoid confusion due to interrupt handlers
+ * invoking call_rcu().
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
@@ -99,6 +97,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 		return 1;
 	}
 	local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -143,8 +142,8 @@ void rcu_check_callbacks(int cpu, int user)
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
-	unsigned long flags;
 	struct rcu_head *next, *list;
+	unsigned long flags;
 
 	/* If no RCU callbacks ready to invoke, just return. */
 	if (&rcp->rcucblist == rcp->donetail)
@@ -182,8 +181,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  * Null function to handle CPU being onlined.  Longer term, we want to
  * make TINY_RCU avoid using rcupdate.c, but later...
  */
-int rcu_cpu_notify(struct notifier_block *self,
-		   unsigned long action, void *hcpu)
+int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	return NOTIFY_OK;
 }
@@ -223,6 +221,7 @@ static void __call_rcu(struct rcu_head *head,
 
 	head->func = func;
 	head->next = NULL;
+
 	local_irq_save(flags);
 	*rcp->curtail = head;
 	rcp->curtail = &head->next;
@@ -234,8 +233,7 @@ static void __call_rcu(struct rcu_head *head,
  * period.  But since we have but one CPU, that would be after any
  * quiescent state.
  */
-void call_rcu(struct rcu_head *head,
-	      void (*func)(struct rcu_head *rcu))
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
 	__call_rcu(head, func, &rcu_ctrlblk);
 }
@@ -245,8 +243,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * Post an RCU bottom-half callback to be invoked after any subsequent
  * quiescent state.
  */
-void call_rcu_bh(struct rcu_head *head,
-		 void (*func)(struct rcu_head *rcu))
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
 	__call_rcu(head, func, &rcu_bh_ctrlblk);
 }
-- 
cgit v1.2.3


From b2c18e1e08a5a9663094d57bb4be2f02226ee61c Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Fri, 23 Oct 2009 17:14:49 -0400
Subject: cfq: calculate the seek_mean per cfq_queue not per cfq_io_context

async cfq_queue's are already shared between processes within the same
priority, and forthcoming patches will change the mapping of cic to sync
cfq_queue from 1:1 to 1:N.  So, calculate the seekiness of a process
based on the cfq_queue instead of the cfq_io_context.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/cfq-iosched.c       | 68 +++++++++++++++++++++++------------------------
 include/linux/iocontext.h |  5 ----
 2 files changed, 33 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 069a61017c02..78cc8ee5da41 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -112,6 +112,11 @@ struct cfq_queue {
 	unsigned short ioprio, org_ioprio;
 	unsigned short ioprio_class, org_ioprio_class;
 
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+	sector_t last_request_pos;
+
 	pid_t pid;
 };
 
@@ -962,16 +967,16 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 		return cfqd->last_position - blk_rq_pos(rq);
 }
 
-#define CIC_SEEK_THR	8 * 1024
-#define CIC_SEEKY(cic)	((cic)->seek_mean > CIC_SEEK_THR)
+#define CFQQ_SEEK_THR		8 * 1024
+#define CFQQ_SEEKY(cfqq)	((cfqq)->seek_mean > CFQQ_SEEK_THR)
 
-static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
+static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			       struct request *rq)
 {
-	struct cfq_io_context *cic = cfqd->active_cic;
-	sector_t sdist = cic->seek_mean;
+	sector_t sdist = cfqq->seek_mean;
 
-	if (!sample_valid(cic->seek_samples))
-		sdist = CIC_SEEK_THR;
+	if (!sample_valid(cfqq->seek_samples))
+		sdist = CFQQ_SEEK_THR;
 
 	return cfq_dist_from_last(cfqd, rq) <= sdist;
 }
@@ -1000,7 +1005,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
 	 * will contain the closest sector.
 	 */
 	__cfqq = rb_entry(parent, struct cfq_queue, p_node);
-	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
 		return __cfqq;
 
 	if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1011,7 +1016,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
 		return NULL;
 
 	__cfqq = rb_entry(node, struct cfq_queue, p_node);
-	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
 		return __cfqq;
 
 	return NULL;
@@ -1033,13 +1038,6 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 {
 	struct cfq_queue *cfqq;
 
-	/*
-	 * A valid cfq_io_context is necessary to compare requests against
-	 * the seek_mean of the current cfqq.
-	 */
-	if (!cfqd->active_cic)
-		return NULL;
-
 	/*
 	 * We should notice if some of the queues are cooperating, eg
 	 * working closely on the same area of the disk. In that case,
@@ -1110,7 +1108,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 * seeks. so allow a little bit of time for him to submit a new rq
 	 */
 	sl = cfqd->cfq_slice_idle;
-	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+	if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
 		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
@@ -1947,33 +1945,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 }
 
 static void
-cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
+cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		       struct request *rq)
 {
 	sector_t sdist;
 	u64 total;
 
-	if (!cic->last_request_pos)
+	if (!cfqq->last_request_pos)
 		sdist = 0;
-	else if (cic->last_request_pos < blk_rq_pos(rq))
-		sdist = blk_rq_pos(rq) - cic->last_request_pos;
+	else if (cfqq->last_request_pos < blk_rq_pos(rq))
+		sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
 	else
-		sdist = cic->last_request_pos - blk_rq_pos(rq);
+		sdist = cfqq->last_request_pos - blk_rq_pos(rq);
 
 	/*
 	 * Don't allow the seek distance to get too large from the
 	 * odd fragment, pagein, etc
 	 */
-	if (cic->seek_samples <= 60) /* second&third seek */
-		sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
+	if (cfqq->seek_samples <= 60) /* second&third seek */
+		sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
 	else
-		sdist = min(sdist, (cic->seek_mean * 4)	+ 2*1024*64);
+		sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
 
-	cic->seek_samples = (7*cic->seek_samples + 256) / 8;
-	cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
-	total = cic->seek_total + (cic->seek_samples/2);
-	do_div(total, cic->seek_samples);
-	cic->seek_mean = (sector_t)total;
+	cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
+	cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
+	total = cfqq->seek_total + (cfqq->seek_samples/2);
+	do_div(total, cfqq->seek_samples);
+	cfqq->seek_mean = (sector_t)total;
 }
 
 /*
@@ -1995,11 +1993,11 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
 	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
+	    (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		unsigned int slice_idle = cfqd->cfq_slice_idle;
-		if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+		if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
 			slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
 		if (cic->ttime_mean > slice_idle)
 			enable_idle = 0;
@@ -2066,7 +2064,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	 * if this request is as-good as one we would expect from the
 	 * current cfqq, let it preempt
 	 */
-	if (cfq_rq_close(cfqd, rq))
+	if (cfq_rq_close(cfqd, cfqq, rq))
 		return true;
 
 	return false;
@@ -2108,10 +2106,10 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqq->meta_pending++;
 
 	cfq_update_io_thinktime(cfqd, cic);
-	cfq_update_io_seektime(cfqd, cic, rq);
+	cfq_update_io_seektime(cfqd, cfqq, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
-	cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+	cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (cfqq == cfqd->active_queue) {
 		/*
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 4da4a75c3f1e..eb73632440f1 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -40,16 +40,11 @@ struct cfq_io_context {
 	struct io_context *ioc;
 
 	unsigned long last_end_request;
-	sector_t last_request_pos;
 
 	unsigned long ttime_total;
 	unsigned long ttime_samples;
 	unsigned long ttime_mean;
 
-	unsigned int seek_samples;
-	u64 seek_total;
-	sector_t seek_mean;
-
 	struct list_head queue_list;
 	struct hlist_node cic_list;
 
-- 
cgit v1.2.3


From a5f523bc0cdee2a163a034344ebf1163799b3c5d Mon Sep 17 00:00:00 2001
From: Tias Guns <tias@ulyssis.org>
Date: Sun, 25 Oct 2009 12:13:58 -0700
Subject: Input: add driver for Dynapro serial touchscreen

This is a driver for Dynapro serial touchscreen, which used to be
supported in Xorg. The driver needs updated inputattach utility to
initialize serial port and create proper serio device before the
driver will be bound to it.

Signed-off-by: Tias Guns <tias@ulyssis.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig   |  12 +++
 drivers/input/touchscreen/Makefile  |   1 +
 drivers/input/touchscreen/dynapro.c | 206 ++++++++++++++++++++++++++++++++++++
 include/linux/serio.h               |   1 +
 4 files changed, 220 insertions(+)
 create mode 100644 drivers/input/touchscreen/dynapro.c

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 8cc453c85ea7..1cd9e8c8efb3 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -111,6 +111,18 @@ config TOUCHSCREEN_DA9034
 	  Say Y here to enable the support for the touchscreen found
 	  on Dialog Semiconductor DA9034 PMIC.
 
+config TOUCHSCREEN_DYNAPRO
+	tristate "Dynapro serial touchscreen"
+	select SERIO
+	help
+	  Say Y here if you have a Dynapro serial touchscreen connected to
+	  your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called dynapro.
+
 config TOUCHSCREEN_EETI
 	tristate "EETI touchscreen panel support"
 	depends on I2C
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 15fa62cffc77..1f5cccd3a16a 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
 obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
 obj-$(CONFIG_TOUCHSCREEN_CORGI)		+= corgi_ts.o
+obj-$(CONFIG_TOUCHSCREEN_DYNAPRO)	+= dynapro.o
 obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
 obj-$(CONFIG_TOUCHSCREEN_EETI)		+= eeti_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ELO)		+= elo.o
diff --git a/drivers/input/touchscreen/dynapro.c b/drivers/input/touchscreen/dynapro.c
new file mode 100644
index 000000000000..455353908bdf
--- /dev/null
+++ b/drivers/input/touchscreen/dynapro.c
@@ -0,0 +1,206 @@
+/*
+ * Dynapro serial touchscreen driver
+ *
+ * Copyright (c) 2009 Tias Guns
+ * Based on the inexio driver (c) Vojtech Pavlik and Dan Streetman and
+ * Richard Lemon
+ *
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+/*
+ * 2009/09/19 Tias Guns <tias@ulyssis.org>
+ *   Copied inexio.c and edited for Dynapro protocol (from retired Xorg module)
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/serio.h>
+#include <linux/init.h>
+
+#define DRIVER_DESC	"Dynapro serial touchscreen driver"
+
+MODULE_AUTHOR("Tias Guns <tias@ulyssis.org>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+/*
+ * Definitions & global arrays.
+ */
+
+#define DYNAPRO_FORMAT_TOUCH_BIT 0x40
+#define DYNAPRO_FORMAT_LENGTH 3
+#define DYNAPRO_RESPONSE_BEGIN_BYTE 0x80
+
+#define DYNAPRO_MIN_XC 0
+#define DYNAPRO_MAX_XC 0x3ff
+#define DYNAPRO_MIN_YC 0
+#define DYNAPRO_MAX_YC 0x3ff
+
+#define DYNAPRO_GET_XC(data) (data[1] | ((data[0] & 0x38) << 4))
+#define DYNAPRO_GET_YC(data) (data[2] | ((data[0] & 0x07) << 7))
+#define DYNAPRO_GET_TOUCHED(data) (DYNAPRO_FORMAT_TOUCH_BIT & data[0])
+
+/*
+ * Per-touchscreen data.
+ */
+
+struct dynapro {
+	struct input_dev *dev;
+	struct serio *serio;
+	int idx;
+	unsigned char data[DYNAPRO_FORMAT_LENGTH];
+	char phys[32];
+};
+
+static void dynapro_process_data(struct dynapro *pdynapro)
+{
+	struct input_dev *dev = pdynapro->dev;
+
+	if (DYNAPRO_FORMAT_LENGTH == ++pdynapro->idx) {
+		input_report_abs(dev, ABS_X, DYNAPRO_GET_XC(pdynapro->data));
+		input_report_abs(dev, ABS_Y, DYNAPRO_GET_YC(pdynapro->data));
+		input_report_key(dev, BTN_TOUCH,
+				 DYNAPRO_GET_TOUCHED(pdynapro->data));
+		input_sync(dev);
+
+		pdynapro->idx = 0;
+	}
+}
+
+static irqreturn_t dynapro_interrupt(struct serio *serio,
+		unsigned char data, unsigned int flags)
+{
+	struct dynapro *pdynapro = serio_get_drvdata(serio);
+
+	pdynapro->data[pdynapro->idx] = data;
+
+	if (DYNAPRO_RESPONSE_BEGIN_BYTE & pdynapro->data[0])
+		dynapro_process_data(pdynapro);
+	else
+		dev_dbg(&serio->dev, "unknown/unsynchronized data: %x\n",
+			pdynapro->data[0]);
+
+	return IRQ_HANDLED;
+}
+
+static void dynapro_disconnect(struct serio *serio)
+{
+	struct dynapro *pdynapro = serio_get_drvdata(serio);
+
+	input_get_device(pdynapro->dev);
+	input_unregister_device(pdynapro->dev);
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_put_device(pdynapro->dev);
+	kfree(pdynapro);
+}
+
+/*
+ * dynapro_connect() is the routine that is called when someone adds a
+ * new serio device that supports dynapro protocol and registers it as
+ * an input device. This is usually accomplished using inputattach.
+ */
+
+static int dynapro_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct dynapro *pdynapro;
+	struct input_dev *input_dev;
+	int err;
+
+	pdynapro = kzalloc(sizeof(struct dynapro), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!pdynapro || !input_dev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+
+	pdynapro->serio = serio;
+	pdynapro->dev = input_dev;
+	snprintf(pdynapro->phys, sizeof(pdynapro->phys),
+		 "%s/input0", serio->phys);
+
+	input_dev->name = "Dynapro Serial TouchScreen";
+	input_dev->phys = pdynapro->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor = SERIO_DYNAPRO;
+	input_dev->id.product = 0;
+	input_dev->id.version = 0x0001;
+	input_dev->dev.parent = &serio->dev;
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+	input_set_abs_params(pdynapro->dev, ABS_X,
+			     DYNAPRO_MIN_XC, DYNAPRO_MAX_XC, 0, 0);
+	input_set_abs_params(pdynapro->dev, ABS_Y,
+			     DYNAPRO_MIN_YC, DYNAPRO_MAX_YC, 0, 0);
+
+	serio_set_drvdata(serio, pdynapro);
+
+	err = serio_open(serio, drv);
+	if (err)
+		goto fail2;
+
+	err = input_register_device(pdynapro->dev);
+	if (err)
+		goto fail3;
+
+	return 0;
+
+ fail3:	serio_close(serio);
+ fail2:	serio_set_drvdata(serio, NULL);
+ fail1:	input_free_device(input_dev);
+	kfree(pdynapro);
+	return err;
+}
+
+/*
+ * The serio driver structure.
+ */
+
+static struct serio_device_id dynapro_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_DYNAPRO,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, dynapro_serio_ids);
+
+static struct serio_driver dynapro_drv = {
+	.driver		= {
+		.name	= "dynapro",
+	},
+	.description	= DRIVER_DESC,
+	.id_table	= dynapro_serio_ids,
+	.interrupt	= dynapro_interrupt,
+	.connect	= dynapro_connect,
+	.disconnect	= dynapro_disconnect,
+};
+
+/*
+ * The functions for inserting/removing us as a module.
+ */
+
+static int __init dynapro_init(void)
+{
+	return serio_register_driver(&dynapro_drv);
+}
+
+static void __exit dynapro_exit(void)
+{
+	serio_unregister_driver(&dynapro_drv);
+}
+
+module_init(dynapro_init);
+module_exit(dynapro_exit);
diff --git a/include/linux/serio.h b/include/linux/serio.h
index a640bc2afe76..e2f3044d4a4a 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -215,5 +215,6 @@ static inline void serio_unpin_driver(struct serio *serio)
 #define SERIO_INEXIO	0x37
 #define SERIO_TOUCHIT213	0x38
 #define SERIO_W8001	0x39
+#define SERIO_DYNAPRO	0x3a
 
 #endif
-- 
cgit v1.2.3


From 9b798d50df3a98d22a6cbae565d9f4f630d161a6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 27 Oct 2009 11:36:43 +0900
Subject: sh: intc: Make ack_regs generally available.

Currently this is ifdef'ed under SH-3 and SH-4A, but there are other CPUs
that will need this as well. Given the size of the existing data
structures, this doesn't cause any additional cacheline utilization for
the existing users, so has no direct impact on the data structures.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 14 +-------------
 include/linux/sh_intc.h |  4 ----
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 559b5fe9dc0f..94e6e46ff82c 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -70,9 +70,7 @@ static LIST_HEAD(intc_list);
 #endif
 
 static unsigned int intc_prio_level[NR_IRQS]; /* for now */
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static unsigned long ack_handle[NR_IRQS];
-#endif
 
 static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
 {
@@ -250,7 +248,6 @@ static int intc_set_wake(unsigned int irq, unsigned int on)
 	return 0; /* allow wakeup, but setup hardware in intc_suspend() */
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static void intc_mask_ack(unsigned int irq)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
@@ -282,7 +279,6 @@ static void intc_mask_ack(unsigned int irq)
 		}
 	}
 }
-#endif
 
 static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp,
 					     unsigned int nr_hp,
@@ -501,7 +497,6 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 	return 0;
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static unsigned int __init intc_ack_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id)
@@ -533,7 +528,6 @@ static unsigned int __init intc_ack_data(struct intc_desc *desc,
 
 	return 0;
 }
-#endif
 
 static unsigned int __init intc_sense_data(struct intc_desc *desc,
 					   struct intc_desc_int *d,
@@ -641,10 +635,8 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	/* irq should be disabled by default */
 	d->chip.mask(irq);
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	if (desc->ack_regs)
 		ack_handle[irq] = intc_ack_data(desc, d, enum_id);
-#endif
 }
 
 static unsigned int __init save_reg(struct intc_desc_int *d,
@@ -681,10 +673,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->nr_reg = desc->mask_regs ? desc->nr_mask_regs * 2 : 0;
 	d->nr_reg += desc->prio_regs ? desc->nr_prio_regs * 2 : 0;
 	d->nr_reg += desc->sense_regs ? desc->nr_sense_regs : 0;
-
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	d->nr_reg += desc->ack_regs ? desc->nr_ack_regs : 0;
-#endif
+
 	d->reg = kzalloc(d->nr_reg * sizeof(*d->reg), GFP_NOWAIT);
 #ifdef CONFIG_SMP
 	d->smp = kzalloc(d->nr_reg * sizeof(*d->smp), GFP_NOWAIT);
@@ -727,14 +717,12 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->chip.set_type = intc_set_sense;
 	d->chip.set_wake = intc_set_wake;
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	if (desc->ack_regs) {
 		for (i = 0; i < desc->nr_ack_regs; i++)
 			k += save_reg(d, k, desc->ack_regs[i].set_reg, 0);
 
 		d->chip.mask_ack = intc_mask_ack;
 	}
-#endif
 
 	BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */
 
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 68e212ff9dde..4e4b22d50164 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -57,10 +57,8 @@ struct intc_desc {
 	struct intc_sense_reg *sense_regs;
 	unsigned int nr_sense_regs;
 	char *name;
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	struct intc_mask_reg *ack_regs;
 	unsigned int nr_ack_regs;
-#endif
 };
 
 #define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
@@ -73,7 +71,6 @@ struct intc_desc symbol __initdata = {					\
 	chipname,							\
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 #define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
 	mask_regs, prio_regs, sense_regs, ack_regs)			\
 struct intc_desc symbol __initdata = {					\
@@ -83,7 +80,6 @@ struct intc_desc symbol __initdata = {					\
 	chipname,							\
 	_INTC_ARRAY(ack_regs),						\
 }
-#endif
 
 void __init register_intc_controller(struct intc_desc *desc);
 int intc_set_priority(unsigned int irq, unsigned int prio);
-- 
cgit v1.2.3


From 05423b241311c9380b7280179295bac7794281b6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 26 Oct 2009 18:40:35 -0700
Subject: vlan: allow null VLAN ID to be used

We currently use a 16 bit field (vlan_tci) to store VLAN ID/PRIO on a skb.

Null value is used as a special value, meaning vlan tagging not enabled.
This forbids use of null vlan ID.

As pointed by David, some drivers use the 3 high order bits (PRIO)

As VLAN ID is 12 bits, we can use the remaining bit (CFI) as a flag, and
allow null VLAN ID.

In case future code really wants to use VLAN_CFI_MASK, we'll have to use
a bit outside of vlan_tci.

#define VLAN_PRIO_MASK         0xe000 /* Priority Code Point */
#define VLAN_PRIO_SHIFT        13
#define VLAN_CFI_MASK          0x1000 /* Canonical Format Indicator */
#define VLAN_TAG_PRESENT       VLAN_CFI_MASK
#define VLAN_VID_MASK          0x0fff /* VLAN Identifier */

Reported-by: Gertjan Hofman <gertjan_hofman@yahoo.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 14 +++++++++-----
 net/8021q/vlan.h        |  2 +-
 net/8021q/vlan_dev.c    |  2 +-
 net/core/dev.c          |  2 +-
 net/packet/af_packet.c  |  5 +++--
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 7ff9af1d0f05..8898cbebcf34 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -63,7 +63,11 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 	return (struct vlan_ethhdr *)skb_mac_header(skb);
 }
 
-#define VLAN_VID_MASK	0xfff
+#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT		13
+#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
+#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
 
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
@@ -105,8 +109,8 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
-#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci)
-#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci)
+#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
+#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
@@ -231,7 +235,7 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
 						     u16 vlan_tci)
 {
-	skb->vlan_tci = vlan_tci;
+	skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
 	return skb;
 }
 
@@ -284,7 +288,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
 					 u16 *vlan_tci)
 {
 	if (vlan_tx_tag_present(skb)) {
-		*vlan_tci = skb->vlan_tci;
+		*vlan_tci = vlan_tx_tag_get(skb);
 		return 0;
 	} else {
 		*vlan_tci = 0;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 82570bc2a180..4ade5edf1033 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -89,7 +89,7 @@ static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 {
 	struct vlan_dev_info *vip = vlan_dev_info(dev);
 
-	return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7];
+	return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7];
 }
 
 #ifdef CONFIG_VLAN_8021Q_GVRP
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4198ec5c8abc..e3701977f58d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -393,7 +393,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
-	u32 vlan_qos = (vlan_prio << 13) & 0xE000;
+	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
 
 	/* See if a priority mapping exists.. */
 	mp = vlan->egress_priority_map[skb_prio & 0xF];
diff --git a/net/core/dev.c b/net/core/dev.c
index e7bada1d5ed9..950c13fa60d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2300,7 +2300,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
 
 	/* if we've gotten here through NAPI, check netpoll */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ff752c606413..33e68f20ec61 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -79,6 +79,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
+#include <linux/if_vlan.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -766,7 +767,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			getnstimeofday(&ts);
 		h.h2->tp_sec = ts.tv_sec;
 		h.h2->tp_nsec = ts.tv_nsec;
-		h.h2->tp_vlan_tci = skb->vlan_tci;
+		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
 		hdrlen = sizeof(*h.h2);
 		break;
 	default:
@@ -1493,7 +1494,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
 		aux.tp_net = skb_network_offset(skb);
-		aux.tp_vlan_tci = skb->vlan_tci;
+		aux.tp_vlan_tci = vlan_tx_tag_get(skb);
 
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
-- 
cgit v1.2.3


From 2c28e2451dba2260e9f88811b29a7787db7e7616 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 26 Oct 2009 13:57:44 -0700
Subject: rcu: Fix TINY_RCU #elif condition

Some compilers are happy with "#elif CONFIG_RCU_TINY", while
others strongly prefer "#elif defined(CONFIG_RCU_TINY)".  Change
to the latter to make more compilers happy.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12565906642768-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6dd71fa48429..2f1bc42a3b82 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -77,7 +77,7 @@ extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
-#elif CONFIG_TINY_RCU
+#elif defined(CONFIG_TINY_RCU)
 #include <linux/rcutiny.h>
 #else
 #error "Unknown RCU implementation specified to kernel configuration"
-- 
cgit v1.2.3


From 8b45499ccb8a93cd68b1a8766786c2f8ea991ae2 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 9 Oct 2009 20:32:10 +0200
Subject: ssb: Put host pointers into a union

This slightly shrinks the structure.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_pcicore.c |  4 ++--
 include/linux/ssb/ssb.h      | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index 538c570df337..f1dcd7969a5c 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -551,13 +551,13 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc,
 	might_sleep_if(pdev->id.coreid != SSB_DEV_PCI);
 
 	/* Enable interrupts for this device. */
-	if (bus->host_pci &&
-	    ((pdev->id.revision >= 6) || (pdev->id.coreid == SSB_DEV_PCIE))) {
+	if ((pdev->id.revision >= 6) || (pdev->id.coreid == SSB_DEV_PCIE)) {
 		u32 coremask;
 
 		/* Calculate the "coremask" for the device. */
 		coremask = (1 << dev->core_index);
 
+		SSB_WARN_ON(bus->bustype != SSB_BUSTYPE_PCI);
 		err = pci_read_config_dword(bus->host_pci, SSB_PCI_IRQMASK, &tmp);
 		if (err)
 			goto out;
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 3d0a9ff24f01..24f988547361 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -269,7 +269,8 @@ struct ssb_bus {
 
 	const struct ssb_bus_ops *ops;
 
-	/* The core in the basic address register window. (PCI bus only) */
+	/* The core currently mapped into the MMIO window.
+	 * Not valid on all host-buses. So don't use outside of SSB. */
 	struct ssb_device *mapped_device;
 	union {
 		/* Currently mapped PCMCIA segment. (bustype == SSB_BUSTYPE_PCMCIA only) */
@@ -281,14 +282,17 @@ struct ssb_bus {
 	 * On PCMCIA-host busses this is used to protect the whole MMIO access. */
 	spinlock_t bar_lock;
 
-	/* The bus this backplane is running on. */
+	/* The host-bus this backplane is running on. */
 	enum ssb_bustype bustype;
-	/* Pointer to the PCI bus (only valid if bustype == SSB_BUSTYPE_PCI). */
-	struct pci_dev *host_pci;
-	/* Pointer to the PCMCIA device (only if bustype == SSB_BUSTYPE_PCMCIA). */
-	struct pcmcia_device *host_pcmcia;
-	/* Pointer to the SDIO device (only if bustype == SSB_BUSTYPE_SDIO). */
-	struct sdio_func *host_sdio;
+	/* Pointers to the host-bus. Check bustype before using any of these pointers. */
+	union {
+		/* Pointer to the PCI bus (only valid if bustype == SSB_BUSTYPE_PCI). */
+		struct pci_dev *host_pci;
+		/* Pointer to the PCMCIA device (only if bustype == SSB_BUSTYPE_PCMCIA). */
+		struct pcmcia_device *host_pcmcia;
+		/* Pointer to the SDIO device (only if bustype == SSB_BUSTYPE_SDIO). */
+		struct sdio_func *host_sdio;
+	};
 
 	/* See enum ssb_quirks */
 	unsigned int quirks;
-- 
cgit v1.2.3


From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Mon, 26 Oct 2009 09:26:18 -0400
Subject: tpm add default function definitions

Add default tpm_pcr_read/extend function definitions required
by IMA/Kconfig changes.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Reviewed-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/tpm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 3338b3f5c21a..8eaa8f83effb 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -31,5 +31,12 @@
 
 extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf);
 extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash);
+#else
+static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) {
+	return -ENODEV;
+}
+static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) {
+	return -ENODEV;
+}
 #endif
 #endif
-- 
cgit v1.2.3


From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 27 Oct 2009 14:41:35 -0400
Subject: nfs: new subdir Documentation/filesystems/nfs

We're adding enough nfs documentation that it may as well have its own
subdirectory.

Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 Documentation/filesystems/00-INDEX             |  10 +-
 Documentation/filesystems/Exporting            | 147 --------------
 Documentation/filesystems/nfs-rdma.txt         | 271 -------------------------
 Documentation/filesystems/nfs.txt              |  98 ---------
 Documentation/filesystems/nfs/00-INDEX         |  12 ++
 Documentation/filesystems/nfs/Exporting        | 147 ++++++++++++++
 Documentation/filesystems/nfs/nfs-rdma.txt     | 271 +++++++++++++++++++++++++
 Documentation/filesystems/nfs/nfs.txt          |  98 +++++++++
 Documentation/filesystems/nfs/nfs41-server.txt | 222 ++++++++++++++++++++
 Documentation/filesystems/nfs/nfsroot.txt      | 270 ++++++++++++++++++++++++
 Documentation/filesystems/nfs41-server.txt     | 222 --------------------
 Documentation/filesystems/nfsroot.txt          | 270 ------------------------
 Documentation/filesystems/porting              |   2 +-
 Documentation/kernel-parameters.txt            |   6 +-
 fs/cifs/export.c                               |   2 +-
 fs/exportfs/expfs.c                            |   2 +-
 fs/isofs/export.c                              |   2 +-
 fs/nfs/Kconfig                                 |   2 +-
 include/linux/exportfs.h                       |   2 +-
 net/ipv4/Kconfig                               |   6 +-
 net/ipv4/ipconfig.c                            |   2 +-
 21 files changed, 1035 insertions(+), 1029 deletions(-)
 delete mode 100644 Documentation/filesystems/Exporting
 delete mode 100644 Documentation/filesystems/nfs-rdma.txt
 delete mode 100644 Documentation/filesystems/nfs.txt
 create mode 100644 Documentation/filesystems/nfs/00-INDEX
 create mode 100644 Documentation/filesystems/nfs/Exporting
 create mode 100644 Documentation/filesystems/nfs/nfs-rdma.txt
 create mode 100644 Documentation/filesystems/nfs/nfs.txt
 create mode 100644 Documentation/filesystems/nfs/nfs41-server.txt
 create mode 100644 Documentation/filesystems/nfs/nfsroot.txt
 delete mode 100644 Documentation/filesystems/nfs41-server.txt
 delete mode 100644 Documentation/filesystems/nfsroot.txt

(limited to 'include/linux')

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index f15621ee5599..482151c883a5 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -1,7 +1,5 @@
 00-INDEX
 	- this file (info on some of the filesystems supported by linux).
-Exporting
-	- explanation of how to make filesystems exportable.
 Locking
 	- info on locking rules as they pertain to Linux VFS.
 9p.txt
@@ -66,12 +64,8 @@ mandatory-locking.txt
 	- info on the Linux implementation of Sys V mandatory file locking.
 ncpfs.txt
 	- info on Novell Netware(tm) filesystem using NCP protocol.
-nfs41-server.txt
-	- info on the Linux server implementation of NFSv4 minor version 1.
-nfs-rdma.txt
-	- how to install and setup the Linux NFS/RDMA client and server software.
-nfsroot.txt
-	- short guide on setting up a diskless box with NFS root filesystem.
+nfs/
+	- nfs-related documentation.
 nilfs2.txt
 	- info and mount options for the NILFS2 filesystem.
 ntfs.txt
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting
deleted file mode 100644
index 87019d2b5981..000000000000
--- a/Documentation/filesystems/Exporting
+++ /dev/null
@@ -1,147 +0,0 @@
-
-Making Filesystems Exportable
-=============================
-
-Overview
---------
-
-All filesystem operations require a dentry (or two) as a starting
-point.  Local applications have a reference-counted hold on suitable
-dentries via open file descriptors or cwd/root.  However remote
-applications that access a filesystem via a remote filesystem protocol
-such as NFS may not be able to hold such a reference, and so need a
-different way to refer to a particular dentry.  As the alternative
-form of reference needs to be stable across renames, truncates, and
-server-reboot (among other things, though these tend to be the most
-problematic), there is no simple answer like 'filename'.
-
-The mechanism discussed here allows each filesystem implementation to
-specify how to generate an opaque (outside of the filesystem) byte
-string for any dentry, and how to find an appropriate dentry for any
-given opaque byte string.
-This byte string will be called a "filehandle fragment" as it
-corresponds to part of an NFS filehandle.
-
-A filesystem which supports the mapping between filehandle fragments
-and dentries will be termed "exportable".
-
-
-
-Dcache Issues
--------------
-
-The dcache normally contains a proper prefix of any given filesystem
-tree.  This means that if any filesystem object is in the dcache, then
-all of the ancestors of that filesystem object are also in the dcache.
-As normal access is by filename this prefix is created naturally and
-maintained easily (by each object maintaining a reference count on
-its parent).
-
-However when objects are included into the dcache by interpreting a
-filehandle fragment, there is no automatic creation of a path prefix
-for the object.  This leads to two related but distinct features of
-the dcache that are not needed for normal filesystem access.
-
-1/ The dcache must sometimes contain objects that are not part of the
-   proper prefix. i.e that are not connected to the root.
-2/ The dcache must be prepared for a newly found (via ->lookup) directory
-   to already have a (non-connected) dentry, and must be able to move
-   that dentry into place (based on the parent and name in the
-   ->lookup).   This is particularly needed for directories as
-   it is a dcache invariant that directories only have one dentry.
-
-To implement these features, the dcache has:
-
-a/ A dentry flag DCACHE_DISCONNECTED which is set on
-   any dentry that might not be part of the proper prefix.
-   This is set when anonymous dentries are created, and cleared when a
-   dentry is noticed to be a child of a dentry which is in the proper
-   prefix. 
-
-b/ A per-superblock list "s_anon" of dentries which are the roots of
-   subtrees that are not in the proper prefix.  These dentries, as
-   well as the proper prefix, need to be released at unmount time.  As
-   these dentries will not be hashed, they are linked together on the
-   d_hash list_head.
-
-c/ Helper routines to allocate anonymous dentries, and to help attach
-   loose directory dentries at lookup time. They are:
-    d_alloc_anon(inode) will return a dentry for the given inode.
-      If the inode already has a dentry, one of those is returned.
-      If it doesn't, a new anonymous (IS_ROOT and
-        DCACHE_DISCONNECTED) dentry is allocated and attached.
-      In the case of a directory, care is taken that only one dentry
-      can ever be attached.
-    d_splice_alias(inode, dentry) will make sure that there is a
-      dentry with the same name and parent as the given dentry, and
-      which refers to the given inode.
-      If the inode is a directory and already has a dentry, then that
-      dentry is d_moved over the given dentry.
-      If the passed dentry gets attached, care is taken that this is
-      mutually exclusive to a d_alloc_anon operation.
-      If the passed dentry is used, NULL is returned, else the used
-      dentry is returned.  This corresponds to the calling pattern of
-      ->lookup.
-  
- 
-Filesystem Issues
------------------
-
-For a filesystem to be exportable it must:
- 
-   1/ provide the filehandle fragment routines described below.
-   2/ make sure that d_splice_alias is used rather than d_add
-      when ->lookup finds an inode for a given parent and name.
-      Typically the ->lookup routine will end with a:
-
-		return d_splice_alias(inode, dentry);
-	}
-
-
-
-  A file system implementation declares that instances of the filesystem
-are exportable by setting the s_export_op field in the struct
-super_block.  This field must point to a "struct export_operations"
-struct which has the following members:
-
- encode_fh  (optional)
-    Takes a dentry and creates a filehandle fragment which can later be used
-    to find or create a dentry for the same object.  The default
-    implementation creates a filehandle fragment that encodes a 32bit inode
-    and generation number for the inode encoded, and if necessary the
-    same information for the parent.
-
-  fh_to_dentry (mandatory)
-    Given a filehandle fragment, this should find the implied object and
-    create a dentry for it (possibly with d_alloc_anon).
-
-  fh_to_parent (optional but strongly recommended)
-    Given a filehandle fragment, this should find the parent of the
-    implied object and create a dentry for it (possibly with d_alloc_anon).
-    May fail if the filehandle fragment is too small.
-
-  get_parent (optional but strongly recommended)
-    When given a dentry for a directory, this should return  a dentry for
-    the parent.  Quite possibly the parent dentry will have been allocated
-    by d_alloc_anon.  The default get_parent function just returns an error
-    so any filehandle lookup that requires finding a parent will fail.
-    ->lookup("..") is *not* used as a default as it can leave ".." entries
-    in the dcache which are too messy to work with.
-
-  get_name (optional)
-    When given a parent dentry and a child dentry, this should find a name
-    in the directory identified by the parent dentry, which leads to the
-    object identified by the child dentry.  If no get_name function is
-    supplied, a default implementation is provided which uses vfs_readdir
-    to find potential names, and matches inode numbers to find the correct
-    match.
-
-
-A filehandle fragment consists of an array of 1 or more 4byte words,
-together with a one byte "type".
-The decode_fh routine should not depend on the stated size that is
-passed to it.  This size may be larger than the original filehandle
-generated by encode_fh, in which case it will have been padded with
-nuls.  Rather, the encode_fh routine should choose a "type" which
-indicates the decode_fh how much of the filehandle is valid, and how
-it should be interpreted.
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
deleted file mode 100644
index e386f7e4bcee..000000000000
--- a/Documentation/filesystems/nfs-rdma.txt
+++ /dev/null
@@ -1,271 +0,0 @@
-################################################################################
-#									       #
-#				NFS/RDMA README				       #
-#									       #
-################################################################################
-
- Author: NetApp and Open Grid Computing
- Date: May 29, 2008
-
-Table of Contents
-~~~~~~~~~~~~~~~~~
- - Overview
- - Getting Help
- - Installation
- - Check RDMA and NFS Setup
- - NFS/RDMA Setup
-
-Overview
-~~~~~~~~
-
-  This document describes how to install and setup the Linux NFS/RDMA client
-  and server software.
-
-  The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
-  was first included in the following release, Linux 2.6.25.
-
-  In our testing, we have obtained excellent performance results (full 10Gbit
-  wire bandwidth at minimal client CPU) under many workloads. The code passes
-  the full Connectathon test suite and operates over both Infiniband and iWARP
-  RDMA adapters.
-
-Getting Help
-~~~~~~~~~~~~
-
-  If you get stuck, you can ask questions on the
-
-                nfs-rdma-devel@lists.sourceforge.net
-
-  mailing list.
-
-Installation
-~~~~~~~~~~~~
-
-  These instructions are a step by step guide to building a machine for
-  use with NFS/RDMA.
-
-  - Install an RDMA device
-
-    Any device supported by the drivers in drivers/infiniband/hw is acceptable.
-
-    Testing has been performed using several Mellanox-based IB cards, the
-    Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
-
-  - Install a Linux distribution and tools
-
-    The first kernel release to contain both the NFS/RDMA client and server was
-    Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
-    Linux kernel release should be installed.
-
-    The procedures described in this document have been tested with
-    distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
-
-  - Install nfs-utils-1.1.2 or greater on the client
-
-    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
-    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
-    version with support for NFS/RDMA mounts, but for various reasons we
-    recommend using nfs-utils-1.1.2 or greater). To see which version of
-    mount.nfs you are using, type:
-
-    $ /sbin/mount.nfs -V
-
-    If the version is less than 1.1.2 or the command does not exist,
-    you should install the latest version of nfs-utils.
-
-    Download the latest package from:
-
-    http://www.kernel.org/pub/linux/utils/nfs
-
-    Uncompress the package and follow the installation instructions.
-
-    If you will not need the idmapper and gssd executables (you do not need
-    these to create an NFS/RDMA enabled mount command), the installation
-    process can be simplified by disabling these features when running
-    configure:
-
-    $ ./configure --disable-gss --disable-nfsv4
-
-    To build nfs-utils you will need the tcp_wrappers package installed. For
-    more information on this see the package's README and INSTALL files.
-
-    After building the nfs-utils package, there will be a mount.nfs binary in
-    the utils/mount directory. This binary can be used to initiate NFS v2, v3,
-    or v4 mounts. To initiate a v4 mount, the binary must be called
-    mount.nfs4.  The standard technique is to create a symlink called
-    mount.nfs4 to mount.nfs.
-
-    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
-
-    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
-
-    In this location, mount.nfs will be invoked automatically for NFS mounts
-    by the system mount command.
-
-    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
-    on the NFS client machine. You do not need this specific version of
-    nfs-utils on the server. Furthermore, only the mount.nfs command from
-    nfs-utils-1.1.2 is needed on the client.
-
-  - Install a Linux kernel with NFS/RDMA
-
-    The NFS/RDMA client and server are both included in the mainline Linux
-    kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
-    kernel can be found at:
-
-    ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
-
-    Download the sources and place them in an appropriate location.
-
-  - Configure the RDMA stack
-
-    Make sure your kernel configuration has RDMA support enabled. Under
-    Device Drivers -> InfiniBand support, update the kernel configuration
-    to enable InfiniBand support [NOTE: the option name is misleading. Enabling
-    InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
-
-    Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
-    iWARP adapter support (amso, cxgb3, etc.).
-
-    If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
-
-  - Configure the NFS client and server
-
-    Your kernel configuration must also have NFS file system support and/or
-    NFS server support enabled. These and other NFS related configuration
-    options can be found under File Systems -> Network File Systems.
-
-  - Build, install, reboot
-
-    The NFS/RDMA code will be enabled automatically if NFS and RDMA
-    are turned on. The NFS/RDMA client and server are configured via the hidden
-    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
-    value of SUNRPC_XPRT_RDMA will be:
-
-     - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
-       and server will not be built
-     - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
-       in this case the NFS/RDMA client and server will be built as modules
-     - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
-       and server will be built into the kernel
-
-    Therefore, if you have followed the steps above and turned no NFS and RDMA,
-    the NFS/RDMA client and server will be built.
-
-    Build a new kernel, install it, boot it.
-
-Check RDMA and NFS Setup
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-    Before configuring the NFS/RDMA software, it is a good idea to test
-    your new kernel to ensure that the kernel is working correctly.
-    In particular, it is a good idea to verify that the RDMA stack
-    is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
-    is working properly.
-
-  - Check RDMA Setup
-
-    If you built the RDMA components as modules, load them at
-    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
-    card:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-
-    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
-    running on the network. If your IB switch has an embedded SM, you can
-    use it. Otherwise, you will need to run an SM, such as OpenSM, on one
-    of your end nodes.
-
-    If an SM is running on your network, you should see the following:
-
-    $ cat /sys/class/infiniband/driverX/ports/1/state
-    4: ACTIVE
-
-    where driverX is mthca0, ipath5, ehca3, etc.
-
-    To further test the InfiniBand software stack, use IPoIB (this
-    assumes you have two IB hosts named host1 and host2):
-
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
-    host1$ ping a.b.c.y
-    host2$ ping a.b.c.x
-
-    For other device types, follow the appropriate procedures.
-
-  - Check NFS Setup
-
-    For the NFS components enabled above (client and/or server),
-    test their functionality over standard Ethernet using TCP/IP or UDP/IP.
-
-NFS/RDMA Setup
-~~~~~~~~~~~~~~
-
-  We recommend that you use two machines, one to act as the client and
-  one to act as the server.
-
-  One time configuration:
-
-  - On the server system, configure the /etc/exports file and
-    start the NFS/RDMA server.
-
-    Exports entries with the following formats have been tested:
-
-    /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
-    /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
-
-    The IP address(es) is(are) the client's IPoIB address for an InfiniBand
-    HCA or the cleint's iWARP address(es) for an RNIC.
-
-    NOTE: The "insecure" option must be used because the NFS/RDMA client does
-    not use a reserved port.
-
- Each time a machine boots:
-
-  - Load and configure the RDMA drivers
-
-    For InfiniBand using a Mellanox adapter:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
-
-    NOTE: use unique addresses for the client and server
-
-  - Start the NFS server
-
-    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA transport module:
-
-    $ modprobe svcrdma
-
-    Regardless of how the server was built (module or built-in), start the
-    server:
-
-    $ /etc/init.d/nfs start
-
-    or
-
-    $ service nfs start
-
-    Instruct the server to listen on the RDMA transport:
-
-    $ echo rdma 20049 > /proc/fs/nfsd/portlist
-
-  - On the client system
-
-    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA client module:
-
-    $ modprobe xprtrdma.ko
-
-    Regardless of how the client was built (module or built-in), use this
-    command to mount the NFS/RDMA server:
-
-    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
-
-    To verify that the mount is using RDMA, run "cat /proc/mounts" and check
-    the "proto" field for the given mount.
-
-  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs.txt
deleted file mode 100644
index f50f26ce6cd0..000000000000
--- a/Documentation/filesystems/nfs.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-
-The NFS client
-==============
-
-The NFS version 2 protocol was first documented in RFC1094 (March 1989).
-Since then two more major releases of NFS have been published, with NFSv3
-being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
-2003).
-
-The Linux NFS client currently supports all the above published versions,
-and work is in progress on adding support for minor version 1 of the NFSv4
-protocol.
-
-The purpose of this document is to provide information on some of the
-upcall interfaces that are used in order to provide the NFS client with
-some of the information that it requires in order to fully comply with
-the NFS spec.
-
-The DNS resolver
-================
-
-NFSv4 allows for one server to refer the NFS client to data that has been
-migrated onto another server by means of the special "fs_locations"
-attribute. See
-	http://tools.ietf.org/html/rfc3530#section-6
-and
-	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
-
-The fs_locations information can take the form of either an ip address and
-a path, or a DNS hostname and a path. The latter requires the NFS client to
-do a DNS lookup in order to mount the new volume, and hence the need for an
-upcall to allow userland to provide this service.
-
-Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
-/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
-
-   (1) The process checks the dns_resolve cache to see if it contains a
-       valid entry. If so, it returns that entry and exits.
-
-   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
-       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
-       is run, with two arguments:
-		- the cache name, "dns_resolve"
-		- the hostname to resolve
-
-   (3) After looking up the corresponding ip address, the helper script
-       writes the result into the rpc_pipefs pseudo-file
-       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
-       in the following (text) format:
-
-		"<ip address> <hostname> <ttl>\n"
-
-       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
-       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
-       <hostname> is identical to the second argument of the helper
-       script, and <ttl> is the 'time to live' of this cache entry (in
-       units of seconds).
-
-       Note: If <ip address> is invalid, say the string "0", then a negative
-       entry is created, which will cause the kernel to treat the hostname
-       as having no valid DNS translation.
-
-
-
-
-A basic sample /sbin/nfs_cache_getent
-=====================================
-
-#!/bin/bash
-#
-ttl=600
-#
-cut=/usr/bin/cut
-getent=/usr/bin/getent
-rpc_pipefs=/var/lib/nfs/rpc_pipefs
-#
-die()
-{
-	echo "Usage: $0 cache_name entry_name"
-	exit 1
-}
-
-[ $# -lt 2 ] && die
-cachename="$1"
-cache_path=${rpc_pipefs}/cache/${cachename}/channel
-
-case "${cachename}" in
-	dns_resolve)
-		name="$2"
-		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
-		[ -z "${result}" ] && result="0"
-		;;
-	*)
-		die
-		;;
-esac
-echo "${result} ${name} ${ttl}" >${cache_path}
-
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
new file mode 100644
index 000000000000..6ff3d212027b
--- /dev/null
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -0,0 +1,12 @@
+00-INDEX
+	- this file (nfs-related documentation).
+Exporting
+	- explanation of how to make filesystems exportable.
+nfs.txt
+	- nfs client, and DNS resolution for fs_locations.
+nfs41-server.txt
+	- info on the Linux server implementation of NFSv4 minor version 1.
+nfs-rdma.txt
+	- how to install and setup the Linux NFS/RDMA client and server software
+nfsroot.txt
+	- short guide on setting up a diskless box with NFS root filesystem.
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
new file mode 100644
index 000000000000..87019d2b5981
--- /dev/null
+++ b/Documentation/filesystems/nfs/Exporting
@@ -0,0 +1,147 @@
+
+Making Filesystems Exportable
+=============================
+
+Overview
+--------
+
+All filesystem operations require a dentry (or two) as a starting
+point.  Local applications have a reference-counted hold on suitable
+dentries via open file descriptors or cwd/root.  However remote
+applications that access a filesystem via a remote filesystem protocol
+such as NFS may not be able to hold such a reference, and so need a
+different way to refer to a particular dentry.  As the alternative
+form of reference needs to be stable across renames, truncates, and
+server-reboot (among other things, though these tend to be the most
+problematic), there is no simple answer like 'filename'.
+
+The mechanism discussed here allows each filesystem implementation to
+specify how to generate an opaque (outside of the filesystem) byte
+string for any dentry, and how to find an appropriate dentry for any
+given opaque byte string.
+This byte string will be called a "filehandle fragment" as it
+corresponds to part of an NFS filehandle.
+
+A filesystem which supports the mapping between filehandle fragments
+and dentries will be termed "exportable".
+
+
+
+Dcache Issues
+-------------
+
+The dcache normally contains a proper prefix of any given filesystem
+tree.  This means that if any filesystem object is in the dcache, then
+all of the ancestors of that filesystem object are also in the dcache.
+As normal access is by filename this prefix is created naturally and
+maintained easily (by each object maintaining a reference count on
+its parent).
+
+However when objects are included into the dcache by interpreting a
+filehandle fragment, there is no automatic creation of a path prefix
+for the object.  This leads to two related but distinct features of
+the dcache that are not needed for normal filesystem access.
+
+1/ The dcache must sometimes contain objects that are not part of the
+   proper prefix. i.e that are not connected to the root.
+2/ The dcache must be prepared for a newly found (via ->lookup) directory
+   to already have a (non-connected) dentry, and must be able to move
+   that dentry into place (based on the parent and name in the
+   ->lookup).   This is particularly needed for directories as
+   it is a dcache invariant that directories only have one dentry.
+
+To implement these features, the dcache has:
+
+a/ A dentry flag DCACHE_DISCONNECTED which is set on
+   any dentry that might not be part of the proper prefix.
+   This is set when anonymous dentries are created, and cleared when a
+   dentry is noticed to be a child of a dentry which is in the proper
+   prefix. 
+
+b/ A per-superblock list "s_anon" of dentries which are the roots of
+   subtrees that are not in the proper prefix.  These dentries, as
+   well as the proper prefix, need to be released at unmount time.  As
+   these dentries will not be hashed, they are linked together on the
+   d_hash list_head.
+
+c/ Helper routines to allocate anonymous dentries, and to help attach
+   loose directory dentries at lookup time. They are:
+    d_alloc_anon(inode) will return a dentry for the given inode.
+      If the inode already has a dentry, one of those is returned.
+      If it doesn't, a new anonymous (IS_ROOT and
+        DCACHE_DISCONNECTED) dentry is allocated and attached.
+      In the case of a directory, care is taken that only one dentry
+      can ever be attached.
+    d_splice_alias(inode, dentry) will make sure that there is a
+      dentry with the same name and parent as the given dentry, and
+      which refers to the given inode.
+      If the inode is a directory and already has a dentry, then that
+      dentry is d_moved over the given dentry.
+      If the passed dentry gets attached, care is taken that this is
+      mutually exclusive to a d_alloc_anon operation.
+      If the passed dentry is used, NULL is returned, else the used
+      dentry is returned.  This corresponds to the calling pattern of
+      ->lookup.
+  
+ 
+Filesystem Issues
+-----------------
+
+For a filesystem to be exportable it must:
+ 
+   1/ provide the filehandle fragment routines described below.
+   2/ make sure that d_splice_alias is used rather than d_add
+      when ->lookup finds an inode for a given parent and name.
+      Typically the ->lookup routine will end with a:
+
+		return d_splice_alias(inode, dentry);
+	}
+
+
+
+  A file system implementation declares that instances of the filesystem
+are exportable by setting the s_export_op field in the struct
+super_block.  This field must point to a "struct export_operations"
+struct which has the following members:
+
+ encode_fh  (optional)
+    Takes a dentry and creates a filehandle fragment which can later be used
+    to find or create a dentry for the same object.  The default
+    implementation creates a filehandle fragment that encodes a 32bit inode
+    and generation number for the inode encoded, and if necessary the
+    same information for the parent.
+
+  fh_to_dentry (mandatory)
+    Given a filehandle fragment, this should find the implied object and
+    create a dentry for it (possibly with d_alloc_anon).
+
+  fh_to_parent (optional but strongly recommended)
+    Given a filehandle fragment, this should find the parent of the
+    implied object and create a dentry for it (possibly with d_alloc_anon).
+    May fail if the filehandle fragment is too small.
+
+  get_parent (optional but strongly recommended)
+    When given a dentry for a directory, this should return  a dentry for
+    the parent.  Quite possibly the parent dentry will have been allocated
+    by d_alloc_anon.  The default get_parent function just returns an error
+    so any filehandle lookup that requires finding a parent will fail.
+    ->lookup("..") is *not* used as a default as it can leave ".." entries
+    in the dcache which are too messy to work with.
+
+  get_name (optional)
+    When given a parent dentry and a child dentry, this should find a name
+    in the directory identified by the parent dentry, which leads to the
+    object identified by the child dentry.  If no get_name function is
+    supplied, a default implementation is provided which uses vfs_readdir
+    to find potential names, and matches inode numbers to find the correct
+    match.
+
+
+A filehandle fragment consists of an array of 1 or more 4byte words,
+together with a one byte "type".
+The decode_fh routine should not depend on the stated size that is
+passed to it.  This size may be larger than the original filehandle
+generated by encode_fh, in which case it will have been padded with
+nuls.  Rather, the encode_fh routine should choose a "type" which
+indicates the decode_fh how much of the filehandle is valid, and how
+it should be interpreted.
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
new file mode 100644
index 000000000000..e386f7e4bcee
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -0,0 +1,271 @@
+################################################################################
+#									       #
+#				NFS/RDMA README				       #
+#									       #
+################################################################################
+
+ Author: NetApp and Open Grid Computing
+ Date: May 29, 2008
+
+Table of Contents
+~~~~~~~~~~~~~~~~~
+ - Overview
+ - Getting Help
+ - Installation
+ - Check RDMA and NFS Setup
+ - NFS/RDMA Setup
+
+Overview
+~~~~~~~~
+
+  This document describes how to install and setup the Linux NFS/RDMA client
+  and server software.
+
+  The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+  was first included in the following release, Linux 2.6.25.
+
+  In our testing, we have obtained excellent performance results (full 10Gbit
+  wire bandwidth at minimal client CPU) under many workloads. The code passes
+  the full Connectathon test suite and operates over both Infiniband and iWARP
+  RDMA adapters.
+
+Getting Help
+~~~~~~~~~~~~
+
+  If you get stuck, you can ask questions on the
+
+                nfs-rdma-devel@lists.sourceforge.net
+
+  mailing list.
+
+Installation
+~~~~~~~~~~~~
+
+  These instructions are a step by step guide to building a machine for
+  use with NFS/RDMA.
+
+  - Install an RDMA device
+
+    Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+    Testing has been performed using several Mellanox-based IB cards, the
+    Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+  - Install a Linux distribution and tools
+
+    The first kernel release to contain both the NFS/RDMA client and server was
+    Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
+    Linux kernel release should be installed.
+
+    The procedures described in this document have been tested with
+    distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+  - Install nfs-utils-1.1.2 or greater on the client
+
+    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+    version with support for NFS/RDMA mounts, but for various reasons we
+    recommend using nfs-utils-1.1.2 or greater). To see which version of
+    mount.nfs you are using, type:
+
+    $ /sbin/mount.nfs -V
+
+    If the version is less than 1.1.2 or the command does not exist,
+    you should install the latest version of nfs-utils.
+
+    Download the latest package from:
+
+    http://www.kernel.org/pub/linux/utils/nfs
+
+    Uncompress the package and follow the installation instructions.
+
+    If you will not need the idmapper and gssd executables (you do not need
+    these to create an NFS/RDMA enabled mount command), the installation
+    process can be simplified by disabling these features when running
+    configure:
+
+    $ ./configure --disable-gss --disable-nfsv4
+
+    To build nfs-utils you will need the tcp_wrappers package installed. For
+    more information on this see the package's README and INSTALL files.
+
+    After building the nfs-utils package, there will be a mount.nfs binary in
+    the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+    or v4 mounts. To initiate a v4 mount, the binary must be called
+    mount.nfs4.  The standard technique is to create a symlink called
+    mount.nfs4 to mount.nfs.
+
+    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+    In this location, mount.nfs will be invoked automatically for NFS mounts
+    by the system mount command.
+
+    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
+    on the NFS client machine. You do not need this specific version of
+    nfs-utils on the server. Furthermore, only the mount.nfs command from
+    nfs-utils-1.1.2 is needed on the client.
+
+  - Install a Linux kernel with NFS/RDMA
+
+    The NFS/RDMA client and server are both included in the mainline Linux
+    kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
+    kernel can be found at:
+
+    ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+
+    Download the sources and place them in an appropriate location.
+
+  - Configure the RDMA stack
+
+    Make sure your kernel configuration has RDMA support enabled. Under
+    Device Drivers -> InfiniBand support, update the kernel configuration
+    to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+    InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+    Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+    iWARP adapter support (amso, cxgb3, etc.).
+
+    If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+  - Configure the NFS client and server
+
+    Your kernel configuration must also have NFS file system support and/or
+    NFS server support enabled. These and other NFS related configuration
+    options can be found under File Systems -> Network File Systems.
+
+  - Build, install, reboot
+
+    The NFS/RDMA code will be enabled automatically if NFS and RDMA
+    are turned on. The NFS/RDMA client and server are configured via the hidden
+    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+    value of SUNRPC_XPRT_RDMA will be:
+
+     - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
+       and server will not be built
+     - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+       in this case the NFS/RDMA client and server will be built as modules
+     - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+       and server will be built into the kernel
+
+    Therefore, if you have followed the steps above and turned no NFS and RDMA,
+    the NFS/RDMA client and server will be built.
+
+    Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Before configuring the NFS/RDMA software, it is a good idea to test
+    your new kernel to ensure that the kernel is working correctly.
+    In particular, it is a good idea to verify that the RDMA stack
+    is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+    is working properly.
+
+  - Check RDMA Setup
+
+    If you built the RDMA components as modules, load them at
+    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+    card:
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+
+    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+    running on the network. If your IB switch has an embedded SM, you can
+    use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+    of your end nodes.
+
+    If an SM is running on your network, you should see the following:
+
+    $ cat /sys/class/infiniband/driverX/ports/1/state
+    4: ACTIVE
+
+    where driverX is mthca0, ipath5, ehca3, etc.
+
+    To further test the InfiniBand software stack, use IPoIB (this
+    assumes you have two IB hosts named host1 and host2):
+
+    host1$ ifconfig ib0 a.b.c.x
+    host2$ ifconfig ib0 a.b.c.y
+    host1$ ping a.b.c.y
+    host2$ ping a.b.c.x
+
+    For other device types, follow the appropriate procedures.
+
+  - Check NFS Setup
+
+    For the NFS components enabled above (client and/or server),
+    test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+~~~~~~~~~~~~~~
+
+  We recommend that you use two machines, one to act as the client and
+  one to act as the server.
+
+  One time configuration:
+
+  - On the server system, configure the /etc/exports file and
+    start the NFS/RDMA server.
+
+    Exports entries with the following formats have been tested:
+
+    /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+    /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
+
+    The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+    HCA or the cleint's iWARP address(es) for an RNIC.
+
+    NOTE: The "insecure" option must be used because the NFS/RDMA client does
+    not use a reserved port.
+
+ Each time a machine boots:
+
+  - Load and configure the RDMA drivers
+
+    For InfiniBand using a Mellanox adapter:
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+    $ ifconfig ib0 a.b.c.d
+
+    NOTE: use unique addresses for the client and server
+
+  - Start the NFS server
+
+    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA transport module:
+
+    $ modprobe svcrdma
+
+    Regardless of how the server was built (module or built-in), start the
+    server:
+
+    $ /etc/init.d/nfs start
+
+    or
+
+    $ service nfs start
+
+    Instruct the server to listen on the RDMA transport:
+
+    $ echo rdma 20049 > /proc/fs/nfsd/portlist
+
+  - On the client system
+
+    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA client module:
+
+    $ modprobe xprtrdma.ko
+
+    Regardless of how the client was built (module or built-in), use this
+    command to mount the NFS/RDMA server:
+
+    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
+
+    To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+    the "proto" field for the given mount.
+
+  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
new file mode 100644
index 000000000000..f50f26ce6cd0
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs.txt
@@ -0,0 +1,98 @@
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+upcall interfaces that are used in order to provide the NFS client with
+some of the information that it requires in order to fully comply with
+the NFS spec.
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See
+	http://tools.ietf.org/html/rfc3530#section-6
+and
+	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+   (1) The process checks the dns_resolve cache to see if it contains a
+       valid entry. If so, it returns that entry and exits.
+
+   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+       is run, with two arguments:
+		- the cache name, "dns_resolve"
+		- the hostname to resolve
+
+   (3) After looking up the corresponding ip address, the helper script
+       writes the result into the rpc_pipefs pseudo-file
+       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+       in the following (text) format:
+
+		"<ip address> <hostname> <ttl>\n"
+
+       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+       <hostname> is identical to the second argument of the helper
+       script, and <ttl> is the 'time to live' of this cache entry (in
+       units of seconds).
+
+       Note: If <ip address> is invalid, say the string "0", then a negative
+       entry is created, which will cause the kernel to treat the hostname
+       as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+
+#!/bin/bash
+#
+ttl=600
+#
+cut=/usr/bin/cut
+getent=/usr/bin/getent
+rpc_pipefs=/var/lib/nfs/rpc_pipefs
+#
+die()
+{
+	echo "Usage: $0 cache_name entry_name"
+	exit 1
+}
+
+[ $# -lt 2 ] && die
+cachename="$1"
+cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+case "${cachename}" in
+	dns_resolve)
+		name="$2"
+		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+		[ -z "${result}" ] && result="0"
+		;;
+	*)
+		die
+		;;
+esac
+echo "${result} ${name} ${ttl}" >${cache_path}
+
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
new file mode 100644
index 000000000000..1bd0d0c05171
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -0,0 +1,222 @@
+NFSv4.1 Server Implementation
+
+Server support for minorversion 1 can be controlled using the
+/proc/fs/nfsd/versions control file.  The string output returned
+by reading this file will contain either "+4.1" or "-4.1"
+correspondingly.
+
+Currently, server support for minorversion 1 is disabled by default.
+It can be enabled at run time by writing the string "+4.1" to
+the /proc/fs/nfsd/versions control file.  Note that to write this
+control file, the nfsd service must be taken down.  Use your user-mode
+nfs-utils to set this up; see rpc.nfsd(8)
+
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively.  Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
+The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
+on the latest NFSv4.1 Internet Draft:
+http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
+
+From the many new features in NFSv4.1 the current implementation
+focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
+"exactly once" semantics and better control and throttling of the
+resources allocated for each client.
+
+Other NFSv4.1 features, Parallel NFS operations in particular,
+are still under development out of tree.
+See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
+for more information.
+
+The current implementation is intended for developers only: while it
+does support ordinary file operations on clients we have tested against
+(including the linux client), it is incomplete in ways which may limit
+features unexpectedly, cause known bugs in rare cases, or cause
+interoperability problems with future clients.  Known issues:
+
+	- gss support is questionable: currently mounts with kerberos
+	  from a linux client are possible, but we aren't really
+	  conformant with the spec (for example, we don't use kerberos
+	  on the backchannel correctly).
+	- no trunking support: no clients currently take advantage of
+	  trunking, but this is a mandatory feature, and its use is
+	  recommended to clients in a number of places.  (E.g. to ensure
+	  timely renewal in case an existing connection's retry timeouts
+	  have gotten too long; see section 8.3 of the draft.)
+	  Therefore, lack of this feature may cause future clients to
+	  fail.
+	- Incomplete backchannel support: incomplete backchannel gss
+	  support and no support for BACKCHANNEL_CTL mean that
+	  callbacks (hence delegations and layouts) may not be
+	  available and clients confused by the incomplete
+	  implementation may fail.
+	- Server reboot recovery is unsupported; if the server reboots,
+	  clients may fail.
+	- We do not support SSV, which provides security for shared
+	  client-server state (thus preventing unauthorized tampering
+	  with locks and opens, for example).  It is mandatory for
+	  servers to support this, though no clients use it yet.
+	- Mandatory operations which we do not support, such as
+	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
+	  TEST_STATEID, are not currently used by clients, but will be
+	  (and the spec recommends their uses in common cases), and
+	  clients should not be expected to know how to recover from the
+	  case where they are not supported.  This will eventually cause
+	  interoperability failures.
+
+In addition, some limitations are inherited from the current NFSv4
+implementation:
+
+	- Incomplete delegation enforcement: if a file is renamed or
+	  unlinked, a client holding a delegation may continue to
+	  indefinitely allow opens of the file under the old name.
+
+The table below, taken from the NFSv4.1 document, lists
+the operations that are mandatory to implement (REQ), optional
+(OPT), and NFSv4.0 operations that are required not to implement (MNI)
+in minor version 1.  The first column indicates the operations that
+are not supported yet by the linux server implementation.
+
+The OPTIONAL features identified and their abbreviations are as follows:
+	pNFS	Parallel NFS
+	FDELG	File Delegations
+	DDELG	Directory Delegations
+
+The following abbreviations indicate the linux server implementation status.
+	I	Implemented NFSv4.1 operations.
+	NS	Not Supported.
+	NS*	unimplemented optional feature.
+	P	pNFS features implemented out of tree.
+	PNS	pNFS features that are not supported yet (out of tree).
+
+Operations
+
+   +----------------------+------------+--------------+----------------+
+   | Operation            | REQ, REC,  | Feature      | Definition     |
+   |                      | OPT, or    | (REQ, REC,   |                |
+   |                      | MNI        | or OPT)      |                |
+   +----------------------+------------+--------------+----------------+
+   | ACCESS               | REQ        |              | Section 18.1   |
+NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
+NS | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
+   | CLOSE                | REQ        |              | Section 18.2   |
+   | COMMIT               | REQ        |              | Section 18.3   |
+   | CREATE               | REQ        |              | Section 18.4   |
+I  | CREATE_SESSION       | REQ        |              | Section 18.36  |
+NS*| DELEGPURGE           | OPT        | FDELG (REQ)  | Section 18.5   |
+   | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
+   |                      |            | DDELG, pNFS  |                |
+   |                      |            | (REQ)        |                |
+NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
+I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
+I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
+NS | FREE_STATEID         | REQ        |              | Section 18.38  |
+   | GETATTR              | REQ        |              | Section 18.7   |
+P  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
+P  | GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
+   | GETFH                | REQ        |              | Section 18.8   |
+NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
+P  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
+P  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
+P  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
+   | LINK                 | OPT        |              | Section 18.9   |
+   | LOCK                 | REQ        |              | Section 18.10  |
+   | LOCKT                | REQ        |              | Section 18.11  |
+   | LOCKU                | REQ        |              | Section 18.12  |
+   | LOOKUP               | REQ        |              | Section 18.13  |
+   | LOOKUPP              | REQ        |              | Section 18.14  |
+   | NVERIFY              | REQ        |              | Section 18.15  |
+   | OPEN                 | REQ        |              | Section 18.16  |
+NS*| OPENATTR             | OPT        |              | Section 18.17  |
+   | OPEN_CONFIRM         | MNI        |              | N/A            |
+   | OPEN_DOWNGRADE       | REQ        |              | Section 18.18  |
+   | PUTFH                | REQ        |              | Section 18.19  |
+   | PUTPUBFH             | REQ        |              | Section 18.20  |
+   | PUTROOTFH            | REQ        |              | Section 18.21  |
+   | READ                 | REQ        |              | Section 18.22  |
+   | READDIR              | REQ        |              | Section 18.23  |
+   | READLINK             | OPT        |              | Section 18.24  |
+NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+   | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
+   | REMOVE               | REQ        |              | Section 18.25  |
+   | RENAME               | REQ        |              | Section 18.26  |
+   | RENEW                | MNI        |              | N/A            |
+   | RESTOREFH            | REQ        |              | Section 18.27  |
+   | SAVEFH               | REQ        |              | Section 18.28  |
+   | SECINFO              | REQ        |              | Section 18.29  |
+NS | SECINFO_NO_NAME      | REC        | pNFS files   | Section 18.45, |
+   |                      |            | layout (REQ) | Section 13.12  |
+I  | SEQUENCE             | REQ        |              | Section 18.46  |
+   | SETATTR              | REQ        |              | Section 18.30  |
+   | SETCLIENTID          | MNI        |              | N/A            |
+   | SETCLIENTID_CONFIRM  | MNI        |              | N/A            |
+NS | SET_SSV              | REQ        |              | Section 18.47  |
+NS | TEST_STATEID         | REQ        |              | Section 18.48  |
+   | VERIFY               | REQ        |              | Section 18.31  |
+NS*| WANT_DELEGATION      | OPT        | FDELG (OPT)  | Section 18.49  |
+   | WRITE                | REQ        |              | Section 18.32  |
+
+Callback Operations
+
+   +-------------------------+-----------+-------------+---------------+
+   | Operation               | REQ, REC, | Feature     | Definition    |
+   |                         | OPT, or   | (REQ, REC,  |               |
+   |                         | MNI       | or OPT)     |               |
+   +-------------------------+-----------+-------------+---------------+
+   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
+P  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
+NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
+P  | CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
+NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
+NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
+   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS*| CB_RECALL_ANY           | OPT       | FDELG,      | Section 20.6  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS | CB_RECALL_SLOT          | REQ       |             | Section 20.8  |
+NS*| CB_RECALLABLE_OBJ_AVAIL | OPT       | DDELG, pNFS | Section 20.7  |
+   |                         |           | (REQ)       |               |
+I  | CB_SEQUENCE             | OPT       | FDELG,      | Section 20.9  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+   +-------------------------+-----------+-------------+---------------+
+
+Implementation notes:
+
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+  persist across client reboots).  Thus we need not implement this for
+  now.
+
+EXCHANGE_ID:
+* only SP4_NONE state protection supported
+* implementation ids are ignored
+
+CREATE_SESSION:
+* backchannel attributes are ignored
+* backchannel security parameters are ignored
+
+SEQUENCE:
+* no support for dynamic slot table renegotiation (optional)
+
+nfsv4.1 COMPOUND rules:
+The following cases aren't supported yet:
+* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
+  DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
+* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
+
+Nonstandard compound limitations:
+* No support for a sessions fore channel RPC compound that requires both a
+  ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
+  fail to live up to the promise we made in CREATE_SESSION fore channel
+  negotiation.
+* No more than one IO operation (read, write, readdir) allowed per
+  compound.
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
new file mode 100644
index 000000000000..3ba0b945aaf8
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -0,0 +1,270 @@
+Mounting the root filesystem via NFS (nfsroot)
+===============================================
+
+Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
+Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
+Updated 2006 by Horms <horms@verge.net.au>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server
+for example, it is necessary for the root filesystem to be present on a
+non-disk device. This may be an initramfs (see Documentation/filesystems/
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
+filesystem mounted via NFS. The following text describes on how to use NFS
+for the root filesystem. For the rest of this text 'client' means the
+diskless system, and 'server' means the NFS server.
+
+
+
+
+1.) Enabling nfsroot capabilities
+    -----------------------------
+
+In order to use nfsroot, NFS client support needs to be selected as
+built-in during configuration. Once this has been selected, the nfsroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+
+
+2.) Kernel command line
+    -------------------
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of nfsroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+
+root=/dev/nfs
+
+  This is necessary to enable the pseudo-NFS-device. Note that it's not a
+  real device but just a synonym to tell the kernel to use NFS instead of
+  a real device.
+
+
+nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+
+  If the `nfsroot' parameter is NOT given on the command line,
+  the default "/tftpboot/%s" will be used.
+
+  <server-ip>	Specifies the IP address of the NFS server.
+		The default address is determined by the `ip' parameter
+		(see below). This parameter allows the use of different
+		servers for IP autoconfiguration and NFS.
+
+  <root-dir>	Name of the directory on the server to mount as root.
+		If there is a "%s" token in the string, it will be
+		replaced by the ASCII-representation of the client's
+		IP address.
+
+  <nfs-options>	Standard NFS options. All options are separated by commas.
+		The following defaults are used:
+			port		= as given by server portmap daemon
+			rsize		= 4096
+			wsize		= 4096
+			timeo		= 7
+			retrans		= 3
+			acregmin	= 3
+			acregmax	= 60
+			acdirmin	= 30
+			acdirmax	= 60
+			flags		= hard, nointr, noposix, cto, ac
+
+
+ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
+
+  This parameter tells the kernel how to configure IP addresses of devices
+  and also how to set up the IP routing table. It was originally called
+  `nfsaddrs', but now the boot-time IP configuration works independently of
+  NFS, so it was renamed to `ip' and the old name remained as an alias for
+  compatibility reasons.
+
+  If this parameter is missing from the kernel command line, all fields are
+  assumed to be empty, and the defaults mentioned below apply. In general
+  this means that the kernel tries to configure everything using
+  autoconfiguration.
+
+  The <autoconf> parameter can appear alone as the value to the `ip'
+  parameter (without all the ':' characters before).  If the value is
+  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
+  autoconfiguration will take place.  The most common way to use this
+  is "ip=dhcp".
+
+  <client-ip>	IP address of the client.
+
+  		Default:  Determined using autoconfiguration.
+
+  <server-ip>	IP address of the NFS server. If RARP is used to determine
+		the client address and this parameter is NOT empty only
+		replies from the specified server are accepted.
+
+		Only required for NFS root. That is autoconfiguration
+		will not be triggered if it is missing and NFS root is not
+		in operation.
+
+		Default: Determined using autoconfiguration.
+		         The address of the autoconfiguration server is used.
+
+  <gw-ip>	IP address of a gateway if the server is on a different subnet.
+
+		Default: Determined using autoconfiguration.
+
+  <netmask>	Netmask for local network interface. If unspecified
+		the netmask is derived from the client IP address assuming
+		classful addressing.
+
+		Default:  Determined using autoconfiguration.
+
+  <hostname>	Name of the client. May be supplied by autoconfiguration,
+  		but its absence will not trigger autoconfiguration.
+
+  		Default: Client IP address is used in ASCII notation.
+
+  <device>	Name of network device to use.
+
+		Default: If the host only has one device, it is used.
+			 Otherwise the device is determined using
+			 autoconfiguration. This is done by sending
+			 autoconfiguration requests out of all devices,
+			 and using the device that received the first reply.
+
+  <autoconf>	Method to use for autoconfiguration. In the case of options
+                which specify multiple autoconfiguration protocols,
+		requests are sent using all protocols, and the first one
+		to reply is used.
+
+		Only autoconfiguration protocols that have been compiled
+		into the kernel will be used, regardless of the value of
+		this option.
+
+                  off or none: don't use autoconfiguration
+				(do static IP assignment instead)
+		  on or any:   use any protocol available in the kernel
+			       (default)
+		  dhcp:        use DHCP
+		  bootp:       use BOOTP
+		  rarp:        use RARP
+		  both:        use both BOOTP and RARP but not DHCP
+		               (old option kept for backwards compatibility)
+
+                Default: any
+
+
+
+
+3.) Boot Loader
+    ----------
+
+To get the kernel into memory different approaches can be used.
+They depend on various facilities being available:
+
+
+3.1)  Booting from a floppy using syslinux
+
+	When building kernels, an easy way to create a boot floppy that uses
+	syslinux is to use the zdisk or bzdisk make targets which use zimage
+      	and bzimage images respectively. Both targets accept the
+     	FDARGS parameter which can be used to set the kernel command line.
+
+	e.g.
+	   make bzdisk FDARGS="root=/dev/nfs"
+
+   	Note that the user running this command will need to have
+     	access to the floppy drive device, /dev/fd0
+
+     	For more information on syslinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+	N.B: Previously it was possible to write a kernel directly to
+	     a floppy using dd, configure the boot device using rdev, and
+	     boot using the resulting floppy. Linux no longer supports this
+	     method of booting.
+
+3.2) Booting from a cdrom using isolinux
+
+     	When building kernels, an easy way to create a bootable cdrom that
+     	uses isolinux is to use the isoimage target which uses a bzimage
+     	image. Like zdisk and bzdisk, this target accepts the FDARGS
+     	parameter which can be used to set the kernel command line.
+
+	e.g.
+	  make isoimage FDARGS="root=/dev/nfs"
+
+     	The resulting iso image will be arch/<ARCH>/boot/image.iso
+     	This can be written to a cdrom using a variety of tools including
+     	cdrecord.
+
+	e.g.
+	  cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
+
+     	For more information on isolinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+3.2) Using LILO
+	When using LILO all the necessary command line parameters may be
+	specified using the 'append=' directive in the LILO configuration
+	file.
+
+	However, to use the 'root=' directive you also need to create
+	a dummy root device, which may be removed after LILO is run.
+
+	mknod /dev/boot255 c 0 255
+
+	For information on configuring LILO, please refer to its documentation.
+
+3.3) Using GRUB
+	When using GRUB, kernel parameter are simply appended after the kernel
+	specification: kernel <kernel> <parameters>
+
+3.4) Using loadlin
+	loadlin may be used to boot Linux from a DOS command prompt without
+	requiring a local hard disk to mount as root. This has not been
+	thoroughly tested by the authors of this document, but in general
+	it should be possible configure the kernel command line similarly
+	to the configuration of LILO.
+
+	Please refer to the loadlin documentation for further information.
+
+3.5) Using a boot ROM
+	This is probably the most elegant way of booting a diskless client.
+	With a boot ROM the kernel is loaded using the TFTP protocol. The
+	authors of this document are not aware of any no commercial boot
+	ROMs that support booting Linux over the network. However, there
+	are two free implementations of a boot ROM, netboot-nfs and
+	etherboot, both of which are available on sunsite.unc.edu, and both
+	of which contain everything you need to boot a diskless Linux client.
+
+3.6) Using pxelinux
+	Pxelinux may be used to boot linux using the PXE boot loader
+	which is present on many modern network cards.
+
+	When using pxelinux, the kernel image is specified using
+	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
+	are passed to the kernel by adding them to the "append" line.
+	It is common to use serial console in conjunction with pxeliunx,
+	see Documentation/serial-console.txt for more information.
+
+	For more information on isolinux, including how to create bootdisks
+	for prebuilt kernels, see http://syslinux.zytor.com/
+
+
+
+
+4.) Credits
+    -------
+
+  The nfsroot code in the kernel and the RARP support have been written
+  by Gero Kuhlmann <gero@gkminix.han.de>.
+
+  The rest of the IP layer autoconfiguration code has been written
+  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
+
+  In order to write the initial version of nfsroot I would like to thank
+  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
deleted file mode 100644
index 1bd0d0c05171..000000000000
--- a/Documentation/filesystems/nfs41-server.txt
+++ /dev/null
@@ -1,222 +0,0 @@
-NFSv4.1 Server Implementation
-
-Server support for minorversion 1 can be controlled using the
-/proc/fs/nfsd/versions control file.  The string output returned
-by reading this file will contain either "+4.1" or "-4.1"
-correspondingly.
-
-Currently, server support for minorversion 1 is disabled by default.
-It can be enabled at run time by writing the string "+4.1" to
-the /proc/fs/nfsd/versions control file.  Note that to write this
-control file, the nfsd service must be taken down.  Use your user-mode
-nfs-utils to set this up; see rpc.nfsd(8)
-
-(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
-"-4", respectively.  Therefore, code meant to work on both new and old
-kernels must turn 4.1 on or off *before* turning support for version 4
-on or off; rpc.nfsd does this correctly.)
-
-The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
-on the latest NFSv4.1 Internet Draft:
-http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
-
-From the many new features in NFSv4.1 the current implementation
-focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
-"exactly once" semantics and better control and throttling of the
-resources allocated for each client.
-
-Other NFSv4.1 features, Parallel NFS operations in particular,
-are still under development out of tree.
-See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
-for more information.
-
-The current implementation is intended for developers only: while it
-does support ordinary file operations on clients we have tested against
-(including the linux client), it is incomplete in ways which may limit
-features unexpectedly, cause known bugs in rare cases, or cause
-interoperability problems with future clients.  Known issues:
-
-	- gss support is questionable: currently mounts with kerberos
-	  from a linux client are possible, but we aren't really
-	  conformant with the spec (for example, we don't use kerberos
-	  on the backchannel correctly).
-	- no trunking support: no clients currently take advantage of
-	  trunking, but this is a mandatory feature, and its use is
-	  recommended to clients in a number of places.  (E.g. to ensure
-	  timely renewal in case an existing connection's retry timeouts
-	  have gotten too long; see section 8.3 of the draft.)
-	  Therefore, lack of this feature may cause future clients to
-	  fail.
-	- Incomplete backchannel support: incomplete backchannel gss
-	  support and no support for BACKCHANNEL_CTL mean that
-	  callbacks (hence delegations and layouts) may not be
-	  available and clients confused by the incomplete
-	  implementation may fail.
-	- Server reboot recovery is unsupported; if the server reboots,
-	  clients may fail.
-	- We do not support SSV, which provides security for shared
-	  client-server state (thus preventing unauthorized tampering
-	  with locks and opens, for example).  It is mandatory for
-	  servers to support this, though no clients use it yet.
-	- Mandatory operations which we do not support, such as
-	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
-	  TEST_STATEID, are not currently used by clients, but will be
-	  (and the spec recommends their uses in common cases), and
-	  clients should not be expected to know how to recover from the
-	  case where they are not supported.  This will eventually cause
-	  interoperability failures.
-
-In addition, some limitations are inherited from the current NFSv4
-implementation:
-
-	- Incomplete delegation enforcement: if a file is renamed or
-	  unlinked, a client holding a delegation may continue to
-	  indefinitely allow opens of the file under the old name.
-
-The table below, taken from the NFSv4.1 document, lists
-the operations that are mandatory to implement (REQ), optional
-(OPT), and NFSv4.0 operations that are required not to implement (MNI)
-in minor version 1.  The first column indicates the operations that
-are not supported yet by the linux server implementation.
-
-The OPTIONAL features identified and their abbreviations are as follows:
-	pNFS	Parallel NFS
-	FDELG	File Delegations
-	DDELG	Directory Delegations
-
-The following abbreviations indicate the linux server implementation status.
-	I	Implemented NFSv4.1 operations.
-	NS	Not Supported.
-	NS*	unimplemented optional feature.
-	P	pNFS features implemented out of tree.
-	PNS	pNFS features that are not supported yet (out of tree).
-
-Operations
-
-   +----------------------+------------+--------------+----------------+
-   | Operation            | REQ, REC,  | Feature      | Definition     |
-   |                      | OPT, or    | (REQ, REC,   |                |
-   |                      | MNI        | or OPT)      |                |
-   +----------------------+------------+--------------+----------------+
-   | ACCESS               | REQ        |              | Section 18.1   |
-NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
-NS | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
-   | CLOSE                | REQ        |              | Section 18.2   |
-   | COMMIT               | REQ        |              | Section 18.3   |
-   | CREATE               | REQ        |              | Section 18.4   |
-I  | CREATE_SESSION       | REQ        |              | Section 18.36  |
-NS*| DELEGPURGE           | OPT        | FDELG (REQ)  | Section 18.5   |
-   | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
-   |                      |            | DDELG, pNFS  |                |
-   |                      |            | (REQ)        |                |
-NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
-I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
-I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
-NS | FREE_STATEID         | REQ        |              | Section 18.38  |
-   | GETATTR              | REQ        |              | Section 18.7   |
-P  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
-P  | GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
-   | GETFH                | REQ        |              | Section 18.8   |
-NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
-P  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
-P  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
-P  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
-   | LINK                 | OPT        |              | Section 18.9   |
-   | LOCK                 | REQ        |              | Section 18.10  |
-   | LOCKT                | REQ        |              | Section 18.11  |
-   | LOCKU                | REQ        |              | Section 18.12  |
-   | LOOKUP               | REQ        |              | Section 18.13  |
-   | LOOKUPP              | REQ        |              | Section 18.14  |
-   | NVERIFY              | REQ        |              | Section 18.15  |
-   | OPEN                 | REQ        |              | Section 18.16  |
-NS*| OPENATTR             | OPT        |              | Section 18.17  |
-   | OPEN_CONFIRM         | MNI        |              | N/A            |
-   | OPEN_DOWNGRADE       | REQ        |              | Section 18.18  |
-   | PUTFH                | REQ        |              | Section 18.19  |
-   | PUTPUBFH             | REQ        |              | Section 18.20  |
-   | PUTROOTFH            | REQ        |              | Section 18.21  |
-   | READ                 | REQ        |              | Section 18.22  |
-   | READDIR              | REQ        |              | Section 18.23  |
-   | READLINK             | OPT        |              | Section 18.24  |
-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
-   | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
-   | REMOVE               | REQ        |              | Section 18.25  |
-   | RENAME               | REQ        |              | Section 18.26  |
-   | RENEW                | MNI        |              | N/A            |
-   | RESTOREFH            | REQ        |              | Section 18.27  |
-   | SAVEFH               | REQ        |              | Section 18.28  |
-   | SECINFO              | REQ        |              | Section 18.29  |
-NS | SECINFO_NO_NAME      | REC        | pNFS files   | Section 18.45, |
-   |                      |            | layout (REQ) | Section 13.12  |
-I  | SEQUENCE             | REQ        |              | Section 18.46  |
-   | SETATTR              | REQ        |              | Section 18.30  |
-   | SETCLIENTID          | MNI        |              | N/A            |
-   | SETCLIENTID_CONFIRM  | MNI        |              | N/A            |
-NS | SET_SSV              | REQ        |              | Section 18.47  |
-NS | TEST_STATEID         | REQ        |              | Section 18.48  |
-   | VERIFY               | REQ        |              | Section 18.31  |
-NS*| WANT_DELEGATION      | OPT        | FDELG (OPT)  | Section 18.49  |
-   | WRITE                | REQ        |              | Section 18.32  |
-
-Callback Operations
-
-   +-------------------------+-----------+-------------+---------------+
-   | Operation               | REQ, REC, | Feature     | Definition    |
-   |                         | OPT, or   | (REQ, REC,  |               |
-   |                         | MNI       | or OPT)     |               |
-   +-------------------------+-----------+-------------+---------------+
-   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
-P  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
-NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
-P  | CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
-NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
-NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
-   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS*| CB_RECALL_ANY           | OPT       | FDELG,      | Section 20.6  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS | CB_RECALL_SLOT          | REQ       |             | Section 20.8  |
-NS*| CB_RECALLABLE_OBJ_AVAIL | OPT       | DDELG, pNFS | Section 20.7  |
-   |                         |           | (REQ)       |               |
-I  | CB_SEQUENCE             | OPT       | FDELG,      | Section 20.9  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-   +-------------------------+-----------+-------------+---------------+
-
-Implementation notes:
-
-DELEGPURGE:
-* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
-  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
-  persist across client reboots).  Thus we need not implement this for
-  now.
-
-EXCHANGE_ID:
-* only SP4_NONE state protection supported
-* implementation ids are ignored
-
-CREATE_SESSION:
-* backchannel attributes are ignored
-* backchannel security parameters are ignored
-
-SEQUENCE:
-* no support for dynamic slot table renegotiation (optional)
-
-nfsv4.1 COMPOUND rules:
-The following cases aren't supported yet:
-* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
-  DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
-* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
-
-Nonstandard compound limitations:
-* No support for a sessions fore channel RPC compound that requires both a
-  ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
-  fail to live up to the promise we made in CREATE_SESSION fore channel
-  negotiation.
-* No more than one IO operation (read, write, readdir) allowed per
-  compound.
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
deleted file mode 100644
index 3ba0b945aaf8..000000000000
--- a/Documentation/filesystems/nfsroot.txt
+++ /dev/null
@@ -1,270 +0,0 @@
-Mounting the root filesystem via NFS (nfsroot)
-===============================================
-
-Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
-Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
-Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
-Updated 2006 by Horms <horms@verge.net.au>
-
-
-
-In order to use a diskless system, such as an X-terminal or printer server
-for example, it is necessary for the root filesystem to be present on a
-non-disk device. This may be an initramfs (see Documentation/filesystems/
-ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
-filesystem mounted via NFS. The following text describes on how to use NFS
-for the root filesystem. For the rest of this text 'client' means the
-diskless system, and 'server' means the NFS server.
-
-
-
-
-1.) Enabling nfsroot capabilities
-    -----------------------------
-
-In order to use nfsroot, NFS client support needs to be selected as
-built-in during configuration. Once this has been selected, the nfsroot
-option will become available, which should also be selected.
-
-In the networking options, kernel level autoconfiguration can be selected,
-along with the types of autoconfiguration to support. Selecting all of
-DHCP, BOOTP and RARP is safe.
-
-
-
-
-2.) Kernel command line
-    -------------------
-
-When the kernel has been loaded by a boot loader (see below) it needs to be
-told what root fs device to use. And in the case of nfsroot, where to find
-both the server and the name of the directory on the server to mount as root.
-This can be established using the following kernel command line parameters:
-
-
-root=/dev/nfs
-
-  This is necessary to enable the pseudo-NFS-device. Note that it's not a
-  real device but just a synonym to tell the kernel to use NFS instead of
-  a real device.
-
-
-nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
-
-  If the `nfsroot' parameter is NOT given on the command line,
-  the default "/tftpboot/%s" will be used.
-
-  <server-ip>	Specifies the IP address of the NFS server.
-		The default address is determined by the `ip' parameter
-		(see below). This parameter allows the use of different
-		servers for IP autoconfiguration and NFS.
-
-  <root-dir>	Name of the directory on the server to mount as root.
-		If there is a "%s" token in the string, it will be
-		replaced by the ASCII-representation of the client's
-		IP address.
-
-  <nfs-options>	Standard NFS options. All options are separated by commas.
-		The following defaults are used:
-			port		= as given by server portmap daemon
-			rsize		= 4096
-			wsize		= 4096
-			timeo		= 7
-			retrans		= 3
-			acregmin	= 3
-			acregmax	= 60
-			acdirmin	= 30
-			acdirmax	= 60
-			flags		= hard, nointr, noposix, cto, ac
-
-
-ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
-
-  This parameter tells the kernel how to configure IP addresses of devices
-  and also how to set up the IP routing table. It was originally called
-  `nfsaddrs', but now the boot-time IP configuration works independently of
-  NFS, so it was renamed to `ip' and the old name remained as an alias for
-  compatibility reasons.
-
-  If this parameter is missing from the kernel command line, all fields are
-  assumed to be empty, and the defaults mentioned below apply. In general
-  this means that the kernel tries to configure everything using
-  autoconfiguration.
-
-  The <autoconf> parameter can appear alone as the value to the `ip'
-  parameter (without all the ':' characters before).  If the value is
-  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
-  autoconfiguration will take place.  The most common way to use this
-  is "ip=dhcp".
-
-  <client-ip>	IP address of the client.
-
-  		Default:  Determined using autoconfiguration.
-
-  <server-ip>	IP address of the NFS server. If RARP is used to determine
-		the client address and this parameter is NOT empty only
-		replies from the specified server are accepted.
-
-		Only required for NFS root. That is autoconfiguration
-		will not be triggered if it is missing and NFS root is not
-		in operation.
-
-		Default: Determined using autoconfiguration.
-		         The address of the autoconfiguration server is used.
-
-  <gw-ip>	IP address of a gateway if the server is on a different subnet.
-
-		Default: Determined using autoconfiguration.
-
-  <netmask>	Netmask for local network interface. If unspecified
-		the netmask is derived from the client IP address assuming
-		classful addressing.
-
-		Default:  Determined using autoconfiguration.
-
-  <hostname>	Name of the client. May be supplied by autoconfiguration,
-  		but its absence will not trigger autoconfiguration.
-
-  		Default: Client IP address is used in ASCII notation.
-
-  <device>	Name of network device to use.
-
-		Default: If the host only has one device, it is used.
-			 Otherwise the device is determined using
-			 autoconfiguration. This is done by sending
-			 autoconfiguration requests out of all devices,
-			 and using the device that received the first reply.
-
-  <autoconf>	Method to use for autoconfiguration. In the case of options
-                which specify multiple autoconfiguration protocols,
-		requests are sent using all protocols, and the first one
-		to reply is used.
-
-		Only autoconfiguration protocols that have been compiled
-		into the kernel will be used, regardless of the value of
-		this option.
-
-                  off or none: don't use autoconfiguration
-				(do static IP assignment instead)
-		  on or any:   use any protocol available in the kernel
-			       (default)
-		  dhcp:        use DHCP
-		  bootp:       use BOOTP
-		  rarp:        use RARP
-		  both:        use both BOOTP and RARP but not DHCP
-		               (old option kept for backwards compatibility)
-
-                Default: any
-
-
-
-
-3.) Boot Loader
-    ----------
-
-To get the kernel into memory different approaches can be used.
-They depend on various facilities being available:
-
-
-3.1)  Booting from a floppy using syslinux
-
-	When building kernels, an easy way to create a boot floppy that uses
-	syslinux is to use the zdisk or bzdisk make targets which use zimage
-      	and bzimage images respectively. Both targets accept the
-     	FDARGS parameter which can be used to set the kernel command line.
-
-	e.g.
-	   make bzdisk FDARGS="root=/dev/nfs"
-
-   	Note that the user running this command will need to have
-     	access to the floppy drive device, /dev/fd0
-
-     	For more information on syslinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-	N.B: Previously it was possible to write a kernel directly to
-	     a floppy using dd, configure the boot device using rdev, and
-	     boot using the resulting floppy. Linux no longer supports this
-	     method of booting.
-
-3.2) Booting from a cdrom using isolinux
-
-     	When building kernels, an easy way to create a bootable cdrom that
-     	uses isolinux is to use the isoimage target which uses a bzimage
-     	image. Like zdisk and bzdisk, this target accepts the FDARGS
-     	parameter which can be used to set the kernel command line.
-
-	e.g.
-	  make isoimage FDARGS="root=/dev/nfs"
-
-     	The resulting iso image will be arch/<ARCH>/boot/image.iso
-     	This can be written to a cdrom using a variety of tools including
-     	cdrecord.
-
-	e.g.
-	  cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
-
-     	For more information on isolinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-3.2) Using LILO
-	When using LILO all the necessary command line parameters may be
-	specified using the 'append=' directive in the LILO configuration
-	file.
-
-	However, to use the 'root=' directive you also need to create
-	a dummy root device, which may be removed after LILO is run.
-
-	mknod /dev/boot255 c 0 255
-
-	For information on configuring LILO, please refer to its documentation.
-
-3.3) Using GRUB
-	When using GRUB, kernel parameter are simply appended after the kernel
-	specification: kernel <kernel> <parameters>
-
-3.4) Using loadlin
-	loadlin may be used to boot Linux from a DOS command prompt without
-	requiring a local hard disk to mount as root. This has not been
-	thoroughly tested by the authors of this document, but in general
-	it should be possible configure the kernel command line similarly
-	to the configuration of LILO.
-
-	Please refer to the loadlin documentation for further information.
-
-3.5) Using a boot ROM
-	This is probably the most elegant way of booting a diskless client.
-	With a boot ROM the kernel is loaded using the TFTP protocol. The
-	authors of this document are not aware of any no commercial boot
-	ROMs that support booting Linux over the network. However, there
-	are two free implementations of a boot ROM, netboot-nfs and
-	etherboot, both of which are available on sunsite.unc.edu, and both
-	of which contain everything you need to boot a diskless Linux client.
-
-3.6) Using pxelinux
-	Pxelinux may be used to boot linux using the PXE boot loader
-	which is present on many modern network cards.
-
-	When using pxelinux, the kernel image is specified using
-	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
-	are passed to the kernel by adding them to the "append" line.
-	It is common to use serial console in conjunction with pxeliunx,
-	see Documentation/serial-console.txt for more information.
-
-	For more information on isolinux, including how to create bootdisks
-	for prebuilt kernels, see http://syslinux.zytor.com/
-
-
-
-
-4.) Credits
-    -------
-
-  The nfsroot code in the kernel and the RARP support have been written
-  by Gero Kuhlmann <gero@gkminix.han.de>.
-
-  The rest of the IP layer autoconfiguration code has been written
-  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
-
-  In order to write the initial version of nfsroot I would like to thank
-  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 92b888d540a6..a7e9746ee7ea 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -140,7 +140,7 @@ Callers of notify_change() need ->i_mutex now.
 New super_block field "struct export_operations *s_export_op" for
 explicit support for exporting, e.g. via NFS.  The structure is fully
 documented at its declaration in include/linux/fs.h, and in
-Documentation/filesystems/Exporting.
+Documentation/filesystems/nfs/Exporting.
 
 Briefly it allows for the definition of decode_fh and encode_fh operations
 to encode and decode filehandles, and allows the filesystem to use
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9107b387e91f..dab0f04b4264 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1017,7 +1017,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			No delay
 
 	ip=		[IP_PNP]
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	ip2=		[HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
 			See comment before ip2_setup() in
@@ -1538,10 +1538,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			going to be removed in 2.6.29.
 
 	nfsaddrs=	[NFS]
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	nfsroot=	[NFS] nfs root filesystem for disk-less boxes.
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	nfs.callback_tcpport=
 			[NFS] set the TCP port on which the NFSv4 callback
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1b..6177f7cca16a 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
  */
 
  /*
-  * See Documentation/filesystems/Exporting
+  * See Documentation/filesystems/nfs/Exporting
   * and examples in fs/exportfs
   *
   * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c7..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
  * and for mapping back from file handles to dentries.
  *
  * For details on why we do all the strange and hairy things in here
- * take a look at Documentation/filesystems/Exporting.
+ * take a look at Documentation/filesystems/nfs/Exporting.
  */
 #include <linux/exportfs.h>
 #include <linux/fs.h>
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba9..ed752cb38474 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
  *
  * The following files are helpful:
  *
- *     Documentation/filesystems/Exporting
+ *     Documentation/filesystems/nfs/Exporting
  *     fs/exportfs/expfs.c.
  */
 
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5af..59e5673b4597 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
 	  If you want your system to mount its root file system via NFS,
 	  choose Y here.  This is common practice for managing systems
 	  without local permanent storage.  For details, read
-	  <file:Documentation/filesystems/nfsroot.txt>.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt>.
 
 	  Most people say N here.
 
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 27e772cefb6a..dc12f416a49f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -97,7 +97,7 @@ struct fid {
  * @get_name:       find the name for a given inode in a given directory
  * @get_parent:     find the parent of a given directory
  *
- * See Documentation/filesystems/Exporting for details on how to use
+ * See Documentation/filesystems/nfs/Exporting for details on how to use
  * this interface correctly.
  *
  * encode_fh:
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9035eb..0c94a1ac2946 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,7 +166,7 @@ config IP_PNP_DHCP
 
 	  If unsure, say Y. Note that if you want to use DHCP, a DHCP server
 	  must be operating on your network.  Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_BOOTP
 	bool "IP: BOOTP support"
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP
 	  does BOOTP itself, providing all necessary information on the kernel
 	  command line, you can say N here. If unsure, say Y. Note that if you
 	  want to use BOOTP, a BOOTP server must be operating on your network.
-	  Read <file:Documentation/filesystems/nfsroot.txt> for details.
+	  Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_RARP
 	bool "IP: RARP support"
@@ -194,7 +194,7 @@ config IP_PNP_RARP
 	  older protocol which is being obsoleted by BOOTP and DHCP), say Y
 	  here. Note that if you want to use RARP, a RARP server must be
 	  operating on your network. Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 # not yet ready..
 #   bool '    IP: ARP support' CONFIG_IP_PNP_ARP
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f8d04c256454..7dcbf4706099 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1447,7 +1447,7 @@ late_initcall(ip_auto_config);
 
 /*
  *  Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- *  command line parameter.  See Documentation/filesystems/nfsroot.txt.
+ *  command line parameter.  See Documentation/filesystems/nfs/nfsroot.txt.
  */
 static int __init ic_proto_name(char *name)
 {
-- 
cgit v1.2.3


From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 18 Oct 2009 01:09:29 +0000
Subject: perf_event: Add alignment-faults and emulation-faults software events

Add two more software events that are common to many cpus.

Alignment faults: When a load or store is not aligned properly.

Emulation faults: When an instruction is emulated in software.

Both cause a very significant slowdown (100x or worse), so identifying and
fixing them is very important.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_counter.h   | 2 ++
 include/linux/perf_event.h     | 2 ++
 kernel/perf_event.c            | 2 ++
 tools/perf/design.txt          | 2 ++
 tools/perf/util/parse-events.c | 4 ++++
 5 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf433cff..d6b95d1e79f0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e6d95f97419..a33707a3a788 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -102,6 +102,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9d0b5c665883..0683b33cbb28 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4186,6 +4186,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
 	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
+	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index fdd42a824c98..f000c30877ac 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -137,6 +137,8 @@ enum sw_event_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS	= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS	= 8,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8cfb48cbbea0..34bd84423933 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -46,6 +46,8 @@ static struct event_symbol event_symbols[] = {
   { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
   { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
+  { CSW(ALIGNMENT_FAULTS),	"alignment-faults",	""		},
+  { CSW(EMULATION_FAULTS),	"emulation-faults",	""		},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -74,6 +76,8 @@ static const char *sw_event_names[] = {
 	"CPU-migrations",
 	"minor-faults",
 	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
 };
 
 #define MAX_ALIASES 8
-- 
cgit v1.2.3


From 1af60fbd759d31f565552fea315c2033947cfbe6 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Fri, 2 Oct 2009 18:56:53 -0400
Subject: block: get rid of the WRITE_ODIRECT flag

Hi,

The WRITE_ODIRECT flag is only used in one place, and that code path
happens to also call blk_run_address_space.  The introduction of this
flag, then, could result in the device being unplugged twice for every
I/O.

Further, with the batching changes in the next patch, we don't want an
O_DIRECT write to imply a queue unplug.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/direct-io.c     | 2 +-
 include/linux/fs.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87dc01a..c86d35f142de 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
-		rw = WRITE_ODIRECT;
+		rw = WRITE_SYNC_PLUG;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c63571..2f5fca4147c2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -129,7 +129,6 @@ struct inodes_stat_t {
  * WRITE_SYNC		Like WRITE_SYNC_PLUG, but also unplugs the device
  *			immediately after submission. The write equivalent
  *			of READ_SYNC.
- * WRITE_ODIRECT	Special case write for O_DIRECT only.
  * SWRITE_SYNC
  * SWRITE_SYNC_PLUG	Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
  *			See SWRITE.
@@ -151,7 +150,6 @@ struct inodes_stat_t {
 #define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC_PLUG	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC	(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_ODIRECT	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
 #define SWRITE_SYNC_PLUG	\
 			(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC	(SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-- 
cgit v1.2.3


From 44a0873d52282f24b1894c58c0f157e0f626ddc9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 27 Oct 2009 07:03:04 +0000
Subject: net: Introduce unregister_netdevice_queue()

This patchs adds an unreg_list anchor to struct net_device, and
introduces an unregister_netdevice_queue() function, able to queue
a net_device to a list instead of immediately unregister it.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 ++++++++-
 net/core/dev.c            | 20 +++++++++++++-------
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 83800091a31a..0ded0a4768a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -683,6 +683,7 @@ struct net_device
 
 	struct list_head	dev_list;
 	struct list_head	napi_list;
+	struct list_head	unreg_list;
 
 	/* Net device features */
 	unsigned long		features;
@@ -1116,7 +1117,13 @@ extern int		dev_close(struct net_device *dev);
 extern void		dev_disable_lro(struct net_device *dev);
 extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
-extern void		unregister_netdevice(struct net_device *dev);
+extern void		unregister_netdevice_queue(struct net_device *dev,
+						   struct list_head *head);
+static inline void unregister_netdevice(struct net_device *dev)
+{
+	unregister_netdevice_queue(dev, NULL);
+}
+
 extern void		free_netdev(struct net_device *dev);
 extern void		synchronize_net(void);
 extern int 		register_netdevice_notifier(struct notifier_block *nb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 950c13fa60d2..ff94e2b8df7f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5245,25 +5245,31 @@ void synchronize_net(void)
 EXPORT_SYMBOL(synchronize_net);
 
 /**
- *	unregister_netdevice - remove device from the kernel
+ *	unregister_netdevice_queue - remove device from the kernel
  *	@dev: device
- *
+ *	@head: list
+
  *	This function shuts down a device interface and removes it
  *	from the kernel tables.
+ *	If head not NULL, device is queued to be unregistered later.
  *
  *	Callers must hold the rtnl semaphore.  You may want
  *	unregister_netdev() instead of this.
  */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
 	ASSERT_RTNL();
 
-	rollback_registered(dev);
-	/* Finish processing unregister after unlock */
-	net_set_todo(dev);
+	if (head) {
+		list_add_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
 }
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_queue);
 
 /**
  *	unregister_netdev - remove device from the kernel
-- 
cgit v1.2.3


From 9b5e383c11b08784eb0087617f880077982ef769 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 27 Oct 2009 07:04:19 +0000
Subject: net: Introduce unregister_netdevice_many()

Introduce rollback_registered_many() and unregister_netdevice_many()

rollback_registered_many() is able to perform necessary steps at device dismantle
time, factorizing two expensive synchronize_net() calls.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 97 +++++++++++++++++++++++++++++++----------------
 2 files changed, 66 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0ded0a4768a0..e7c227d7cb98 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1119,6 +1119,7 @@ extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
 extern void		unregister_netdevice_queue(struct net_device *dev,
 						   struct list_head *head);
+extern void		unregister_netdevice_many(struct list_head *head);
 static inline void unregister_netdevice(struct net_device *dev)
 {
 	unregister_netdevice_queue(dev, NULL);
diff --git a/net/core/dev.c b/net/core/dev.c
index ff94e2b8df7f..04d3e3014020 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4637,59 +4637,76 @@ static void net_set_todo(struct net_device *dev)
 	list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+	struct net_device *dev;
+
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
 
-	/* Some devices call without registering for initialization unwind. */
-	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-				  "was registered\n", dev->name, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Some devices call without registering
+		 * for initialization unwind.
+		 */
+		if (dev->reg_state == NETREG_UNINITIALIZED) {
+			pr_debug("unregister_netdevice: device %s/%p never "
+				 "was registered\n", dev->name, dev);
 
-		WARN_ON(1);
-		return;
-	}
+			WARN_ON(1);
+			return;
+		}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+		BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
-	/* If device is running, close it first. */
-	dev_close(dev);
+		/* If device is running, close it first. */
+		dev_close(dev);
 
-	/* And unlink it from device chain. */
-	unlist_netdevice(dev);
+		/* And unlink it from device chain. */
+		unlist_netdevice(dev);
 
-	dev->reg_state = NETREG_UNREGISTERING;
+		dev->reg_state = NETREG_UNREGISTERING;
+	}
 
 	synchronize_net();
 
-	/* Shutdown queueing discipline. */
-	dev_shutdown(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Shutdown queueing discipline. */
+		dev_shutdown(dev);
 
 
-	/* Notify protocols, that we are about to destroy
-	   this device. They should clean all the things.
-	*/
-	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+		/* Notify protocols, that we are about to destroy
+		   this device. They should clean all the things.
+		*/
+		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-	/*
-	 *	Flush the unicast and multicast chains
-	 */
-	dev_unicast_flush(dev);
-	dev_addr_discard(dev);
+		/*
+		 *	Flush the unicast and multicast chains
+		 */
+		dev_unicast_flush(dev);
+		dev_addr_discard(dev);
 
-	if (dev->netdev_ops->ndo_uninit)
-		dev->netdev_ops->ndo_uninit(dev);
+		if (dev->netdev_ops->ndo_uninit)
+			dev->netdev_ops->ndo_uninit(dev);
 
-	/* Notifier chain MUST detach us from master device. */
-	WARN_ON(dev->master);
+		/* Notifier chain MUST detach us from master device. */
+		WARN_ON(dev->master);
 
-	/* Remove entries from kobject tree */
-	netdev_unregister_kobject(dev);
+		/* Remove entries from kobject tree */
+		netdev_unregister_kobject(dev);
+	}
 
 	synchronize_net();
 
-	dev_put(dev);
+	list_for_each_entry(dev, head, unreg_list)
+		dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -5271,6 +5288,22 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 }
 EXPORT_SYMBOL(unregister_netdevice_queue);
 
+/**
+ *	unregister_netdevice_many - unregister many devices
+ *	@head: list of devices
+ *
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (!list_empty(head)) {
+		rollback_registered_many(head);
+		list_for_each_entry(dev, head, unreg_list)
+			net_set_todo(dev);
+	}
+}
+
 /**
  *	unregister_netdev - remove device from the kernel
  *	@dev: device
-- 
cgit v1.2.3


From 63c8099d90096db56ee1c66c31f05d4fcfbc1c69 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 27 Oct 2009 07:06:49 +0000
Subject: vlan: Optimize multiple unregistration

Use unregister_netdevice_many() to speedup master device unregister.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  1 +
 net/8021q/vlan.c        | 49 +++++++++++++++++++++++++++++++++----------------
 net/core/dev.c          |  1 +
 3 files changed, 35 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8898cbebcf34..71a4870c09a9 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -85,6 +85,7 @@ struct vlan_group {
 					    * the vlan is attached to.
 					    */
 	unsigned int		nr_vlans;
+	int			killall;
 	struct hlist_node	hlist;	/* linked list */
 	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
 	struct rcu_head		rcu;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6b5c9dddaa72..511afe72af31 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -159,11 +159,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
 	grp->nr_vlans--;
 
-	synchronize_net();
-
+	if (!grp->killall) {
+		vlan_group_set_device(grp, vlan_id, NULL);
+		synchronize_net();
+	}
 	unregister_netdevice_queue(dev, head);
 
 	/* If the group is now empty, kill off the group. */
@@ -183,6 +184,34 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	dev_put(real_dev);
 }
 
+void unregister_vlan_dev_alls(struct vlan_group *grp)
+{
+	LIST_HEAD(list);
+	int i;
+	struct net_device *vlandev;
+	struct vlan_group save;
+
+	memcpy(&save, grp, sizeof(save));
+	memset(&grp->vlan_devices_arrays, 0, sizeof(grp->vlan_devices_arrays));
+	grp->killall = 1;
+
+	synchronize_net();
+
+	/* Delete all VLANs for this dev. */
+	for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		vlandev = vlan_group_get_device(&save, i);
+		if (!vlandev)
+			continue;
+
+		unregister_vlan_dev(vlandev, &list);
+		if (grp->nr_vlans == 0)
+			break;
+	}
+	unregister_netdevice_many(&list);
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+		kfree(save.vlan_devices_arrays[i]);
+}
+
 static void vlan_transfer_operstate(const struct net_device *dev,
 				    struct net_device *vlandev)
 {
@@ -524,19 +553,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_UNREGISTER:
-		/* Delete all VLANs for this dev. */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
-			/* unregistration of last vlan destroys group, abort
-			 * afterwards */
-			if (grp->nr_vlans == 1)
-				i = VLAN_GROUP_ARRAY_LEN;
-
-			unregister_vlan_dev(vlandev, NULL);
-		}
+		unregister_vlan_dev_alls(grp);
 		break;
 	}
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 4513dfd5718e..09551cc143a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5303,6 +5303,7 @@ void unregister_netdevice_many(struct list_head *head)
 			net_set_todo(dev);
 	}
 }
+EXPORT_SYMBOL(unregister_netdevice_many);
 
 /**
  *	unregister_netdev - remove device from the kernel
-- 
cgit v1.2.3


From c017b4be3e84176cab10eca5e6c4faeb8cfc6f3e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 28 Oct 2009 13:33:09 +0000
Subject: kmemleak: Simplify the kmemleak_scan_area() function prototype

This function was taking non-necessary arguments which can be determined
by kmemleak. The patch also modifies the calling sites.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/kmemleak.h |  6 ++----
 kernel/module.c          |  7 ++-----
 mm/kmemleak.c            | 49 +++++++++++++++++++++---------------------------
 mm/slab.c                |  4 ++--
 4 files changed, 27 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 3c7497d46ee9..99d9a6766f7e 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -32,8 +32,7 @@ extern void kmemleak_padding(const void *ptr, unsigned long offset,
 			     size_t size) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
-extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
-			       size_t length, gfp_t gfp) __ref;
+extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
 
 static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
@@ -84,8 +83,7 @@ static inline void kmemleak_not_leak(const void *ptr)
 static inline void kmemleak_ignore(const void *ptr)
 {
 }
-static inline void kmemleak_scan_area(const void *ptr, unsigned long offset,
-				      size_t length, gfp_t gfp)
+static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
 }
 static inline void kmemleak_erase(void **ptr)
diff --git a/kernel/module.c b/kernel/module.c
index 8b7d8805819d..1eb952097077 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2043,9 +2043,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
 	unsigned int i;
 
 	/* only scan the sections containing data */
-	kmemleak_scan_area(mod->module_core, (unsigned long)mod -
-			   (unsigned long)mod->module_core,
-			   sizeof(struct module), GFP_KERNEL);
+	kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
 
 	for (i = 1; i < hdr->e_shnum; i++) {
 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -2054,8 +2052,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
 		    && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
 			continue;
 
-		kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
-				   (unsigned long)mod->module_core,
+		kmemleak_scan_area((void *)sechdrs[i].sh_addr,
 				   sechdrs[i].sh_size, GFP_KERNEL);
 	}
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..96106358e042 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -119,8 +119,8 @@
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
 	struct hlist_node node;
-	unsigned long offset;
-	size_t length;
+	unsigned long start;
+	size_t size;
 };
 
 #define KMEMLEAK_GREY	0
@@ -241,8 +241,6 @@ struct early_log {
 	const void *ptr;		/* allocated/freed memory block */
 	size_t size;			/* memory block size */
 	int min_count;			/* minimum reference count */
-	unsigned long offset;		/* scan area offset */
-	size_t length;			/* scan area length */
 	unsigned long trace[MAX_TRACE];	/* stack trace */
 	unsigned int trace_len;		/* stack trace length */
 };
@@ -720,14 +718,13 @@ static void make_black_object(unsigned long ptr)
  * Add a scanning area to the object. If at least one such area is added,
  * kmemleak will only scan these ranges rather than the whole memory block.
  */
-static void add_scan_area(unsigned long ptr, unsigned long offset,
-			  size_t length, gfp_t gfp)
+static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
 	unsigned long flags;
 	struct kmemleak_object *object;
 	struct kmemleak_scan_area *area;
 
-	object = find_and_get_object(ptr, 0);
+	object = find_and_get_object(ptr, 1);
 	if (!object) {
 		kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
 			      ptr);
@@ -741,7 +738,7 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	spin_lock_irqsave(&object->lock, flags);
-	if (offset + length > object->size) {
+	if (ptr + size > object->pointer + object->size) {
 		kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
 		dump_object_info(object);
 		kmem_cache_free(scan_area_cache, area);
@@ -749,8 +746,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	INIT_HLIST_NODE(&area->node);
-	area->offset = offset;
-	area->length = length;
+	area->start = ptr;
+	area->size = size;
 
 	hlist_add_head(&area->node, &object->area_list);
 out_unlock:
@@ -786,7 +783,7 @@ static void object_no_scan(unsigned long ptr)
  * processed later once kmemleak is fully initialized.
  */
 static void __init log_early(int op_type, const void *ptr, size_t size,
-			     int min_count, unsigned long offset, size_t length)
+			     int min_count)
 {
 	unsigned long flags;
 	struct early_log *log;
@@ -808,8 +805,6 @@ static void __init log_early(int op_type, const void *ptr, size_t size,
 	log->ptr = ptr;
 	log->size = size;
 	log->min_count = min_count;
-	log->offset = offset;
-	log->length = length;
 	if (op_type == KMEMLEAK_ALLOC)
 		log->trace_len = __save_stack_trace(log->trace);
 	crt_early_log++;
@@ -858,7 +853,7 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		create_object((unsigned long)ptr, size, min_count, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
@@ -873,7 +868,7 @@ void __ref kmemleak_free(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_full((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
@@ -888,7 +883,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_part((unsigned long)ptr, size);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+		log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
@@ -903,7 +898,7 @@ void __ref kmemleak_not_leak(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_gray_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
@@ -919,22 +914,21 @@ void __ref kmemleak_ignore(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_black_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
 /*
  * Limit the range to be scanned in an allocated memory block.
  */
-void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
-			      size_t length, gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
-		add_scan_area((unsigned long)ptr, offset, length, gfp);
+		add_scan_area((unsigned long)ptr, size, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+		log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
@@ -948,7 +942,7 @@ void __ref kmemleak_no_scan(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		object_no_scan((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
@@ -1075,9 +1069,9 @@ static void scan_object(struct kmemleak_object *object)
 		}
 	} else
 		hlist_for_each_entry(area, elem, &object->area_list, node)
-			scan_block((void *)(object->pointer + area->offset),
-				   (void *)(object->pointer + area->offset
-					    + area->length), object, 0);
+			scan_block((void *)area->start,
+				   (void *)(area->start + area->size),
+				   object, 0);
 out:
 	spin_unlock_irqrestore(&object->lock, flags);
 }
@@ -1642,8 +1636,7 @@ void __init kmemleak_init(void)
 			kmemleak_ignore(log->ptr);
 			break;
 		case KMEMLEAK_SCAN_AREA:
-			kmemleak_scan_area(log->ptr, log->offset, log->length,
-					   GFP_KERNEL);
+			kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
 			break;
 		case KMEMLEAK_NO_SCAN:
 			kmemleak_no_scan(log->ptr);
diff --git a/mm/slab.c b/mm/slab.c
index 646db3085193..d2713a944ebd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2584,8 +2584,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		 * kmemleak does not treat the ->s_mem pointer as a reference
 		 * to the object. Otherwise we will not report the leak.
 		 */
-		kmemleak_scan_area(slabp, offsetof(struct slab, list),
-				   sizeof(struct list_head), local_flags);
+		kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
+				   local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
-- 
cgit v1.2.3


From 65afac7d80ab3bc9f81e75eafb71eeb92a3ebdef Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 29 Oct 2009 08:56:16 -0600
Subject: param: fix lots of bugs with writing charp params from sysfs, by
 leaking mem.

e180a6b7759a "param: fix charp parameters set via sysfs" fixed the case
where charp parameters written via sysfs were freed, leaving drivers
accessing random memory.

Unfortunately, storing a flag in the kparam struct was a bad idea: it's
rodata so setting it causes an oops on some archs.  But that's not all:

1) module_param_array() on charp doesn't work reliably, since we use an
   uninitialized temporary struct kernel_param.
2) there's a fundamental race if a module uses this parameter and then
   it's changed: they will still access the old, freed, memory.

The simplest fix (ie. for 2.6.32) is to never free the memory.  This
prevents all these problems, at cost of a memory leak.  In practice, there
are only 18 places where a charp is writable via sysfs, and all are
root-only writable.

Reported-by: Takashi Iwai <tiwai@suse.de>
Cc: Sitsofe Wheeler <sitsofe@yahoo.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@kernel.org
---
 include/linux/moduleparam.h |  1 -
 kernel/params.c             | 10 +---------
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6547c3cdbc4c..82a9124f7d75 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -37,7 +37,6 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp);
 typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
 /* Flag bits for kernel_param.flags */
-#define KPARAM_KMALLOCED	1
 #define KPARAM_ISBOOL		2
 
 struct kernel_param {
diff --git a/kernel/params.c b/kernel/params.c
index 9da58eabdcb2..95ef27cf8e82 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -218,13 +218,9 @@ int param_set_charp(const char *val, struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
-	if (kp->flags & KPARAM_KMALLOCED)
-		kfree(*(char **)kp->arg);
-
 	/* This is a hack.  We can't need to strdup in early boot, and we
 	 * don't need to; this mangled commandline is preserved. */
 	if (slab_is_available()) {
-		kp->flags |= KPARAM_KMALLOCED;
 		*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
 		if (!kp->arg)
 			return -ENOMEM;
@@ -605,11 +601,7 @@ void module_param_sysfs_remove(struct module *mod)
 
 void destroy_params(const struct kernel_param *params, unsigned num)
 {
-	unsigned int i;
-
-	for (i = 0; i < num; i++)
-		if (params[i].flags & KPARAM_KMALLOCED)
-			kfree(*(char **)params[i].arg);
+	/* FIXME: This should free kmalloced charp parameters.  It doesn't. */
 }
 
 static void __init kernel_add_sysfs_param(const char *name,
-- 
cgit v1.2.3


From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 28 Oct 2009 12:26:39 -0700
Subject: tpm: fix header for modular build

Fix build for TCG_TPM=m.  Header file doesn't handle this
and incorrectly builds stubs.

drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read'
include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here
drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend'
include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here

Repairs linux-next's

commit d6ba452128178091dab7a04d54f7e66fdc32fb39
Author: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date:   Mon Oct 26 09:26:18 2009 -0400

    tpm add default function definitions

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Cc: Mimi Zohar <zohar@us.ibm.com>
Cc: James Morris <jmorris@namei.org>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/tpm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 8eaa8f83effb..ac5d1c1285d9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -27,7 +27,7 @@
  */
 #define	TPM_ANY_NUM 0xFFFF
 
-#if defined(CONFIG_TCG_TPM)
+#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
 
 extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf);
 extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash);
-- 
cgit v1.2.3


From df5c79452f26f2a3d0883a213102515cfeb7aae9 Mon Sep 17 00:00:00 2001
From: Yi Zou <yi.zou@intel.com>
Date: Wed, 28 Oct 2009 18:24:35 +0000
Subject: net: Add ndo_fcoe_get_wwn to net_device_ops

Add ndo_fcoe_get_wwn so Fiber Channel over Ethernet (FCoE) can make use of
the provided World Wide Port Name (WWPN) and World Wide Node Name (WWNN)
from the underlying network interface driver.

Signed-off-by: Yi Zou <yi.zou@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7c227d7cb98..656110a46e96 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -635,6 +635,10 @@ struct net_device_ops {
 						      unsigned int sgc);
 	int			(*ndo_fcoe_ddp_done)(struct net_device *dev,
 						     u16 xid);
+#define NETDEV_FCOE_WWNN 0
+#define NETDEV_FCOE_WWPN 1
+	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
+						    u64 *wwn, int type);
 #endif
 };
 
-- 
cgit v1.2.3


From 1aba721eba1d84a2defce45b950272cee1e6c72a Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@codefidence.com>
Date: Wed, 28 Oct 2009 04:15:24 +0000
Subject: Add the no SACK route option feature

Implement querying and acting upon the no sack bit in the features
field.

Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Sigend-off-by: Ori Finkelman <ori@comsleep.com>
Sigend-off-by: Yony Amit <yony@comsleep.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 +-
 net/ipv4/tcp_input.c      | 3 ++-
 net/ipv4/tcp_output.c     | 4 +++-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index adf2068d12b5..9c802a6b04d3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -377,7 +377,7 @@ enum
 #define RTAX_MAX (__RTAX_MAX - 1)
 
 #define RTAX_FEATURE_ECN	0x00000001
-#define RTAX_FEATURE_SACK	0x00000002
+#define RTAX_FEATURE_NO_SACK	0x00000002
 #define RTAX_FEATURE_TIMESTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7625005486d..5fb25f977451 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3763,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-				    !estab && sysctl_tcp_sack) {
+				    !estab && sysctl_tcp_sack &&
+				    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
 					opt_rx->sack_ok = 1;
 					tcp_sack_reset(opt_rx);
 				}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2e2eb74ac4cc..b35802af3c4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -498,7 +499,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		opts->options |= OPTION_WSCALE;
 		size += TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(sysctl_tcp_sack)) {
+	if (likely(sysctl_tcp_sack &&
+		   !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
 		opts->options |= OPTION_SACK_ADVERTISE;
 		if (unlikely(!(OPTION_TS & opts->options)))
 			size += TCPOLEN_SACKPERM_ALIGNED;
-- 
cgit v1.2.3


From cda42ebd67ee5fdf09d7057b5a4584d36fe8a335 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@codefidence.com>
Date: Wed, 28 Oct 2009 04:15:25 +0000
Subject: Allow disabling TCP timestamp options per route

Implement querying and acting upon the no timestamp bit in the feature
field.

Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Sigend-off-by: Ori Finkelman <ori@comsleep.com>
Sigend-off-by: Yony Amit <yony@comsleep.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 +-
 net/ipv4/tcp_input.c      | 3 ++-
 net/ipv4/tcp_output.c     | 8 ++++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 9c802a6b04d3..2ab8c758b46c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -378,7 +378,7 @@ enum
 
 #define RTAX_FEATURE_ECN	0x00000001
 #define RTAX_FEATURE_NO_SACK	0x00000002
-#define RTAX_FEATURE_TIMESTAMP	0x00000004
+#define RTAX_FEATURE_NO_TSTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
 
 struct rta_session
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5fb25f977451..6097491aa9fc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3755,7 +3755,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			case TCPOPT_TIMESTAMP:
 				if ((opsize == TCPOLEN_TIMESTAMP) &&
 				    ((estab && opt_rx->tstamp_ok) ||
-				     (!estab && sysctl_tcp_timestamps))) {
+				     (!estab && sysctl_tcp_timestamps &&
+				      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
 					opt_rx->saw_tstamp = 1;
 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b35802af3c4c..8819eba8ebb8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -488,7 +488,9 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	size += TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+	if (likely(sysctl_tcp_timestamps &&
+		   !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
+		   *md5 == NULL)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = TCP_SKB_CB(skb)->when;
 		opts->tsecr = tp->rx_opt.ts_recent;
@@ -2317,7 +2319,9 @@ static void tcp_connect_init(struct sock *sk)
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 	 */
 	tp->tcp_header_len = sizeof(struct tcphdr) +
-		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+		(sysctl_tcp_timestamps &&
+		(!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
+		  TCPOLEN_TSTAMP_ALIGNED : 0));
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
-- 
cgit v1.2.3


From 345cda2fd695534be5a4494f1b59da9daed33663 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@codefidence.com>
Date: Wed, 28 Oct 2009 04:15:26 +0000
Subject: Allow to turn off TCP window scale opt per route

Add and use no window scale bit in the features field.

Note that this is not the same as setting a window scale of 0
as would happen with window limit on route.

Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Sigend-off-by: Ori Finkelman <ori@comsleep.com>
Sigend-off-by: Yony Amit <yony@comsleep.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 net/ipv4/tcp_input.c      | 3 ++-
 net/ipv4/tcp_output.c     | 6 ++++--
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2ab8c758b46c..6784b342cbbf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -380,6 +380,7 @@ enum
 #define RTAX_FEATURE_NO_SACK	0x00000002
 #define RTAX_FEATURE_NO_TSTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
+#define RTAX_FEATURE_NO_WSCALE	0x00000010
 
 struct rta_session
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6097491aa9fc..393c56921dcb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3739,7 +3739,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_WINDOW:
 				if (opsize == TCPOLEN_WINDOW && th->syn &&
-				    !estab && sysctl_tcp_window_scaling) {
+				    !estab && sysctl_tcp_window_scaling &&
+				    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
 					__u8 snd_wscale = *(__u8 *)ptr;
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8819eba8ebb8..616c686ca253 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -496,7 +496,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		opts->tsecr = tp->rx_opt.ts_recent;
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
-	if (likely(sysctl_tcp_window_scaling)) {
+	if (likely(sysctl_tcp_window_scaling &&
+		   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
 		opts->ws = tp->rx_opt.rcv_wscale;
 		opts->options |= OPTION_WSCALE;
 		size += TCPOLEN_WSCALE_ALIGNED;
@@ -2347,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk)
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
-				  sysctl_tcp_window_scaling,
+				  (sysctl_tcp_window_scaling &&
+				   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
 				  &rcv_wscale);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
-- 
cgit v1.2.3


From dc343475ed062e13fc260acccaab91d7d80fd5b2 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@codefidence.com>
Date: Wed, 28 Oct 2009 04:15:27 +0000
Subject: Allow disabling of DSACK TCP option per route

Add and use no DSCAK bit in the features field.

Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Sigend-off-by: Ori Finkelman <ori@comsleep.com>
Sigend-off-by: Yony Amit <yony@comsleep.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 net/ipv4/tcp_input.c      | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6784b342cbbf..e78b60cd65a4 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -381,6 +381,7 @@ enum
 #define RTAX_FEATURE_NO_TSTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
 #define RTAX_FEATURE_NO_WSCALE	0x00000010
+#define RTAX_FEATURE_NO_DSACK	0x00000020
 
 struct rta_session
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 393c56921dcb..ba0eab65fe80 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4080,8 +4080,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
+	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4110,13 +4112,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
+		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
-- 
cgit v1.2.3


From fb699dfd426a189fe33b91586c15176a75c8aed0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Oct 2009 19:18:49 +0000
Subject: net: Introduce dev_get_by_index_rcu()

Some workloads hit dev_base_lock rwlock pretty hard.
We can use RCU lookups to avoid touching this rwlock.

netdevices are already freed after a RCU grace period, so this patch
adds no penalty at device dismantle time.

dev_ifname() converted to dev_get_by_index_rcu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 48 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 39 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 656110a46e96..ffc3106cc037 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1139,6 +1139,7 @@ extern void		netdev_resync_ops(struct net_device *dev);
 extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
 extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
+extern struct net_device	*dev_get_by_index_rcu(struct net *net, int ifindex);
 extern int		dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
 extern int		netpoll_trap(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09551cc143a9..68a1bb68b5a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -214,12 +214,15 @@ static int list_netdevice(struct net_device *dev)
 	write_lock_bh(&dev_base_lock);
 	list_add_tail(&dev->dev_list, &net->dev_base_head);
 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_index_hash(net, dev->ifindex));
 	write_unlock_bh(&dev_base_lock);
 	return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
 	ASSERT_RTNL();
@@ -228,7 +231,7 @@ static void unlist_netdevice(struct net_device *dev)
 	write_lock_bh(&dev_base_lock);
 	list_del(&dev->dev_list);
 	hlist_del(&dev->name_hlist);
-	hlist_del(&dev->index_hlist);
+	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
 }
 
@@ -646,6 +649,31 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ *	dev_get_by_index_rcu - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
  *	dev_get_by_index - find a device by its ifindex
@@ -662,11 +690,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
@@ -2939,15 +2967,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
 		return -ENODEV;
 	}
 
 	strcpy(ifr.ifr_name, dev->name);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
-- 
cgit v1.2.3


From 38bfd8f5bec496e8e0db8849e01c99a33479418a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 29 Oct 2009 02:59:18 -0700
Subject: net,socket: introduce DECLARE_SOCKADDR helper to catch overflow at
 build time

proto_ops->getname implies copying protocol specific data
into storage unit (particulary to __kernel_sockaddr_storage).
So when we implement new protocol support we should keep such
a detail in mind (which is easy to forget about).

Lets introduce DECLARE_SOCKADDR helper which check if
storage unit is not overfowed at build time.

Eventually inet_getname is switched to use DECLARE_SOCKADDR
(to show example of usage).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h    | 3 +++
 include/linux/socket.h | 3 +++
 net/ipv4/af_inet.c     | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index b42bb60fe92f..4da9d571b053 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -199,6 +199,9 @@ struct proto_ops {
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
 };
 
+#define DECLARE_SOCKADDR(type, dst, src)	\
+	type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) src; })
+
 struct net_proto_family {
 	int		family;
 	int		(*create)(struct net *net, struct socket *sock, int protocol);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 59966f12990c..7b3aae2052a6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -24,6 +24,9 @@ struct __kernel_sockaddr_storage {
 #include <linux/types.h>		/* pid_t			*/
 #include <linux/compiler.h>		/* __user			*/
 
+#define __sockaddr_check_size(size)	\
+	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
+
 #ifdef __KERNEL__
 # ifdef CONFIG_PROC_FS
 struct seq_file;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 04a14b1600ac..538e84d0bcba 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -685,7 +685,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
-	struct sockaddr_in *sin	= (struct sockaddr_in *)uaddr;
+	DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
 
 	sin->sin_family = AF_INET;
 	if (peer) {
-- 
cgit v1.2.3


From b9d128f1088ea5245109dfc9bbceb128b6371a77 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 29 Oct 2009 13:59:26 +0100
Subject: block: move bdi/address_space unplug functions to backing-dev.h

There's nothing block related about them, the backing device
is used by things like NFS etc as well. This gets rid of the
need to protect such calls by CONFIG_BLOCK.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/aio.c                    |  1 +
 include/linux/backing-dev.h | 13 +++++++++++++
 include/linux/blkdev.h      | 13 -------------
 3 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index cf0bef428f88..c30dfc006108 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/backing-dev.h>
 #include <linux/uio.h>
 
 #define DEBUG 0
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b449e738533a..fcbc26af00e4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -331,4 +331,17 @@ static inline int bdi_sched_wait(void *word)
 	return 0;
 }
 
+static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
+				       struct page *page)
+{
+	if (bdi && bdi->unplug_io_fn)
+		bdi->unplug_io_fn(bdi, page);
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+	if (mapping)
+		blk_run_backing_dev(mapping->backing_dev_info, NULL);
+}
+
 #endif		/* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 221cecd86bd3..39c601f783a0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -823,19 +823,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 	return bdev->bd_disk->queue;
 }
 
-static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
-				       struct page *page)
-{
-	if (bdi && bdi->unplug_io_fn)
-		bdi->unplug_io_fn(bdi, page);
-}
-
-static inline void blk_run_address_space(struct address_space *mapping)
-{
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, NULL);
-}
-
 /*
  * blk_rq_pos()			: the current sector
  * blk_rq_bytes()		: bytes left in the entire request
-- 
cgit v1.2.3


From 64ef291f46d795917f32a0f5975e2b76f6fe206a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Oct 2009 22:34:12 +0900
Subject: percpu: make alloc_percpu() handle array types

alloc_percpu() couldn't handle array types like "int [100]" due to the
way return type was casted.  Fix it by using typeof() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
---
 include/linux/percpu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3d9ba92b104f..519d6876590f 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -164,8 +164,8 @@ static inline void *pcpu_lpage_remapped(void *kaddr)
 
 #endif /* CONFIG_SMP */
 
-#define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type), \
-						       __alignof__(type))
+#define alloc_percpu(type)	\
+	(typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
 
 /*
  * Optional methods for optimized non-lvalue per-cpu variable access.
-- 
cgit v1.2.3


From 0f5e4816dbf38ce9488e611ca2296925c1e90d5e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Oct 2009 22:34:12 +0900
Subject: percpu: remove some sparse warnings

Make the following changes to remove some sparse warnings.

* Make DEFINE_PER_CPU_SECTION() declare __pcpu_unique_* before
  defining it.

* Annotate pcpu_extend_area_map() that it is entered with pcpu_lock
  held, releases it and then reacquires it.

* Make percpu related macros use unique nested variable names.

* While at it, add pcpu prefix to __size_call[_return]() macros as
  to-be-implemented sparse annotations will add percpu specific stuff
  to these macros.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/include/asm/percpu.h | 26 +++++++++++------------
 include/linux/percpu-defs.h   |  1 +
 include/linux/percpu.h        | 48 +++++++++++++++++++++----------------------
 mm/percpu.c                   |  1 +
 4 files changed, 39 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 8b5ec19bdef4..0c44196b78ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -74,31 +74,31 @@ extern void __bad_percpu_size(void);
 
 #define percpu_to_op(op, var, val)			\
 do {							\
-	typedef typeof(var) T__;			\
+	typedef typeof(var) pto_T__;			\
 	if (0) {					\
-		T__ tmp__;				\
-		tmp__ = (val);				\
+		pto_T__ pto_tmp__;			\
+		pto_tmp__ = (val);			\
 	}						\
 	switch (sizeof(var)) {				\
 	case 1:						\
 		asm(op "b %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "qi" ((T__)(val)));		\
+		    : "qi" ((pto_T__)(val)));		\
 		break;					\
 	case 2:						\
 		asm(op "w %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "ri" ((T__)(val)));		\
+		    : "ri" ((pto_T__)(val)));		\
 		break;					\
 	case 4:						\
 		asm(op "l %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "ri" ((T__)(val)));		\
+		    : "ri" ((pto_T__)(val)));		\
 		break;					\
 	case 8:						\
 		asm(op "q %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "re" ((T__)(val)));		\
+		    : "re" ((pto_T__)(val)));		\
 		break;					\
 	default: __bad_percpu_size();			\
 	}						\
@@ -106,31 +106,31 @@ do {							\
 
 #define percpu_from_op(op, var, constraint)		\
 ({							\
-	typeof(var) ret__;				\
+	typeof(var) pfo_ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
 		asm(op "b "__percpu_arg(1)",%0"		\
-		    : "=q" (ret__)			\
+		    : "=q" (pfo_ret__)			\
 		    : constraint);			\
 		break;					\
 	case 2:						\
 		asm(op "w "__percpu_arg(1)",%0"		\
-		    : "=r" (ret__)			\
+		    : "=r" (pfo_ret__)			\
 		    : constraint);			\
 		break;					\
 	case 4:						\
 		asm(op "l "__percpu_arg(1)",%0"		\
-		    : "=r" (ret__)			\
+		    : "=r" (pfo_ret__)			\
 		    : constraint);			\
 		break;					\
 	case 8:						\
 		asm(op "q "__percpu_arg(1)",%0"		\
-		    : "=r" (ret__)			\
+		    : "=r" (pfo_ret__)			\
 		    : constraint);			\
 		break;					\
 	default: __bad_percpu_size();			\
 	}						\
-	ret__;						\
+	pfo_ret__;					\
 })
 
 /*
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 9bd03193ecd4..5a5d6ce4bd55 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -60,6 +60,7 @@
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
 	__PCPU_DUMMY_ATTRS char __pcpu_scope_##name;			\
+	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
 	__typeof__(type) per_cpu__##name
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 519d6876590f..522f421ec213 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -226,20 +226,20 @@ do {									\
 
 extern void __bad_size_call_parameter(void);
 
-#define __size_call_return(stem, variable)				\
-({	typeof(variable) ret__;						\
+#define __pcpu_size_call_return(stem, variable)				\
+({	typeof(variable) pscr_ret__;					\
 	switch(sizeof(variable)) {					\
-	case 1: ret__ = stem##1(variable);break;			\
-	case 2: ret__ = stem##2(variable);break;			\
-	case 4: ret__ = stem##4(variable);break;			\
-	case 8: ret__ = stem##8(variable);break;			\
+	case 1: pscr_ret__ = stem##1(variable);break;			\
+	case 2: pscr_ret__ = stem##2(variable);break;			\
+	case 4: pscr_ret__ = stem##4(variable);break;			\
+	case 8: pscr_ret__ = stem##8(variable);break;			\
 	default:							\
 		__bad_size_call_parameter();break;			\
 	}								\
-	ret__;								\
+	pscr_ret__;							\
 })
 
-#define __size_call(stem, variable, ...)				\
+#define __pcpu_size_call(stem, variable, ...)				\
 do {									\
 	switch(sizeof(variable)) {					\
 		case 1: stem##1(variable, __VA_ARGS__);break;		\
@@ -299,7 +299,7 @@ do {									\
 # ifndef this_cpu_read_8
 #  define this_cpu_read_8(pcp)	_this_cpu_generic_read(pcp)
 # endif
-# define this_cpu_read(pcp)	__size_call_return(this_cpu_read_, (pcp))
+# define this_cpu_read(pcp)	__pcpu_size_call_return(this_cpu_read_, (pcp))
 #endif
 
 #define _this_cpu_generic_to_op(pcp, val, op)				\
@@ -322,7 +322,7 @@ do {									\
 # ifndef this_cpu_write_8
 #  define this_cpu_write_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), =)
 # endif
-# define this_cpu_write(pcp, val)	__size_call(this_cpu_write_, (pcp), (val))
+# define this_cpu_write(pcp, val)	__pcpu_size_call(this_cpu_write_, (pcp), (val))
 #endif
 
 #ifndef this_cpu_add
@@ -338,7 +338,7 @@ do {									\
 # ifndef this_cpu_add_8
 #  define this_cpu_add_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), +=)
 # endif
-# define this_cpu_add(pcp, val)		__size_call(this_cpu_add_, (pcp), (val))
+# define this_cpu_add(pcp, val)		__pcpu_size_call(this_cpu_add_, (pcp), (val))
 #endif
 
 #ifndef this_cpu_sub
@@ -366,7 +366,7 @@ do {									\
 # ifndef this_cpu_and_8
 #  define this_cpu_and_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), &=)
 # endif
-# define this_cpu_and(pcp, val)		__size_call(this_cpu_and_, (pcp), (val))
+# define this_cpu_and(pcp, val)		__pcpu_size_call(this_cpu_and_, (pcp), (val))
 #endif
 
 #ifndef this_cpu_or
@@ -382,7 +382,7 @@ do {									\
 # ifndef this_cpu_or_8
 #  define this_cpu_or_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), |=)
 # endif
-# define this_cpu_or(pcp, val)		__size_call(this_cpu_or_, (pcp), (val))
+# define this_cpu_or(pcp, val)		__pcpu_size_call(this_cpu_or_, (pcp), (val))
 #endif
 
 #ifndef this_cpu_xor
@@ -398,7 +398,7 @@ do {									\
 # ifndef this_cpu_xor_8
 #  define this_cpu_xor_8(pcp, val)	_this_cpu_generic_to_op((pcp), (val), ^=)
 # endif
-# define this_cpu_xor(pcp, val)		__size_call(this_cpu_or_, (pcp), (val))
+# define this_cpu_xor(pcp, val)		__pcpu_size_call(this_cpu_or_, (pcp), (val))
 #endif
 
 /*
@@ -428,7 +428,7 @@ do {									\
 # ifndef __this_cpu_read_8
 #  define __this_cpu_read_8(pcp)	(*__this_cpu_ptr(&(pcp)))
 # endif
-# define __this_cpu_read(pcp)	__size_call_return(__this_cpu_read_, (pcp))
+# define __this_cpu_read(pcp)	__pcpu_size_call_return(__this_cpu_read_, (pcp))
 #endif
 
 #define __this_cpu_generic_to_op(pcp, val, op)				\
@@ -449,7 +449,7 @@ do {									\
 # ifndef __this_cpu_write_8
 #  define __this_cpu_write_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), =)
 # endif
-# define __this_cpu_write(pcp, val)	__size_call(__this_cpu_write_, (pcp), (val))
+# define __this_cpu_write(pcp, val)	__pcpu_size_call(__this_cpu_write_, (pcp), (val))
 #endif
 
 #ifndef __this_cpu_add
@@ -465,7 +465,7 @@ do {									\
 # ifndef __this_cpu_add_8
 #  define __this_cpu_add_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), +=)
 # endif
-# define __this_cpu_add(pcp, val)	__size_call(__this_cpu_add_, (pcp), (val))
+# define __this_cpu_add(pcp, val)	__pcpu_size_call(__this_cpu_add_, (pcp), (val))
 #endif
 
 #ifndef __this_cpu_sub
@@ -493,7 +493,7 @@ do {									\
 # ifndef __this_cpu_and_8
 #  define __this_cpu_and_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), &=)
 # endif
-# define __this_cpu_and(pcp, val)	__size_call(__this_cpu_and_, (pcp), (val))
+# define __this_cpu_and(pcp, val)	__pcpu_size_call(__this_cpu_and_, (pcp), (val))
 #endif
 
 #ifndef __this_cpu_or
@@ -509,7 +509,7 @@ do {									\
 # ifndef __this_cpu_or_8
 #  define __this_cpu_or_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), |=)
 # endif
-# define __this_cpu_or(pcp, val)	__size_call(__this_cpu_or_, (pcp), (val))
+# define __this_cpu_or(pcp, val)	__pcpu_size_call(__this_cpu_or_, (pcp), (val))
 #endif
 
 #ifndef __this_cpu_xor
@@ -525,7 +525,7 @@ do {									\
 # ifndef __this_cpu_xor_8
 #  define __this_cpu_xor_8(pcp, val)	__this_cpu_generic_to_op((pcp), (val), ^=)
 # endif
-# define __this_cpu_xor(pcp, val)	__size_call(__this_cpu_xor_, (pcp), (val))
+# define __this_cpu_xor(pcp, val)	__pcpu_size_call(__this_cpu_xor_, (pcp), (val))
 #endif
 
 /*
@@ -556,7 +556,7 @@ do {									\
 # ifndef irqsafe_cpu_add_8
 #  define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
 # endif
-# define irqsafe_cpu_add(pcp, val) __size_call(irqsafe_cpu_add_, (pcp), (val))
+# define irqsafe_cpu_add(pcp, val) __pcpu_size_call(irqsafe_cpu_add_, (pcp), (val))
 #endif
 
 #ifndef irqsafe_cpu_sub
@@ -584,7 +584,7 @@ do {									\
 # ifndef irqsafe_cpu_and_8
 #  define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
 # endif
-# define irqsafe_cpu_and(pcp, val) __size_call(irqsafe_cpu_and_, (val))
+# define irqsafe_cpu_and(pcp, val) __pcpu_size_call(irqsafe_cpu_and_, (val))
 #endif
 
 #ifndef irqsafe_cpu_or
@@ -600,7 +600,7 @@ do {									\
 # ifndef irqsafe_cpu_or_8
 #  define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
 # endif
-# define irqsafe_cpu_or(pcp, val) __size_call(irqsafe_cpu_or_, (val))
+# define irqsafe_cpu_or(pcp, val) __pcpu_size_call(irqsafe_cpu_or_, (val))
 #endif
 
 #ifndef irqsafe_cpu_xor
@@ -616,7 +616,7 @@ do {									\
 # ifndef irqsafe_cpu_xor_8
 #  define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
 # endif
-# define irqsafe_cpu_xor(pcp, val) __size_call(irqsafe_cpu_xor_, (val))
+# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val))
 #endif
 
 #endif /* __LINUX_PERCPU_H */
diff --git a/mm/percpu.c b/mm/percpu.c
index ec158bb5f86d..e2e80fc78601 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -365,6 +365,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
  * 0 if noop, 1 if successfully extended, -errno on failure.
  */
 static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
+	__releases(lock) __acquires(lock)
 {
 	int new_alloc;
 	int *new;
-- 
cgit v1.2.3


From 2eca40a8ccd4160dbfaa5cbd61038d921d0e5f13 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 26 Oct 2009 16:49:29 -0700
Subject: cpufreq: add cpufreq_get() stub for CONFIG_CPU_FREQ=n

When CONFIG_CPU_FREQ is disabled, cpufreq_get() needs a stub.  Used by kvm
(although it looks like a bit of the kvm code could be omitted when
CONFIG_CPU_FREQ is disabled).

arch/x86/built-in.o: In function `kvm_arch_init':
(.text+0x10de7): undefined reference to `cpufreq_get'

(Needed in linux-next's KVM tree, but it's correct in 2.6.32).

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Tested-by: Eric Paris <eparis@redhat.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpufreq.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 44717eb47639..79a2340d83cd 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -291,8 +291,15 @@ struct global_attr {
 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
 int cpufreq_update_policy(unsigned int cpu);
 
+#ifdef CONFIG_CPU_FREQ
 /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */
 unsigned int cpufreq_get(unsigned int cpu);
+#else
+static inline unsigned int cpufreq_get(unsigned int cpu)
+{
+	return 0;
+}
+#endif
 
 /* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */
 #ifdef CONFIG_CPU_FREQ
-- 
cgit v1.2.3


From 0a1b71b4008d332e57b5605a8228ea7aa96687e8 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 26 Oct 2009 16:49:37 -0700
Subject: strstrip(): mark as as must_check

strstrip() can return a modified value of its input argument, when
removing elading whitesapce.  So it is surely bug for this function's
return value to be ignored.  The caller is probably going to use the
incorrect original pointer.

So mark it __must_check to prevent this frm happening (as it has before).

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 489019ef1694..b8508868d5ad 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -62,7 +62,7 @@ extern char * strnchr(const char *, size_t, int);
 #ifndef __HAVE_ARCH_STRRCHR
 extern char * strrchr(const char *,int);
 #endif
-extern char * strstrip(char *);
+extern char * __must_check strstrip(char *);
 #ifndef __HAVE_ARCH_STRSTR
 extern char * strstr(const char *,const char *);
 #endif
-- 
cgit v1.2.3


From 1b62cbf2140df510a56d38b9d49df2aae95cd0d2 Mon Sep 17 00:00:00 2001
From: "Krauth.Julien" <Krauth.Julien@addi-data.com>
Date: Mon, 26 Oct 2009 16:50:04 -0700
Subject: serial: add ADDI-DATA GmbH PCI-Express communication cards in
 8250_pci.c and pci_ids.h

Add support for ADDI-DATA GmbH PCI-Express communication cards:

APCIe-7300
APCIe-7420
APCIe-7500
APCIe-7800

Warning: 8250_pci.c depends on pci_ids.h. 8250_pci.c

Signed-off-by: Krauth Julien <Krauth.Julien@addi-data.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h   |  4 +++
 2 files changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index e7108e75653d..329469369908 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -1566,6 +1566,10 @@ enum pci_board_num_t {
 	pbn_ni8430_4,
 	pbn_ni8430_8,
 	pbn_ni8430_16,
+	pbn_ADDIDATA_PCIe_1_3906250,
+	pbn_ADDIDATA_PCIe_2_3906250,
+	pbn_ADDIDATA_PCIe_4_3906250,
+	pbn_ADDIDATA_PCIe_8_3906250,
 };
 
 /*
@@ -2185,6 +2189,37 @@ static struct pciserial_board pci_boards[] __devinitdata = {
 		.uart_offset	= 0x10,
 		.first_offset	= 0x800,
 	},
+	/*
+	 * ADDI-DATA GmbH PCI-Express communication cards <info@addi-data.com>
+	 */
+	[pbn_ADDIDATA_PCIe_1_3906250] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 1,
+		.base_baud	= 3906250,
+		.uart_offset	= 0x200,
+		.first_offset	= 0x1000,
+	},
+	[pbn_ADDIDATA_PCIe_2_3906250] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 2,
+		.base_baud	= 3906250,
+		.uart_offset	= 0x200,
+		.first_offset	= 0x1000,
+	},
+	[pbn_ADDIDATA_PCIe_4_3906250] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 4,
+		.base_baud	= 3906250,
+		.uart_offset	= 0x200,
+		.first_offset	= 0x1000,
+	},
+	[pbn_ADDIDATA_PCIe_8_3906250] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 8,
+		.base_baud	= 3906250,
+		.uart_offset	= 0x200,
+		.first_offset	= 0x1000,
+	},
 };
 
 static const struct pci_device_id softmodem_blacklist[] = {
@@ -3556,6 +3591,38 @@ static struct pci_device_id serial_pci_tbl[] = {
 		0,
 		pbn_b0_8_115200 },
 
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCIe7500,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_ADDIDATA_PCIe_4_3906250 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCIe7420,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_ADDIDATA_PCIe_2_3906250 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCIe7300,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_ADDIDATA_PCIe_1_3906250 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCIe7800,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_ADDIDATA_PCIe_8_3906250 },
+
 	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9835,
 		PCI_VENDOR_ID_IBM, 0x0299,
 		0, 0, pbn_b0_bt_2_115200 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 86257a412732..df48628d870b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2160,6 +2160,10 @@
 #define PCI_DEVICE_ID_ADDIDATA_APCI7420_3      0x700D
 #define PCI_DEVICE_ID_ADDIDATA_APCI7300_3      0x700E
 #define PCI_DEVICE_ID_ADDIDATA_APCI7800_3      0x700F
+#define PCI_DEVICE_ID_ADDIDATA_APCIe7300       0x7010
+#define PCI_DEVICE_ID_ADDIDATA_APCIe7420       0x7011
+#define PCI_DEVICE_ID_ADDIDATA_APCIe7500       0x7012
+#define PCI_DEVICE_ID_ADDIDATA_APCIe7800       0x7013
 
 #define PCI_VENDOR_ID_PDC		0x15e9
 
-- 
cgit v1.2.3


From c68d2b1594548cda7f6dbac6a4d9d30a9b01558c Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 26 Oct 2009 16:50:05 -0700
Subject: 8250_pci: add IBM Saturn serial card

The IBM Saturn serial card has only one port. Without that fixup,
the kernel thinks it has two, which confuses userland setup and
admin tools as well.

[akpm@linux-foundation.org: fix pci-ids.h layout]
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Alan Cox <alan@linux.intel.com>
Cc: Michael Reed <mreed10@us.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 11 +++++++++++
 include/linux/pci_ids.h   |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 329469369908..42e8550cd2b6 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -1561,6 +1561,7 @@ enum pci_board_num_t {
 	pbn_exar_XR17C152,
 	pbn_exar_XR17C154,
 	pbn_exar_XR17C158,
+	pbn_exar_ibm_saturn,
 	pbn_pasemi_1682M,
 	pbn_ni8430_2,
 	pbn_ni8430_4,
@@ -2150,6 +2151,13 @@ static struct pciserial_board pci_boards[] __devinitdata = {
 		.base_baud	= 921600,
 		.uart_offset	= 0x200,
 	},
+	[pbn_exar_ibm_saturn] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 1,
+		.base_baud	= 921600,
+		.uart_offset	= 0x200,
+	},
+
 	/*
 	 * PA Semi PWRficient PA6T-1682M on-chip UART
 	 */
@@ -2684,6 +2692,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 		PCI_SUBVENDOR_ID_CONNECT_TECH,
 		PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485, 0, 0,
 		pbn_b0_8_1843200_200 },
+	{	PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152,
+		PCI_VENDOR_ID_IBM, PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT,
+		0, 0, pbn_exar_ibm_saturn },
 
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_U530,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index df48628d870b..b0f0f3851cd4 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -482,6 +482,9 @@
 #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361
 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL	0x252
 
+#define PCI_SUBVENDOR_ID_IBM		0x1014
+#define PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT	0x03d4
+
 #define PCI_VENDOR_ID_UNISYS		0x1018
 #define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C
 
-- 
cgit v1.2.3


From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Thu, 29 Oct 2009 11:40:17 -0500
Subject: define convenient securebits masks for prctl users (v2)

Hi James, would you mind taking the following into
security-testing?

The securebits are used by passing them to prctl with the
PR_{S,G}ET_SECUREBITS commands.  But the defines must be
shifted to be used in prctl, which begs to be confused and
misused by userspace.  So define some more convenient
values for userspace to specify.  This way userspace does

	prctl(PR_SET_SECUREBITS, SECBIT_NOROOT);

instead of

	prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT);

(Thanks to Michael for the idea)

This patch also adds include/linux/securebits to the installed headers.
Then perhaps it can be included by glibc's sys/prctl.h.

Changelog:
	Oct 29: Stephen Rothwell points out that issecure can
		be under __KERNEL__.
	Oct 14: (Suggestions by Michael Kerrisk):
		1. spell out SETUID in SECBIT_NO_SETUID*
		2. SECBIT_X_LOCKED does not imply SECBIT_X
		3. add definitions for keepcaps
        Oct 14: As suggested by Michael Kerrisk, don't
		use SB_* as that convention is already in
		use.  Use SECBIT_ prefix instead.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/Kbuild       |  1 +
 include/linux/securebits.h | 24 ++++++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index cff4a101f266..ffcdb9b509db 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -329,6 +329,7 @@ unifdef-y += scc.h
 unifdef-y += sched.h
 unifdef-y += screen_info.h
 unifdef-y += sdla.h
+unifdef-y += securebits.h
 unifdef-y += selinux_netlink.h
 unifdef-y += sem.h
 unifdef-y += serial_core.h
diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index d2c5ed845bcc..33406174cbe8 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -1,6 +1,15 @@
 #ifndef _LINUX_SECUREBITS_H
 #define _LINUX_SECUREBITS_H 1
 
+/* Each securesetting is implemented using two bits. One bit specifies
+   whether the setting is on or off. The other bit specify whether the
+   setting is locked or not. A setting which is locked cannot be
+   changed from user-level. */
+#define issecure_mask(X)	(1 << (X))
+#ifdef __KERNEL__
+#define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
+#endif
+
 #define SECUREBITS_DEFAULT 0x00000000
 
 /* When set UID 0 has no special privileges. When unset, we support
@@ -12,6 +21,9 @@
 #define SECURE_NOROOT			0
 #define SECURE_NOROOT_LOCKED		1  /* make bit-0 immutable */
 
+#define SECBIT_NOROOT		(issecure_mask(SECURE_NOROOT))
+#define SECBIT_NOROOT_LOCKED	(issecure_mask(SECURE_NOROOT_LOCKED))
+
 /* When set, setuid to/from uid 0 does not trigger capability-"fixup".
    When unset, to provide compatiblility with old programs relying on
    set*uid to gain/lose privilege, transitions to/from uid 0 cause
@@ -19,6 +31,10 @@
 #define SECURE_NO_SETUID_FIXUP		2
 #define SECURE_NO_SETUID_FIXUP_LOCKED	3  /* make bit-2 immutable */
 
+#define SECBIT_NO_SETUID_FIXUP	(issecure_mask(SECURE_NO_SETUID_FIXUP))
+#define SECBIT_NO_SETUID_FIXUP_LOCKED \
+			(issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED))
+
 /* When set, a process can retain its capabilities even after
    transitioning to a non-root user (the set-uid fixup suppressed by
    bit 2). Bit-4 is cleared when a process calls exec(); setting both
@@ -27,12 +43,8 @@
 #define SECURE_KEEP_CAPS		4
 #define SECURE_KEEP_CAPS_LOCKED		5  /* make bit-4 immutable */
 
-/* Each securesetting is implemented using two bits. One bit specifies
-   whether the setting is on or off. The other bit specify whether the
-   setting is locked or not. A setting which is locked cannot be
-   changed from user-level. */
-#define issecure_mask(X)	(1 << (X))
-#define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
+#define SECBIT_KEEP_CAPS	(issecure_mask(SECURE_KEEP_CAPS))
+#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED))
 
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
-- 
cgit v1.2.3


From 5b252f0c2f98df21fadf0f6cf189b87a0b938228 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 29 Oct 2009 07:17:09 +0000
Subject: gro: Name the GRO result enumeration type

This clarifies which return and parameter types are GRO result codes
and not RX result codes.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++----
 net/8021q/vlan_core.c     |  5 +++--
 net/core/dev.c            | 19 ++++++++++++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ffc3106cc037..6e777efe149e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -348,13 +348,14 @@ enum
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 };
 
-enum {
+enum gro_result {
 	GRO_MERGED,
 	GRO_MERGED_FREE,
 	GRO_HELD,
 	GRO_NORMAL,
 	GRO_DROP,
 };
+typedef enum gro_result gro_result_t;
 
 extern void __napi_schedule(struct napi_struct *n);
 
@@ -1480,16 +1481,17 @@ extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
-extern int		dev_gro_receive(struct napi_struct *napi,
+extern gro_result_t	dev_gro_receive(struct napi_struct *napi,
 					struct sk_buff *skb);
-extern int		napi_skb_finish(int ret, struct sk_buff *skb);
+extern int		napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
 extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_reuse_skb(struct napi_struct *napi,
 				       struct sk_buff *skb);
 extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
 extern int		napi_frags_finish(struct napi_struct *napi,
-					  struct sk_buff *skb, int ret);
+					  struct sk_buff *skb,
+					  gro_result_t ret);
 extern struct sk_buff *	napi_frags_skb(struct napi_struct *napi);
 extern int		napi_gro_frags(struct napi_struct *napi);
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 7f7de1a04de6..47a80d65c3b7 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -74,8 +74,9 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
-static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
-			   unsigned int vlan_tci, struct sk_buff *skb)
+static gro_result_t
+vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
+		unsigned int vlan_tci, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 68a1bb68b5a8..1dc13744684c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2476,7 +2476,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
@@ -2484,7 +2484,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int same_flow;
 	int mac_len;
-	int ret;
+	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
@@ -2568,7 +2568,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
@@ -2585,7 +2586,7 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+int napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	int err = NET_RX_SUCCESS;
 
@@ -2600,6 +2601,10 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
 	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
+
+	case GRO_HELD:
+	case GRO_MERGED:
+		break;
 	}
 
 	return err;
@@ -2652,7 +2657,8 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+		      gro_result_t ret)
 {
 	int err = NET_RX_SUCCESS;
 
@@ -2674,6 +2680,9 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
+
+	case GRO_MERGED:
+		break;
 	}
 
 	return err;
-- 
cgit v1.2.3


From c7c4b3b6e976b95facbb723951bdcd554a3530a4 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 29 Oct 2009 21:36:53 -0700
Subject: gro: Change all receive functions to return GRO result codes

This will allow drivers to adjust their receive path dynamically
based on whether GRO is being applied successfully.

Currently all in-tree callers ignore the return values of these
functions and do not need to be changed.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h   | 25 ++++++++++++++-----------
 include/linux/netdevice.h |  8 ++++----
 net/8021q/vlan_core.c     | 16 +++++++++-------
 net/core/dev.c            | 38 +++++++++++++++-----------------------
 4 files changed, 42 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 71a4870c09a9..153f6b9e722c 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -120,10 +120,12 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     u16 vlan_tci, int polling);
 extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
-extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-			    unsigned int vlan_tci, struct sk_buff *skb);
-extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-			  unsigned int vlan_tci);
+extern gro_result_t
+vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
+		 unsigned int vlan_tci, struct sk_buff *skb);
+extern gro_result_t
+vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
+	       unsigned int vlan_tci);
 
 #else
 static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -150,17 +152,18 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb)
 	return 0;
 }
 
-static inline int vlan_gro_receive(struct napi_struct *napi,
-				   struct vlan_group *grp,
-				   unsigned int vlan_tci, struct sk_buff *skb)
+static inline gro_result_t
+vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
+		 unsigned int vlan_tci, struct sk_buff *skb)
 {
-	return NET_RX_DROP;
+	return GRO_DROP;
 }
 
-static inline int vlan_gro_frags(struct napi_struct *napi,
-				 struct vlan_group *grp, unsigned int vlan_tci)
+static inline gro_result_t
+vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
+	       unsigned int vlan_tci)
 {
-	return NET_RX_DROP;
+	return GRO_DROP;
 }
 #endif
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6e777efe149e..193b637889f9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1483,17 +1483,17 @@ extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
 extern gro_result_t	dev_gro_receive(struct napi_struct *napi,
 					struct sk_buff *skb);
-extern int		napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
-extern int		napi_gro_receive(struct napi_struct *napi,
+extern gro_result_t	napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
+extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_reuse_skb(struct napi_struct *napi,
 				       struct sk_buff *skb);
 extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern int		napi_frags_finish(struct napi_struct *napi,
+extern gro_result_t	napi_frags_finish(struct napi_struct *napi,
 					  struct sk_buff *skb,
 					  gro_result_t ret);
 extern struct sk_buff *	napi_frags_skb(struct napi_struct *napi);
-extern int		napi_gro_frags(struct napi_struct *napi);
+extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
 {
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 47a80d65c3b7..8d5ca2ac4f8d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -102,11 +102,12 @@ drop:
 	return GRO_DROP;
 }
 
-int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-		     unsigned int vlan_tci, struct sk_buff *skb)
+gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
+			      unsigned int vlan_tci, struct sk_buff *skb)
 {
 	if (netpoll_rx_on(skb))
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
+			? GRO_DROP : GRO_NORMAL;
 
 	skb_gro_reset_offset(skb);
 
@@ -114,17 +115,18 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
-int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-		   unsigned int vlan_tci)
+gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
+			    unsigned int vlan_tci)
 {
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
-		return NET_RX_DROP;
+		return GRO_DROP;
 
 	if (netpoll_rx_on(skb)) {
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
+			? GRO_DROP : GRO_NORMAL;
 	}
 
 	return napi_frags_finish(napi, skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index 1dc13744684c..631cc40da197 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2586,18 +2586,15 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+		if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
@@ -2607,7 +2604,7 @@ int napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
@@ -2627,7 +2624,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
 
@@ -2657,26 +2654,21 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
-		      gro_result_t ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+			       gro_result_t ret)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
 	case GRO_HELD:
 		skb->protocol = eth_type_trans(skb, napi->dev);
 
-		if (ret == GRO_NORMAL)
-			return netif_receive_skb(skb);
-
-		skb_gro_pull(skb, -ETH_HLEN);
+		if (ret == GRO_HELD)
+			skb_gro_pull(skb, -ETH_HLEN);
+		else if (netif_receive_skb(skb))
+			ret = GRO_DROP;
 		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
@@ -2685,7 +2677,7 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2726,12 +2718,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
-		return NET_RX_DROP;
+		return GRO_DROP;
 
 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
-- 
cgit v1.2.3


From 14d18a81b5171d4433e41129619c75748b4f4d26 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 29 Oct 2009 00:10:37 +0000
Subject: net: fix kmemcheck annotations

struct sk_buff kmemcheck annotations enlarged this structure by 8/16 bytes

Fix this by moving 'protocol' inside flags1 bitfield,
and queue_mapping inside flags2 bitfield.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23ac66e6..6aebfceca3eb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -354,8 +354,8 @@ struct sk_buff {
 				ipvs_property:1,
 				peeked:1,
 				nf_trace:1;
+	__be16			protocol:16;
 	kmemcheck_bitfield_end(flags1);
-	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -367,7 +367,6 @@ struct sk_buff {
 #endif
 
 	int			iif;
-	__u16			queue_mapping;
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
@@ -376,6 +375,7 @@ struct sk_buff {
 #endif
 
 	kmemcheck_bitfield_begin(flags2);
+	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-- 
cgit v1.2.3


From 9d410c796067686b1e032d54ce475b7055537138 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 30 Oct 2009 05:03:53 +0000
Subject: net: fix sk_forward_alloc corruption

On UDP sockets, we must call skb_free_datagram() with socket locked,
or risk sk_forward_alloc corruption. This requirement is not respected
in SUNRPC.

Add a convenient helper, skb_free_datagram_locked() and use it in SUNRPC

Reported-by: Francis Moreau <francis.moro@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/datagram.c    | 10 +++++++++-
 net/ipv4/udp.c         |  4 +---
 net/ipv6/udp.c         |  4 +---
 net/sunrpc/svcsock.c   | 10 +++++-----
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6aebfceca3eb..bcdd6606f468 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1757,6 +1757,8 @@ extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
 						     int to_offset,
 						     int size);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern void	       skb_free_datagram_locked(struct sock *sk,
+						struct sk_buff *skb);
 extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
 					 unsigned int flags);
 extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 1c6cf3a1a4f6..4ade3011bb3c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -224,6 +224,15 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 	consume_skb(skb);
 	sk_mem_reclaim_partial(sk);
 }
+EXPORT_SYMBOL(skb_free_datagram);
+
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+{
+	lock_sock(sk);
+	skb_free_datagram(sk, skb);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(skb_free_datagram_locked);
 
 /**
  *	skb_kill_datagram - Free a datagram skbuff forcibly
@@ -752,5 +761,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 EXPORT_SYMBOL(datagram_poll);
 EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_free_datagram);
 EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d0d436d6216c..0fa9f70e4b19 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -999,9 +999,7 @@ try_again:
 		err = ulen;
 
 out_free:
-	lock_sock(sk);
-	skb_free_datagram(sk, skb);
-	release_sock(sk);
+	skb_free_datagram_locked(sk, skb);
 out:
 	return err;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3a60f12b34ed..cf538ed5ef6a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -288,9 +288,7 @@ try_again:
 		err = ulen;
 
 out_free:
-	lock_sock(sk);
-	skb_free_datagram(sk, skb);
-	release_sock(sk);
+	skb_free_datagram_locked(sk, skb);
 out:
 	return err;
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ccc5e83cae5d..1c246a4f491e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -111,7 +111,7 @@ static void svc_release_skb(struct svc_rqst *rqstp)
 		rqstp->rq_xprt_ctxt = NULL;
 
 		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
-		skb_free_datagram(svsk->sk_sk, skb);
+		skb_free_datagram_locked(svsk->sk_sk, skb);
 	}
 }
 
@@ -578,7 +578,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 				"svc: received unknown control message %d/%d; "
 				"dropping RPC reply datagram\n",
 					cmh->cmsg_level, cmh->cmsg_type);
-		skb_free_datagram(svsk->sk_sk, skb);
+		skb_free_datagram_locked(svsk->sk_sk, skb);
 		return 0;
 	}
 
@@ -588,18 +588,18 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
 			local_bh_enable();
 			/* checksum error */
-			skb_free_datagram(svsk->sk_sk, skb);
+			skb_free_datagram_locked(svsk->sk_sk, skb);
 			return 0;
 		}
 		local_bh_enable();
-		skb_free_datagram(svsk->sk_sk, skb);
+		skb_free_datagram_locked(svsk->sk_sk, skb);
 	} else {
 		/* we can use it in-place */
 		rqstp->rq_arg.head[0].iov_base = skb->data +
 			sizeof(struct udphdr);
 		rqstp->rq_arg.head[0].iov_len = len;
 		if (skb_checksum_complete(skb)) {
-			skb_free_datagram(svsk->sk_sk, skb);
+			skb_free_datagram_locked(svsk->sk_sk, skb);
 			return 0;
 		}
 		rqstp->rq_xprt_ctxt = skb;
-- 
cgit v1.2.3


From 0c509a6c9393b27a8c5a01acd4a72616206cfc24 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Thu, 29 Oct 2009 14:18:21 +0000
Subject: net: Allow devices to specify a device specific sysfs group.

This isn't beautifully abstracted, but it is simple,
simplifies uses and so far is only needed for the bonding driver.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 net/core/net-sysfs.c      | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 193b637889f9..e5ece8dceaad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -900,8 +900,8 @@ struct net_device
 
 	/* class/net/name entry */
 	struct device		dev;
-	/* space for optional statistics and wireless sysfs groups */
-	const struct attribute_group *sysfs_groups[3];
+	/* space for optional device, statistics, and wireless sysfs groups */
+	const struct attribute_group *sysfs_groups[4];
 
 	/* rtnetlink link ops */
 	const struct rtnl_link_ops *rtnl_link_ops;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 89de182353b0..157645c0da73 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -544,8 +544,11 @@ int netdev_register_kobject(struct net_device *net)
 	dev_set_name(dev, "%s", net->name);
 
 #ifdef CONFIG_SYSFS
-	*groups++ = &netstat_group;
+	/* Allow for a device specific group */
+	if (*groups)
+		groups++;
 
+	*groups++ = &netstat_group;
 #ifdef CONFIG_WIRELESS_EXT_SYSFS
 	if (net->ieee80211_ptr)
 		*groups++ = &wireless_group;
-- 
cgit v1.2.3


From 22403def134e2c1017cb04ae9129a38e841b2d8c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 30 Oct 2009 12:55:03 +0100
Subject: mac80211: also drop qos-nullfunc frames silently

We drop nullfunc frames, but not qos-nullfunc frames,
even though those could be used for PS state control
as well.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 12 +++++++++++-
 net/mac80211/rx.c         | 15 ++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 52e15e079c61..0aa831467493 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -472,7 +472,7 @@ static inline int ieee80211_is_cfendack(__le16 fc)
 }
 
 /**
- * ieee80211_is_nullfunc - check if FTYPE=IEEE80211_FTYPE_DATA and STYPE=IEEE80211_STYPE_NULLFUNC
+ * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame
  * @fc: frame control bytes in little-endian byteorder
  */
 static inline int ieee80211_is_nullfunc(__le16 fc)
@@ -481,6 +481,16 @@ static inline int ieee80211_is_nullfunc(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC);
 }
 
+/**
+ * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline int ieee80211_is_qos_nullfunc(__le16 fc)
+{
+	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) ==
+	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
+}
+
 struct ieee80211s_hdr {
 	u8 flags;
 	u8 ttl;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f862399f7ce1..51cb8bc3af81 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -886,12 +886,17 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		}
 	}
 
-	/* Drop data::nullfunc frames silently, since they are used only to
-	 * control station power saving mode. */
-	if (ieee80211_is_nullfunc(hdr->frame_control)) {
+	/*
+	 * Drop (qos-)data::nullfunc frames silently, since they
+	 * are used only to control station power saving mode.
+	 */
+	if (ieee80211_is_nullfunc(hdr->frame_control) ||
+	    ieee80211_is_qos_nullfunc(hdr->frame_control)) {
 		I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc);
-		/* Update counter and free packet here to avoid counting this
-		 * as a dropped packed. */
+		/*
+		 * Update counter and free packet here to avoid
+		 * counting this as a dropped packed.
+		 */
 		sta->rx_packets++;
 		dev_kfree_skb(rx->skb);
 		return RX_QUEUED;
-- 
cgit v1.2.3


From 1a6f2a7512021ceae3c4201c7aab07f032e9ce91 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 12 Oct 2009 20:17:41 -0700
Subject: Driver core: allow certain drivers prohibit bind/unbind via sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Platform drivers registered via platform_driver_probe() can be bound
to devices only once, upon registration, because discard their probe()
routines to save memory. Unbinding the driver through sysfs 'unbind'
leaves the device stranded and confuses users so let's not create
bind and unbind attributes for such drivers.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Cc: Éric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c      | 17 +++++++++++------
 drivers/base/platform.c |  6 +++++-
 include/linux/device.h  |  4 +++-
 3 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 973bf2ad4e0d..63c143e54a57 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -689,15 +689,19 @@ int bus_add_driver(struct device_driver *drv)
 		printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n",
 			__func__, drv->name);
 	}
-	error = add_bind_files(drv);
-	if (error) {
-		/* Ditto */
-		printk(KERN_ERR "%s: add_bind_files(%s) failed\n",
-			__func__, drv->name);
+
+	if (!drv->suppress_bind_attrs) {
+		error = add_bind_files(drv);
+		if (error) {
+			/* Ditto */
+			printk(KERN_ERR "%s: add_bind_files(%s) failed\n",
+				__func__, drv->name);
+		}
 	}
 
 	kobject_uevent(&priv->kobj, KOBJ_ADD);
 	return 0;
+
 out_unregister:
 	kfree(drv->p);
 	drv->p = NULL;
@@ -720,7 +724,8 @@ void bus_remove_driver(struct device_driver *drv)
 	if (!drv->bus)
 		return;
 
-	remove_bind_files(drv);
+	if (!drv->suppress_bind_attrs)
+		remove_bind_files(drv);
 	driver_remove_attrs(drv->bus, drv);
 	driver_remove_file(drv, &driver_attr_uevent);
 	klist_remove(&drv->p->knode_bus);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index ed156a13aa40..4fa954b07ac4 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -521,11 +521,15 @@ int __init_or_module platform_driver_probe(struct platform_driver *drv,
 {
 	int retval, code;
 
+	/* make sure driver won't have bind/unbind attributes */
+	drv->driver.suppress_bind_attrs = true;
+
 	/* temporary section violation during probe() */
 	drv->probe = probe;
 	retval = code = platform_driver_register(drv);
 
-	/* Fixup that section violation, being paranoid about code scanning
+	/*
+	 * Fixup that section violation, being paranoid about code scanning
 	 * the list of drivers in order to probe new devices.  Check to see
 	 * if the probe was successful, and make sure any forced probes of
 	 * new devices fail.
diff --git a/include/linux/device.h b/include/linux/device.h
index aca31bf7d8ed..2ea3e4921812 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -124,7 +124,9 @@ struct device_driver {
 	struct bus_type		*bus;
 
 	struct module		*owner;
-	const char 		*mod_name;	/* used for built-in modules */
+	const char		*mod_name;	/* used for built-in modules */
+
+	bool suppress_bind_attrs;	/* disables bind/unbind via sysfs */
 
 	int (*probe) (struct device *dev);
 	int (*remove) (struct device *dev);
-- 
cgit v1.2.3


From 244546f0d3101c5441f5b14cfe8a79d62679eaea Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Fri, 30 Oct 2009 08:54:53 +0000
Subject: RDS: Add GET_MR_FOR_DEST sockopt

RDS currently supports a GET_MR sockopt to establish a
memory region (MR) for a chunk of memory. However, the fastreg
method ties a MR to a particular destination. The GET_MR_FOR_DEST
sockopt allows the remote machine to be specified, and thus
support for fastreg (aka FRWRs).

Note that this patch does *not* do all of this - it simply
implements the new sockopt in terms of the old one, so applications
can begin to use the new sockopt in preparation for cutover to
FRWRs.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rds.h |  8 ++++++++
 net/rds/af_rds.c    |  3 +++
 net/rds/rdma.c      | 24 ++++++++++++++++++++++++
 net/rds/rdma.h      |  1 +
 4 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rds.h b/include/linux/rds.h
index 89d46e1afbb1..cab4994c2f63 100644
--- a/include/linux/rds.h
+++ b/include/linux/rds.h
@@ -56,6 +56,7 @@
 /* deprecated: RDS_BARRIER 4 */
 #define RDS_RECVERR			5
 #define RDS_CONG_MONITOR		6
+#define RDS_GET_MR_FOR_DEST		7
 
 /*
  * Control message types for SOL_RDS.
@@ -224,6 +225,13 @@ struct rds_get_mr_args {
 	uint64_t	flags;
 };
 
+struct rds_get_mr_for_dest_args {
+	struct sockaddr_storage	dest_addr;
+	struct rds_iovec 	vec;
+	u_int64_t		cookie_addr;
+	uint64_t		flags;
+};
+
 struct rds_free_mr_args {
 	rds_rdma_cookie_t cookie;
 	u_int64_t	flags;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a202e5b36079..2b978dc6e75d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -265,6 +265,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
 	case RDS_GET_MR:
 		ret = rds_get_mr(rs, optval, optlen);
 		break;
+	case RDS_GET_MR_FOR_DEST:
+		ret = rds_get_mr_for_dest(rs, optval, optlen);
+		break;
 	case RDS_FREE_MR:
 		ret = rds_free_mr(rs, optval, optlen);
 		break;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8dc83d2caa58..971b5a668458 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -317,6 +317,30 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
 	return __rds_rdma_map(rs, &args, NULL, NULL);
 }
 
+int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
+{
+	struct rds_get_mr_for_dest_args args;
+	struct rds_get_mr_args new_args;
+
+	if (optlen != sizeof(struct rds_get_mr_for_dest_args))
+		return -EINVAL;
+
+	if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval,
+			   sizeof(struct rds_get_mr_for_dest_args)))
+		return -EFAULT;
+
+	/*
+	 * Initially, just behave like get_mr().
+	 * TODO: Implement get_mr as wrapper around this
+	 *	 and deprecate it.
+	 */
+	new_args.vec = args.vec;
+	new_args.cookie_addr = args.cookie_addr;
+	new_args.flags = args.flags;
+
+	return __rds_rdma_map(rs, &new_args, NULL, NULL);
+}
+
 /*
  * Free the MR indicated by the given R_Key
  */
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
index 425512098b0b..909c39835a5d 100644
--- a/net/rds/rdma.h
+++ b/net/rds/rdma.h
@@ -61,6 +61,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 }
 
 int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
+int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
 int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
 void rds_rdma_drop_keys(struct rds_sock *rs);
 int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-- 
cgit v1.2.3


From 19593ffdb6daa6ba691d247a2400cece12687c52 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Wed, 14 Oct 2009 20:40:10 +0200
Subject: firewire: ohci: 0 may be a valid DMA address

I was told that there are obscure architectures with non-coherent DMA
which may DMA-map to bus address 0.  We shall not use 0 as a magic
number of uninitialized bus address variables.

The packet->payload_length > 0 test cannot be used either (except in
at_context_queue_packet) because local requests are not DMA-mapped
regardless of payload_length.  Hence add a state flag to struct
fw_packet.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-transaction.c | 4 ++--
 drivers/firewire/ohci.c             | 9 +++++----
 include/linux/firewire.h            | 1 +
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 66789c3cc561..842739df23e2 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -226,7 +226,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
 	packet->speed = speed;
 	packet->generation = generation;
 	packet->ack = 0;
-	packet->payload_bus = 0;
+	packet->payload_mapped = false;
 }
 
 /**
@@ -601,7 +601,7 @@ void fw_fill_response(struct fw_packet *response, u32 *request_header,
 		WARN(1, KERN_ERR "wrong tcode %d", tcode);
 	}
 
-	response->payload_bus = 0;
+	response->payload_mapped = false;
 }
 EXPORT_SYMBOL(fw_fill_response);
 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 418415564791..a71477541dc7 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -995,7 +995,8 @@ static int at_context_queue_packet(struct context *ctx,
 			packet->ack = RCODE_SEND_ERROR;
 			return -1;
 		}
-		packet->payload_bus = payload_bus;
+		packet->payload_bus	= payload_bus;
+		packet->payload_mapped	= true;
 
 		d[2].req_count    = cpu_to_le16(packet->payload_length);
 		d[2].data_address = cpu_to_le32(payload_bus);
@@ -1023,7 +1024,7 @@ static int at_context_queue_packet(struct context *ctx,
 	 */
 	if (ohci->generation != packet->generation ||
 	    reg_read(ohci, OHCI1394_IntEventSet) & OHCI1394_busReset) {
-		if (packet->payload_length > 0)
+		if (packet->payload_mapped)
 			dma_unmap_single(ohci->card.device, payload_bus,
 					 packet->payload_length, DMA_TO_DEVICE);
 		packet->ack = RCODE_GENERATION;
@@ -1059,7 +1060,7 @@ static int handle_at_packet(struct context *context,
 		/* This packet was cancelled, just continue. */
 		return 1;
 
-	if (packet->payload_bus)
+	if (packet->payload_mapped)
 		dma_unmap_single(ohci->card.device, packet->payload_bus,
 				 packet->payload_length, DMA_TO_DEVICE);
 
@@ -1723,7 +1724,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
 	if (packet->ack != 0)
 		goto out;
 
-	if (packet->payload_bus)
+	if (packet->payload_mapped)
 		dma_unmap_single(ohci->card.device, packet->payload_bus,
 				 packet->payload_length, DMA_TO_DEVICE);
 
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 211a5d7d87b3..9416a461b696 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -267,6 +267,7 @@ struct fw_packet {
 	void *payload;
 	size_t payload_length;
 	dma_addr_t payload_bus;
+	bool payload_mapped;
 	u32 timestamp;
 
 	/*
-- 
cgit v1.2.3


From 45b9deaf14e74543371aa8faea69c14e27b038c6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 2 Nov 2009 15:43:20 +0900
Subject: sh: intc: Handle legacy IRQ reservation in vector map.

Different CPUs will have different starting vectors, with varying
amounts of reserved or unusable vector space prior to the first slot.
This introduces a legacy vector reservation system that inserts itself in
between the CPU vector map registration and the platform specific IRQ
setup. This works fine in practice as the only new vectors that boards
need to establish on their own should be dynamically allocated rather
than arbitrarily assigned. As a plus, this also makes all of the
converted platforms sparseirq ready.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/irq.c    |  6 ++++++
 drivers/sh/intc.c       | 25 +++++++++++++++++++++++++
 include/linux/sh_intc.h |  3 +++
 3 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 7aa89fac1f81..e1913f28f418 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -263,6 +263,12 @@ void __init init_IRQ(void)
 {
 	plat_irq_setup();
 
+	/*
+	 * Pin any of the legacy IRQ vectors that haven't already been
+	 * grabbed by the platform
+	 */
+	reserve_irq_legacy();
+
 	/* Perform the machine specific initialisation */
 	if (sh_mv.mv_init_irq)
 		sh_mv.mv_init_irq();
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 4789df43c0f9..a7e5c2e9986c 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -928,3 +928,28 @@ void destroy_irq(unsigned int irq)
 	__clear_bit(irq, intc_irq_map);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
+
+int reserve_irq_vector(unsigned int irq)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	if (test_and_set_bit(irq, intc_irq_map))
+		ret = -EBUSY;
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	return ret;
+}
+
+void reserve_irq_legacy(void)
+{
+	unsigned long flags;
+	int i, j;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	j = find_first_bit(intc_irq_map, nr_irqs);
+	for (i = 0; i < j; i++)
+		__set_bit(i, intc_irq_map);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 4e4b22d50164..4ef246f14654 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -84,4 +84,7 @@ struct intc_desc symbol __initdata = {					\
 void __init register_intc_controller(struct intc_desc *desc);
 int intc_set_priority(unsigned int irq, unsigned int prio);
 
+int reserve_irq_vector(unsigned int irq);
+void reserve_irq_legacy(void);
+
 #endif /* __SH_INTC_H */
-- 
cgit v1.2.3


From 72c9528bab94cc052d00ce241b8e85f5d71e45f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 30 Oct 2009 07:11:27 +0000
Subject: net: Introduce dev_get_by_name_rcu()

Some workloads hit dev_base_lock rwlock pretty hard.
We can use RCU lookups to avoid touching this rwlock
(and avoid touching netdevice refcount)

netdevices are already freed after a RCU grace period, so this patch
adds no penalty at device dismantle time.

However, it adds a synchronize_rcu() call in dev_change_name()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 49 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 41 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e5ece8dceaad..bcf1083857fc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1115,6 +1115,7 @@ extern void		__dev_remove_pack(struct packet_type *pt);
 extern struct net_device	*dev_get_by_flags(struct net *net, unsigned short flags,
 						  unsigned short mask);
 extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
+extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
 extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
 extern int		dev_alloc_name(struct net_device *dev, const char *name);
 extern int		dev_open(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 94f42a15fff1..f54d8b8a434b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -213,7 +213,7 @@ static int list_netdevice(struct net_device *dev)
 
 	write_lock_bh(&dev_base_lock);
 	list_add_tail(&dev->dev_list, &net->dev_base_head);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 	hlist_add_head_rcu(&dev->index_hlist,
 			   dev_index_hash(net, dev->ifindex));
 	write_unlock_bh(&dev_base_lock);
@@ -230,7 +230,7 @@ static void unlist_netdevice(struct net_device *dev)
 	/* Unlink dev from the device chain */
 	write_lock_bh(&dev_base_lock);
 	list_del(&dev->dev_list);
-	hlist_del(&dev->name_hlist);
+	hlist_del_rcu(&dev->name_hlist);
 	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
 }
@@ -598,6 +598,32 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name)
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
+/**
+ *	dev_get_by_name_rcu	- find a device by its name
+ *	@net: the applicable net namespace
+ *	@name: name to find
+ *
+ *	Find an interface by name.
+ *	If the name is found a pointer to the device is returned.
+ * 	If the name is not found then %NULL is returned.
+ *	The reference counters are not incremented so the caller must be
+ *	careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
 /**
  *	dev_get_by_name		- find a device by its name
  *	@net: the applicable net namespace
@@ -614,11 +640,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
@@ -960,7 +986,12 @@ rollback:
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del(&dev->name_hlist);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+	write_unlock_bh(&dev_base_lock);
+
+	synchronize_rcu();
+
+	write_lock_bh(&dev_base_lock);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 	write_unlock_bh(&dev_base_lock);
 
 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1062,9 +1093,9 @@ void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
-	read_unlock(&dev_base_lock);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
 
 	if (!dev && capable(CAP_NET_ADMIN))
 		request_module("%s", name);
-- 
cgit v1.2.3


From 4f570f995f68ef77aae7e5a441222f59232f2d0e Mon Sep 17 00:00:00 2001
From: Alberto Bertogli <albertito@blitiri.com.ar>
Date: Mon, 2 Nov 2009 11:40:16 +0100
Subject: Do not __always_inline bvec_kmap_irq() and bvec_kunmap_irq()

So remove both the comment and the inline requirement, going back to the
inline hint.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5be93f18d842..474792b825d0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -450,11 +450,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 /*
  * remember never ever reenable interrupts between a bvec_kmap_irq and
  * bvec_kunmap_irq!
- *
- * This function MUST be inlined - it plays with the CPU interrupt flags.
  */
-static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
-		unsigned long *flags)
+static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
 {
 	unsigned long addr;
 
@@ -470,8 +467,7 @@ static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
 	return (char *) addr + bvec->bv_offset;
 }
 
-static __always_inline void bvec_kunmap_irq(char *buffer,
-		unsigned long *flags)
+static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
 {
 	unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
 
-- 
cgit v1.2.3


From 7b2a35132ad0a70902dcd2844c27ed64cda0ce9b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 2 Nov 2009 08:50:52 +0800
Subject: compiler: Introduce __always_unused

I wrote some code which is used as compile-time checker, and the
code should be elided after compile.

So I need to annotate the code as "always unused", compared to
"maybe unused".

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <4AEE2CEC.8040206@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler-gcc.h | 1 +
 include/linux/compiler.h     | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a3ed7cb8ca34..73dcf804bc94 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -79,6 +79,7 @@
 #define  noinline			__attribute__((noinline))
 #define __attribute_const__		__attribute__((__const__))
 #define __maybe_unused			__attribute__((unused))
+#define __always_unused			__attribute__((unused))
 
 #define __gcc_header(x) #x
 #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..7947f4f6fa51 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -213,6 +213,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __maybe_unused		/* unimplemented */
 #endif
 
+#ifndef __always_unused
+# define __always_unused	/* unimplemented */
+#endif
+
 #ifndef noinline
 #define noinline
 #endif
-- 
cgit v1.2.3


From e74c2e81fc9e1e674f2747c85fe8cfeaaafa55f6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 2 Nov 2009 21:57:39 -0800
Subject: Input: mark custom_data in ff_periodic_effect as __user

The custom_data pointer in ff_periodict_effect structure is a
userspace pointer and should be marked as such.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 0ccfc30cd40f..9ee67b4b2b48 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -890,7 +890,7 @@ struct ff_periodic_effect {
 	struct ff_envelope envelope;
 
 	__u32 custom_len;
-	__s16 *custom_data;
+	__s16 __user *custom_data;
 };
 
 /**
-- 
cgit v1.2.3


From 8649f13d2d810406da444a6101906041b796fbde Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 2 Nov 2009 14:30:00 +0000
Subject: broadcom: Consolidate dev_flags definitions

This patch moves all the dev_flags enumerations outside the broadcom.c
file to include/linux/brcmphy.h.  The existing flags were not used yet
and have been re-enumerated to avoid conflicts.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 11 +----------
 include/linux/brcmphy.h    | 17 +++++++++++------
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 1a2b2f2a273a..ace0ccc87a00 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -16,6 +16,7 @@
 
 #include <linux/module.h>
 #include <linux/phy.h>
+#include <linux/brcmphy.h>
 
 #define PHY_ID_BCM50610		0x0143bd60
 #define PHY_ID_BCM50610M	0x0143bd70
@@ -138,16 +139,6 @@
 #define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
 #define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
 
-/*
- * Device flags for PHYs that can be configured for different operating
- * modes.
- */
-#define PHY_BCM_FLAGS_VALID		0x80000000
-#define PHY_BCM_FLAGS_INTF_XAUI		0x00000020
-#define PHY_BCM_FLAGS_INTF_SGMII	0x00000010
-#define PHY_BCM_FLAGS_MODE_1000BX	0x00000002
-#define PHY_BCM_FLAGS_MODE_COPPER	0x00000001
-
 
 /*****************************************************************************/
 /* Fast Ethernet Transceiver definitions. */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 9b64b6d67873..daa1480fcf49 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,6 +1,11 @@
-#define PHY_BRCM_WIRESPEED_ENABLE	0x00000001
-#define PHY_BRCM_AUTO_PWRDWN_ENABLE	0x00000002
-#define PHY_BRCM_APD_CLK125_ENABLE	0x00000004
-#define PHY_BRCM_STD_IBND_DISABLE	0x00000008
-#define PHY_BRCM_EXT_IBND_RX_ENABLE	0x00000010
-#define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00000020
+#define PHY_BCM_FLAGS_MODE_COPPER	0x00000001
+#define PHY_BCM_FLAGS_MODE_1000BX	0x00000002
+#define PHY_BCM_FLAGS_INTF_SGMII	0x00000010
+#define PHY_BCM_FLAGS_INTF_XAUI		0x00000020
+#define PHY_BRCM_WIRESPEED_ENABLE	0x00000100
+#define PHY_BRCM_AUTO_PWRDWN_ENABLE	0x00000200
+#define PHY_BRCM_APD_CLK125_ENABLE	0x00000400
+#define PHY_BRCM_STD_IBND_DISABLE	0x00000800
+#define PHY_BRCM_EXT_IBND_RX_ENABLE	0x00001000
+#define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00002000
+#define PHY_BCM_FLAGS_VALID		0x80000000
-- 
cgit v1.2.3


From 63a14ce449dd6d647de2725809159eb072b2c44f Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 2 Nov 2009 14:30:40 +0000
Subject: tg3 / broadcom: Add PHY_BRCM_CLEAR_RGMII_MODE flag

Broadcom 50610M parts changed the default definitions of the RGMII mode
shadow register.  The 5785 needs the RGMII mode selection bits [4:3]
cleared.

The default value of the remaining bits in this register are zero.
Rather than unnecessarily burn an extra bit in the dev_flags member in
an attempt to enumerate all possible combinations, this patch take a
more course grained approach and labels the option as "clear all bits".

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 6 ++++++
 drivers/net/tg3.c          | 1 +
 include/linux/brcmphy.h    | 1 +
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index ace0ccc87a00..5d2a2e90aba8 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -105,6 +105,7 @@
 #define BCM5482_SHD_LEDS1_LED3(src)	((src & 0xf) << 4)
 					/* LED1 / ~LINKSPD[1] selector */
 #define BCM5482_SHD_LEDS1_LED1(src)	((src & 0xf) << 0)
+#define BCM54XX_SHD_RGMII_MODE	0x0b	/* 01011: RGMII Mode Selector */
 #define BCM5482_SHD_SSD		0x14	/* 10100: Secondary SerDes control */
 #define BCM5482_SHD_SSD_LEDM	0x0008	/* SSD LED Mode enable */
 #define BCM5482_SHD_SSD_EN	0x0001	/* SSD enable */
@@ -330,6 +331,11 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
+	if ((BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 ||
+	     BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) &&
+	    (phydev->dev_flags & PHY_BRCM_CLEAR_RGMII_MODE))
+		bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0);
+
 	bcm54xx_phydsp_config(phydev);
 
 	return 0;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0aecd07d3f6b..e7128f6ae2d8 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1100,6 +1100,7 @@ static int tg3_mdio_init(struct tg3 *tp)
 		break;
 	case TG3_PHY_ID_BCM50610:
 	case TG3_PHY_ID_BCM50610M:
+		phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE;
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE)
 			phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE;
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN)
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index daa1480fcf49..6e7ffcee9c80 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -8,4 +8,5 @@
 #define PHY_BRCM_STD_IBND_DISABLE	0x00000800
 #define PHY_BRCM_EXT_IBND_RX_ENABLE	0x00001000
 #define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00002000
+#define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
 #define PHY_BCM_FLAGS_VALID		0x80000000
-- 
cgit v1.2.3


From 32e5a8d651c0dbb02bf82ca954206282e44c4b11 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 2 Nov 2009 14:31:39 +0000
Subject: tg3 / broadcom: Add code to disable rxc refclk

The 5785 does not use the RXC reference clock.  Turning it off is
desirable as it saves power.

By default, the 50610 enables the RXC reference clock and the 50610M
disables it.  Presumably this is one of the reasons why the hardware
architect chose one over the other.

Adding a "rx reference clock disable" flag is not the ideal way to
describe the option, as it would force the MAC using a 50610M to set
the flag.  Ideally we want the flags to represent opt-in behavior that
deviates from hardware defaults.  Furthermore, the lack of a
"disable" flag implies that the requester wants the rx reference clock
enabled, which doesn't necessarily follow.

By presenting the option as a passive statement (rx reference clock
unused) rather than a command, I hope to convey an opt-in option to
disable the rx reference clock that falls back to hardware defaults if
not set.  A secondary benefit of this is that it keeps the
intelligence about phy defaults in the broadcom module where it belongs
and allows the broadcom module more latitude should a bug arise.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/tg3.c          |  3 ++-
 include/linux/brcmphy.h    |  2 +-
 3 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index bddf4a42ae68..74914335f72c 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -25,6 +25,9 @@
 #define BRCM_PHY_MODEL(phydev) \
 	((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
 
+#define BRCM_PHY_REV(phydev) \
+	((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask))
+
 
 #define MII_BCM54XX_ECR		0x10	/* BCM54xx extended control register */
 #define MII_BCM54XX_ECR_IM	0x1000	/* Interrupt mask */
@@ -95,11 +98,16 @@
 #define BCM_LED_SRC_OFF		0xe	/* Tied high */
 #define BCM_LED_SRC_ON		0xf	/* Tied low */
 
+
 /*
  * BCM5482: Shadow registers
  * Shadow values go into bits [14:10] of register 0x1c to select a shadow
  * register to access.
  */
+/* 00101: Spare Control Register 3 */
+#define BCM54XX_SHD_SCR3		0x05
+#define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
+
 #define BCM5482_SHD_LEDS1	0x0d	/* 01101: LED Selector 1 */
 					/* LED3 / ~LINKSPD[2] selector */
 #define BCM5482_SHD_LEDS1_LED3(src)	((src & 0xf) << 4)
@@ -112,6 +120,7 @@
 #define BCM5482_SHD_MODE	0x1f	/* 11111: Mode Control Register */
 #define BCM5482_SHD_MODE_1000BX	0x0001	/* Enable 1000BASE-X registers */
 
+
 /*
  * EXPANSION SHADOW ACCESS REGISTERS.  (PHY REG 0x15, 0x16, and 0x17)
  */
@@ -309,6 +318,37 @@ error:
 	return err ? err : err2;
 }
 
+static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
+{
+	u32 val, orig;
+
+	/* Abort if we are using an untested phy. */
+	if (BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610 ||
+	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610M)
+		return;
+
+	val = bcm54xx_shadow_read(phydev, BCM54XX_SHD_SCR3);
+	if (val < 0)
+		return;
+
+	orig = val;
+
+	if (phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) {
+		if ((BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610 ||
+		     BRCM_PHY_MODEL(phydev) == PHY_ID_BCM50610M) &&
+		    BRCM_PHY_REV(phydev) >= 0x3) {
+			/* Here, bit 0 _disables_ CLK125 when set */
+			val |= BCM54XX_SHD_SCR3_DEF_CLK125;
+		} else {
+			/* Here, bit 0 _enables_ CLK125 when set */
+			val &= ~BCM54XX_SHD_SCR3_DEF_CLK125;
+		}
+	}
+
+	if (orig != val)
+		bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val);
+}
+
 static int bcm54xx_config_init(struct phy_device *phydev)
 {
 	int reg, err;
@@ -336,6 +376,9 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 	    (phydev->dev_flags & PHY_BRCM_CLEAR_RGMII_MODE))
 		bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0);
 
+	if (phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)
+		bcm54xx_adjust_rxrefclk(phydev);
+
 	bcm54xx_phydsp_config(phydev);
 
 	return 0;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 592b5bf09e40..369ddba95821 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1100,7 +1100,8 @@ static int tg3_mdio_init(struct tg3 *tp)
 		break;
 	case TG3_PHY_ID_BCM50610:
 	case TG3_PHY_ID_BCM50610M:
-		phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE;
+		phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE |
+				     PHY_BRCM_RX_REFCLK_UNUSED;
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE)
 			phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE;
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN)
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6e7ffcee9c80..59432278ded2 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -4,7 +4,7 @@
 #define PHY_BCM_FLAGS_INTF_XAUI		0x00000020
 #define PHY_BRCM_WIRESPEED_ENABLE	0x00000100
 #define PHY_BRCM_AUTO_PWRDWN_ENABLE	0x00000200
-#define PHY_BRCM_APD_CLK125_ENABLE	0x00000400
+#define PHY_BRCM_RX_REFCLK_UNUSED	0x00000400
 #define PHY_BRCM_STD_IBND_DISABLE	0x00000800
 #define PHY_BRCM_EXT_IBND_RX_ENABLE	0x00001000
 #define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00002000
-- 
cgit v1.2.3


From 52fae0837153e86e4dabaf5df517a0b8b7a20bd7 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 2 Nov 2009 14:32:38 +0000
Subject: tg3 / broadcom: Optionally disable TXC if no link

This patch adds code to disable the TXC and RXC reference clocks if link
is not available.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 5 +++++
 drivers/net/tg3.c          | 1 +
 include/linux/brcmphy.h    | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 7b10495fe8fb..f63c96a4ecb4 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -108,6 +108,7 @@
 #define BCM54XX_SHD_SCR3		0x05
 #define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
 #define  BCM54XX_SHD_SCR3_DLLAPD_DIS	0x0002
+#define  BCM54XX_SHD_SCR3_TRDDAPD	0x0004
 
 /* 01010: Auto Power-Down */
 #define BCM54XX_SHD_APD			0x0a
@@ -362,6 +363,9 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 	else
 		val |= BCM54XX_SHD_SCR3_DLLAPD_DIS;
 
+	if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY)
+		val |= BCM54XX_SHD_SCR3_TRDDAPD;
+
 	if (orig != val)
 		bcm54xx_shadow_write(phydev, BCM54XX_SHD_SCR3, val);
 
@@ -409,6 +413,7 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 		bcm54xx_shadow_write(phydev, BCM54XX_SHD_RGMII_MODE, 0);
 
 	if ((phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) ||
+	    (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) ||
 	    (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE))
 		bcm54xx_adjust_rxrefclk(phydev);
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index f74bf91e78cc..50bb53de5ebd 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1103,6 +1103,7 @@ static int tg3_mdio_init(struct tg3 *tp)
 	case TG3_PHY_ID_BCM50610M:
 		phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE |
 				     PHY_BRCM_RX_REFCLK_UNUSED |
+				     PHY_BRCM_DIS_TXCRXC_NOENRGY |
 				     PHY_BRCM_AUTO_PWRDWN_ENABLE;
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE)
 			phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE;
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 59432278ded2..2b31b91f5871 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -9,4 +9,5 @@
 #define PHY_BRCM_EXT_IBND_RX_ENABLE	0x00001000
 #define PHY_BRCM_EXT_IBND_TX_ENABLE	0x00002000
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
+#define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 #define PHY_BCM_FLAGS_VALID		0x80000000
-- 
cgit v1.2.3


From 7a8b3372e29ff58ebdf94def26703afabd287f11 Mon Sep 17 00:00:00 2001
From: Sandeep Gopalpet <Sandeep.Kumar@freescale.com>
Date: Mon, 2 Nov 2009 07:03:40 +0000
Subject: gianfar: Basic Support for programming hash rules

This patch provides basic hash rules programming via the ethtool
interface.

Signed-off-by: Sandeep Gopalpet <Sandeep.Kumar@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c         |  73 +++++++++++++
 drivers/net/gianfar.h         |  93 +++++++++++++++++
 drivers/net/gianfar_ethtool.c | 236 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/ethtool.h       |   2 +
 4 files changed, 404 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index dc9fba09b17c..086d40dd526d 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -431,6 +431,9 @@ static const struct net_device_ops gfar_netdev_ops = {
 #endif
 };
 
+unsigned int ftp_rqfpr[MAX_FILER_IDX + 1];
+unsigned int ftp_rqfcr[MAX_FILER_IDX + 1];
+
 void lock_rx_qs(struct gfar_private *priv)
 {
 	int i = 0x0;
@@ -766,6 +769,73 @@ static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs)
 	}
 	return new_bit_map;
 }
+
+u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar, u32 class)
+{
+	u32 rqfpr = FPR_FILER_MASK;
+	u32 rqfcr = 0x0;
+
+	rqfar--;
+	rqfcr = RQFCR_CLE | RQFCR_PID_MASK | RQFCR_CMP_EXACT;
+	ftp_rqfpr[rqfar] = rqfpr;
+	ftp_rqfcr[rqfar] = rqfcr;
+	gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+	rqfar--;
+	rqfcr = RQFCR_CMP_NOMATCH;
+	ftp_rqfpr[rqfar] = rqfpr;
+	ftp_rqfcr[rqfar] = rqfcr;
+	gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+	rqfar--;
+	rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_PARSE | RQFCR_CLE | RQFCR_AND;
+	rqfpr = class;
+	ftp_rqfcr[rqfar] = rqfcr;
+	ftp_rqfpr[rqfar] = rqfpr;
+	gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+	rqfar--;
+	rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_MASK | RQFCR_AND;
+	rqfpr = class;
+	ftp_rqfcr[rqfar] = rqfcr;
+	ftp_rqfpr[rqfar] = rqfpr;
+	gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+	return rqfar;
+}
+
+static void gfar_init_filer_table(struct gfar_private *priv)
+{
+	int i = 0x0;
+	u32 rqfar = MAX_FILER_IDX;
+	u32 rqfcr = 0x0;
+	u32 rqfpr = FPR_FILER_MASK;
+
+	/* Default rule */
+	rqfcr = RQFCR_CMP_MATCH;
+	ftp_rqfcr[rqfar] = rqfcr;
+	ftp_rqfpr[rqfar] = rqfpr;
+	gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6);
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_UDP);
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_TCP);
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4);
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_UDP);
+	rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_TCP);
+
+	/* cur_filer_idx indicated the fisrt non-masked rule */
+	priv->cur_filer_idx = rqfar;
+
+	/* Rest are masked rules */
+	rqfcr = RQFCR_CMP_NOMATCH;
+	for (i = 0; i < rqfar; i++) {
+		ftp_rqfcr[i] = rqfcr;
+		ftp_rqfpr[i] = rqfpr;
+		gfar_write_filer(priv, i, rqfcr, rqfpr);
+	}
+}
+
 /* Set up the ethernet device structure, private data,
  * and anything else we need before we start */
 static int gfar_probe(struct of_device *ofdev,
@@ -1005,6 +1075,9 @@ static int gfar_probe(struct of_device *ofdev,
 			priv->gfargrp[i].int_name_tx[len_devname] = '\0';
 	}
 
+	/* Initialize the filer table */
+	gfar_init_filer_table(priv);
+
 	/* Create all the sysfs files */
 	gfar_init_sysfs(dev);
 
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index a2c1f963cdd6..44b63daa7ff3 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -381,6 +381,84 @@ extern const char gfar_driver_version[];
 #define BD_LFLAG(flags) ((flags) << 16)
 #define BD_LENGTH_MASK		0x0000ffff
 
+#define CLASS_CODE_UNRECOG		0x00
+#define CLASS_CODE_DUMMY1		0x01
+#define CLASS_CODE_ETHERTYPE1		0x02
+#define CLASS_CODE_ETHERTYPE2		0x03
+#define CLASS_CODE_USER_PROG1		0x04
+#define CLASS_CODE_USER_PROG2		0x05
+#define CLASS_CODE_USER_PROG3		0x06
+#define CLASS_CODE_USER_PROG4		0x07
+#define CLASS_CODE_TCP_IPV4		0x08
+#define CLASS_CODE_UDP_IPV4		0x09
+#define CLASS_CODE_AH_ESP_IPV4		0x0a
+#define CLASS_CODE_SCTP_IPV4		0x0b
+#define CLASS_CODE_TCP_IPV6		0x0c
+#define CLASS_CODE_UDP_IPV6		0x0d
+#define CLASS_CODE_AH_ESP_IPV6		0x0e
+#define CLASS_CODE_SCTP_IPV6		0x0f
+
+#define FPR_FILER_MASK	0xFFFFFFFF
+#define MAX_FILER_IDX	0xFF
+
+/* RQFCR register bits */
+#define RQFCR_GPI		0x80000000
+#define RQFCR_HASHTBL_Q		0x00000000
+#define RQFCR_HASHTBL_0		0x00020000
+#define RQFCR_HASHTBL_1		0x00040000
+#define RQFCR_HASHTBL_2		0x00060000
+#define RQFCR_HASHTBL_3		0x00080000
+#define RQFCR_HASH		0x00010000
+#define RQFCR_CLE		0x00000200
+#define RQFCR_RJE		0x00000100
+#define RQFCR_AND		0x00000080
+#define RQFCR_CMP_EXACT		0x00000000
+#define RQFCR_CMP_MATCH		0x00000020
+#define RQFCR_CMP_NOEXACT	0x00000040
+#define RQFCR_CMP_NOMATCH	0x00000060
+
+/* RQFCR PID values */
+#define	RQFCR_PID_MASK		0x00000000
+#define	RQFCR_PID_PARSE		0x00000001
+#define	RQFCR_PID_ARB		0x00000002
+#define	RQFCR_PID_DAH		0x00000003
+#define	RQFCR_PID_DAL		0x00000004
+#define	RQFCR_PID_SAH		0x00000005
+#define	RQFCR_PID_SAL		0x00000006
+#define	RQFCR_PID_ETY		0x00000007
+#define	RQFCR_PID_VID		0x00000008
+#define	RQFCR_PID_PRI		0x00000009
+#define	RQFCR_PID_TOS		0x0000000A
+#define	RQFCR_PID_L4P		0x0000000B
+#define	RQFCR_PID_DIA		0x0000000C
+#define	RQFCR_PID_SIA		0x0000000D
+#define	RQFCR_PID_DPT		0x0000000E
+#define	RQFCR_PID_SPT		0x0000000F
+
+/* RQFPR when PID is 0x0001 */
+#define RQFPR_HDR_GE_512	0x00200000
+#define RQFPR_LERR		0x00100000
+#define RQFPR_RAR		0x00080000
+#define RQFPR_RARQ		0x00040000
+#define RQFPR_AR		0x00020000
+#define RQFPR_ARQ		0x00010000
+#define RQFPR_EBC		0x00008000
+#define RQFPR_VLN		0x00004000
+#define RQFPR_CFI		0x00002000
+#define RQFPR_JUM		0x00001000
+#define RQFPR_IPF		0x00000800
+#define RQFPR_FIF		0x00000400
+#define RQFPR_IPV4		0x00000200
+#define RQFPR_IPV6		0x00000100
+#define RQFPR_ICC		0x00000080
+#define RQFPR_ICV		0x00000040
+#define RQFPR_TCP		0x00000020
+#define RQFPR_UDP		0x00000010
+#define RQFPR_TUC		0x00000008
+#define RQFPR_TUV		0x00000004
+#define RQFPR_PER		0x00000002
+#define RQFPR_EER		0x00000001
+
 /* TxBD status field bits */
 #define TXBD_READY		0x8000
 #define TXBD_PADCRC		0x4000
@@ -959,6 +1037,8 @@ struct gfar_private {
 	unsigned int rx_stash_size;
 	unsigned int rx_stash_index;
 
+	u32 cur_filer_idx;
+
 	struct sk_buff_head rx_recycle;
 
 	struct vlan_group *vlgrp;
@@ -1002,6 +1082,9 @@ struct gfar_private {
 	struct gfar_extra_stats extra_stats;
 };
 
+extern unsigned int ftp_rqfpr[MAX_FILER_IDX + 1];
+extern unsigned int ftp_rqfcr[MAX_FILER_IDX + 1];
+
 static inline u32 gfar_read(volatile unsigned __iomem *addr)
 {
 	u32 val;
@@ -1014,6 +1097,16 @@ static inline void gfar_write(volatile unsigned __iomem *addr, u32 val)
 	out_be32(addr, val);
 }
 
+static inline void gfar_write_filer(struct gfar_private *priv,
+		unsigned int far, unsigned int fcr, unsigned int fpr)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+
+	gfar_write(&regs->rqfar, far);
+	gfar_write(&regs->rqfcr, fcr);
+	gfar_write(&regs->rqfpr, fpr);
+}
+
 extern void lock_rx_qs(struct gfar_private *priv);
 extern void lock_tx_qs(struct gfar_private *priv);
 extern void unlock_rx_qs(struct gfar_private *priv);
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 562f6c20f591..1010367695e4 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -645,6 +645,241 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 }
 #endif
 
+static int gfar_ethflow_to_class(int flow_type, u64 *class)
+{
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+		*class = CLASS_CODE_TCP_IPV4;
+		break;
+	case UDP_V4_FLOW:
+		*class = CLASS_CODE_UDP_IPV4;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		*class = CLASS_CODE_AH_ESP_IPV4;
+		break;
+	case SCTP_V4_FLOW:
+		*class = CLASS_CODE_SCTP_IPV4;
+		break;
+	case TCP_V6_FLOW:
+		*class = CLASS_CODE_TCP_IPV6;
+		break;
+	case UDP_V6_FLOW:
+		*class = CLASS_CODE_UDP_IPV6;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		*class = CLASS_CODE_AH_ESP_IPV6;
+		break;
+	case SCTP_V6_FLOW:
+		*class = CLASS_CODE_SCTP_IPV6;
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static void ethflow_to_filer_rules (struct gfar_private *priv, u64 ethflow)
+{
+	u32 fcr = 0x0, fpr = FPR_FILER_MASK;
+
+	if (ethflow & RXH_L2DA) {
+		fcr = RQFCR_PID_DAH |RQFCR_CMP_NOMATCH |
+			RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+
+		fcr = RQFCR_PID_DAL | RQFCR_AND | RQFCR_CMP_NOMATCH |
+				RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & RXH_VLAN) {
+		fcr = RQFCR_PID_VID | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+				RQFCR_AND | RQFCR_HASHTBL_0;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & RXH_IP_SRC) {
+		fcr = RQFCR_PID_SIA | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+			RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & (RXH_IP_DST)) {
+		fcr = RQFCR_PID_DIA | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+			RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & RXH_L3_PROTO) {
+		fcr = RQFCR_PID_L4P | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+			RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & RXH_L4_B_0_1) {
+		fcr = RQFCR_PID_SPT | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+			RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	if (ethflow & RXH_L4_B_2_3) {
+		fcr = RQFCR_PID_DPT | RQFCR_CMP_NOMATCH | RQFCR_HASH |
+			RQFCR_AND | RQFCR_HASHTBL_0;
+		ftp_rqfpr[priv->cur_filer_idx] = fpr;
+		ftp_rqfcr[priv->cur_filer_idx] = fcr;
+		gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr);
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+}
+
+static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u64 class)
+{
+	unsigned int last_rule_idx = priv->cur_filer_idx;
+	unsigned int cmp_rqfpr;
+	unsigned int local_rqfpr[MAX_FILER_IDX + 1];
+	unsigned int local_rqfcr[MAX_FILER_IDX + 1];
+	int i = 0x0, k = 0x0;
+	int j = MAX_FILER_IDX, l = 0x0;
+
+	switch (class) {
+	case TCP_V4_FLOW:
+		cmp_rqfpr = RQFPR_IPV4 |RQFPR_TCP;
+		break;
+	case UDP_V4_FLOW:
+		cmp_rqfpr = RQFPR_IPV4 |RQFPR_UDP;
+		break;
+	case TCP_V6_FLOW:
+		cmp_rqfpr = RQFPR_IPV6 |RQFPR_TCP;
+		break;
+	case UDP_V6_FLOW:
+		cmp_rqfpr = RQFPR_IPV6 |RQFPR_UDP;
+		break;
+	case IPV4_FLOW:
+		cmp_rqfpr = RQFPR_IPV4;
+	case IPV6_FLOW:
+		cmp_rqfpr = RQFPR_IPV6;
+		break;
+	default:
+		printk(KERN_ERR "Right now this class is not supported\n");
+		return 0;
+	}
+
+	for (i = 0; i < MAX_FILER_IDX + 1; i++) {
+		local_rqfpr[j] = ftp_rqfpr[i];
+		local_rqfcr[j] = ftp_rqfcr[i];
+		j--;
+		if ((ftp_rqfcr[i] == (RQFCR_PID_PARSE |
+			RQFCR_CLE |RQFCR_AND)) &&
+			(ftp_rqfpr[i] == cmp_rqfpr))
+			break;
+	}
+
+	if (i == MAX_FILER_IDX + 1) {
+		printk(KERN_ERR "No parse rule found, ");
+		printk(KERN_ERR "can't create hash rules\n");
+		return 0;
+	}
+
+	/* If a match was found, then it begins the starting of a cluster rule
+	 * if it was already programmed, we need to overwrite these rules
+	 */
+	for (l = i+1; l < MAX_FILER_IDX; l++) {
+		if ((ftp_rqfcr[l] & RQFCR_CLE) &&
+			!(ftp_rqfcr[l] & RQFCR_AND)) {
+			ftp_rqfcr[l] = RQFCR_CLE | RQFCR_CMP_EXACT |
+				RQFCR_HASHTBL_0 | RQFCR_PID_MASK;
+			ftp_rqfpr[l] = FPR_FILER_MASK;
+			gfar_write_filer(priv, l, ftp_rqfcr[l], ftp_rqfpr[l]);
+			break;
+		}
+
+		if (!(ftp_rqfcr[l] & RQFCR_CLE) && (ftp_rqfcr[l] & RQFCR_AND))
+			continue;
+		else {
+			local_rqfpr[j] = ftp_rqfpr[l];
+			local_rqfcr[j] = ftp_rqfcr[l];
+			j--;
+		}
+	}
+
+	priv->cur_filer_idx = l - 1;
+	last_rule_idx = l;
+
+	/* hash rules */
+	ethflow_to_filer_rules(priv, ethflow);
+
+	/* Write back the popped out rules again */
+	for (k = j+1; k < MAX_FILER_IDX; k++) {
+		ftp_rqfpr[priv->cur_filer_idx] = local_rqfpr[k];
+		ftp_rqfcr[priv->cur_filer_idx] = local_rqfcr[k];
+		gfar_write_filer(priv, priv->cur_filer_idx,
+				local_rqfcr[k], local_rqfpr[k]);
+		if (!priv->cur_filer_idx)
+			break;
+		priv->cur_filer_idx = priv->cur_filer_idx - 1;
+	}
+
+	return 1;
+}
+
+static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd)
+{
+	u64 class;
+
+	if (!gfar_ethflow_to_class(cmd->flow_type, &class))
+		return -EINVAL;
+
+	if (class < CLASS_CODE_USER_PROG1 ||
+			class > CLASS_CODE_SCTP_IPV6)
+		return -EINVAL;
+
+	/* write the filer rules here */
+	if (!gfar_ethflow_to_filer_table(priv, cmd->data, cmd->flow_type))
+		return -1;
+
+	return 0;
+}
+
+static int gfar_set_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	int ret = 0;
+
+	switch(cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = gfar_set_hash_opts(priv, cmd);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
 const struct ethtool_ops gfar_ethtool_ops = {
 	.get_settings = gfar_gsettings,
 	.set_settings = gfar_ssettings,
@@ -670,4 +905,5 @@ const struct ethtool_ops gfar_ethtool_ops = {
 	.get_wol = gfar_get_wol,
 	.set_wol = gfar_set_wol,
 #endif
+	.set_rxnfc = gfar_set_nfc,
 };
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index eb1a48da2d43..edd03b79e8f8 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -674,6 +674,8 @@ struct ethtool_ops {
 #define	AH_V6_FLOW	0x0b
 #define	ESP_V6_FLOW	0x0c
 #define	IP_USER_FLOW	0x0d
+#define IPV4_FLOW       0x10
+#define IPV6_FLOW       0x11
 
 /* L3-L4 network traffic flow hash options */
 #define	RXH_L2DA	(1 << 1)
-- 
cgit v1.2.3


From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 25 Sep 2009 12:25:56 +0200
Subject: perf/core: Provide a kernel-internal interface to get to performance
 counters

There are reasons for kernel code to ask for, and use, performance
counters.
For example, in CPU freq governors this tends to be a good idea, but
there are other examples possible as well of course.

This patch adds the needed bits to do enable this functionality; they
have been tested in an experimental cpufreq driver that I'm working on,
and the changes are all that I needed to access counters properly.

[fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so
that we can profile a particular task too

TODO: Have a better error reporting, don't just return NULL in fail
case.]

v2: Remove the wrong comment about the fact
    perf_event_create_kernel_counter must be called from a kernel
    thread.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: "K.Prasad" <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Avi Kivity <avi@redhat.com>
LKML-Reference: <20090925122556.2f8bd939@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/perf_event.h |  6 ++++
 kernel/perf_event.c        | 75 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index df9d964c15fc..fa151d49a2ee 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_event_context *ctx, int cpu);
 extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+				int cpu,
+				pid_t pid);
+extern u64 perf_event_read_value(struct perf_event *event);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 12b5ec39bf97..02d4ff041b01 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1725,6 +1725,26 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_event_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&event->owner->perf_event_mutex);
+	list_del_init(&event->owner_entry);
+	mutex_unlock(&event->owner->perf_event_mutex);
+	put_task_struct(event->owner);
+
+	free_event(event);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_event_read_size(struct perf_event *event)
 {
 	int entry = sizeof(u64); /* value */
@@ -1750,7 +1770,7 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event)
 {
 	struct perf_event *child;
 	u64 total = 0;
@@ -1761,6 +1781,7 @@ static u64 perf_event_read_value(struct perf_event *event)
 
 	return total;
 }
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_entry(struct perf_event *event,
 				   u64 read_format, char __user *buf)
@@ -4638,6 +4659,58 @@ err_put_context:
 	return err;
 }
 
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+				 pid_t pid)
+{
+	struct perf_event *event;
+	struct perf_event_context *ctx;
+	int err;
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return NULL ;
+
+	event = perf_event_alloc(attr, cpu, ctx, NULL,
+				     NULL, GFP_KERNEL);
+	err = PTR_ERR(event);
+	if (IS_ERR(event))
+		goto err_put_context;
+
+	event->filp = NULL;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return event;
+
+err_put_context:
+	if (err < 0)
+		put_ctx(ctx);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
-- 
cgit v1.2.3


From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 18 Oct 2009 15:33:50 +0200
Subject: perf/core: Add a callback to perf events

A simple callback in a perf event can be used for multiple purposes.
For example it is useful for triggered based events like hardware
breakpoints that need a callback to dispatch a triggered breakpoint
event.

v2: Simplify a bit the callback attribution as suggested by Paul
    Mackerras

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "K.Prasad" <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/perf_event.h |  7 ++++++-
 kernel/perf_event.c        | 14 ++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fa151d49a2ee..8d54e6d25eeb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -544,6 +544,8 @@ struct perf_pending_entry {
 	void (*func)(struct perf_pending_entry *);
 };
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -639,6 +641,8 @@ struct perf_event {
 	struct event_filter		*filter;
 #endif
 
+	perf_callback_t			callback;
+
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
-				pid_t pid);
+				pid_t pid,
+				perf_callback_t callback);
 extern u64 perf_event_read_value(struct perf_event *event);
 
 struct perf_sample_data {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 02d4ff041b01..5087125e2a00 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4293,6 +4293,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
+		   perf_callback_t callback,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4335,6 +4336,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
+	if (!callback && parent_event)
+		callback = parent_event->callback;
+	
+	event->callback	= callback;
+
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
 
@@ -4611,7 +4617,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-				     NULL, GFP_KERNEL);
+				     NULL, NULL, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4668,7 +4674,7 @@ err_put_context:
  */
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
-				 pid_t pid)
+				 pid_t pid, perf_callback_t callback)
 {
 	struct perf_event *event;
 	struct perf_event_context *ctx;
@@ -4683,7 +4689,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		return NULL ;
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
-				     NULL, GFP_KERNEL);
+				     NULL, callback, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4736,7 +4742,7 @@ inherit_event(struct perf_event *parent_event,
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu, child_ctx,
 					   group_leader, parent_event,
-					   GFP_KERNEL);
+					   NULL, GFP_KERNEL);
 	if (IS_ERR(child_event))
 		return child_event;
 	get_ctx(child_ctx);
-- 
cgit v1.2.3


From 1477b6a7edd9ffa7bba4f9779ce9a76ce92761ed Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:14:16 +0900
Subject: sched: Remove unused __schedule() declaration

__schedule() had been removed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF129C8.3030008@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 kernel/kgdb.c         | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..f18102c4d0b8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -349,7 +349,6 @@ extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
-asmlinkage void __schedule(void);
 asmlinkage void schedule(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
 
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 9147a3190c9d..7d7014634022 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
 
 	/*
 	 * All threads that don't have debuggerinfo should be
-	 * in __schedule() sleeping, since all other CPUs
+	 * in schedule() sleeping, since all other CPUs
 	 * are in kgdb_wait, and thus have debuggerinfo.
 	 */
 	if (local_debuggerinfo) {
-- 
cgit v1.2.3


From 2a2bb3142d326bb28b03875cabfc49baaac9a14a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:16:10 +0900
Subject: sched: Remove unused time_sync_thresh declaration

time_sync_thresh had been removed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF12A3A.5050200@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f18102c4d0b8..754b3deed02b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -171,8 +171,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 }
 #endif
 
-extern unsigned long long time_sync_thresh;
-
 /*
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
-- 
cgit v1.2.3


From 9824a2b728b63e7ff586b9fd9293c819be79f0f3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:16:54 +0900
Subject: sched: Remove unused cpu_nr_migrations()

cpu_nr_migrations() is not used, remove it.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 -
 kernel/sched.c        | 11 -----------
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 754b3deed02b..dfc21fb76bf1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
 
 
 extern void calc_global_load(void);
-extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 67be4d0dddaa..30fd0ba5f603 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -541,7 +541,6 @@ struct rq {
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
-	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -2049,7 +2048,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #endif
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
-		new_rq->nr_migrations_in++;
 #ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
@@ -2988,15 +2986,6 @@ static void calc_load_account_active(struct rq *this_rq)
 	}
 }
 
-/*
- * Externally visible per-cpu scheduler statistics:
- * cpu_nr_migrations(cpu) - number of migrations into that cpu
- */
-u64 cpu_nr_migrations(int cpu)
-{
-	return cpu_rq(cpu)->nr_migrations_in;
-}
-
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
-- 
cgit v1.2.3


From acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 3 Nov 2009 14:53:40 +1030
Subject: cpumask: Partition_sched_domains takes array of cpumask_var_t

Currently partition_sched_domains() takes a 'struct cpumask
*doms_new' which is a kmalloc'ed array of cpumask_t.  You can't
have such an array if 'struct cpumask' is undefined, as we plan
for CONFIG_CPUMASK_OFFSTACK=y.

So, we make this an array of cpumask_var_t instead: this is the
same for the CONFIG_CPUMASK_OFFSTACK=n case, but requires
multiple allocations for the CONFIG_CPUMASK_OFFSTACK=y case.
Hence we add alloc_sched_domains() and free_sched_domains()
functions.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <200911031453.40668.rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  8 ++++--
 kernel/cpuset.c       | 19 +++++++-------
 kernel/sched.c        | 68 ++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 61 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dfc21fb76bf1..78ba664474f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1009,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
-extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
 
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
 /* Test a flag in parent sched domain */
 static inline int test_sd_parent(struct sched_domain *sd, int flag)
 {
@@ -1029,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d247381e7371..3cf2183b472d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  *	element of the partition (one sched domain) to be passed to
  *	partition_sched_domains().
  */
-/* FIXME: see the FIXME in partition_sched_domains() */
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	LIST_HEAD(q);		/* queue of cpusets to be scanned */
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
 	struct cpuset **csa;	/* array of all cpuset ptrs */
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
-	struct cpumask *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
 
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
-		doms = kmalloc(cpumask_size(), GFP_KERNEL);
+		ndoms = 1;
+		doms = alloc_sched_domains(ndoms);
 		if (!doms)
 			goto done;
 
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms, top_cpuset.cpus_allowed);
+		cpumask_copy(doms[0], top_cpuset.cpus_allowed);
 
-		ndoms = 1;
 		goto done;
 	}
 
@@ -636,7 +635,7 @@ restart:
 	 * Now we know how many domains to create.
 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
 	 */
-	doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+	doms = alloc_sched_domains(ndoms);
 	if (!doms)
 		goto done;
 
@@ -656,7 +655,7 @@ restart:
 			continue;
 		}
 
-		dp = doms + nslot;
+		dp = doms[nslot];
 
 		if (nslot == ndoms) {
 			static int warnings = 10;
@@ -718,7 +717,7 @@ done:
 static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	get_online_cpus();
@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 				unsigned long phase, void *unused_cpu)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	switch (phase) {
diff --git a/kernel/sched.c b/kernel/sched.c
index 30fd0ba5f603..ae026aad145b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8846,7 +8846,7 @@ static int build_sched_domains(const struct cpumask *cpu_map)
 	return __build_sched_domains(cpu_map, NULL);
 }
 
-static struct cpumask *doms_cur;	/* current sched domains */
+static cpumask_var_t *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
 				/* attribues of custom domains in 'doms_cur' */
@@ -8868,6 +8868,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
 	return 0;
 }
 
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
+{
+	int i;
+	cpumask_var_t *doms;
+
+	doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
+	if (!doms)
+		return NULL;
+	for (i = 0; i < ndoms; i++) {
+		if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
+			free_sched_domains(doms, i);
+			return NULL;
+		}
+	}
+	return doms;
+}
+
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
+{
+	unsigned int i;
+	for (i = 0; i < ndoms; i++)
+		free_cpumask_var(doms[i]);
+	kfree(doms);
+}
+
 /*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
@@ -8879,12 +8904,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
 
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
-	doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
+	doms_cur = alloc_sched_domains(ndoms_cur);
 	if (!doms_cur)
-		doms_cur = fallback_doms;
-	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
+		doms_cur = &fallback_doms;
+	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
-	err = build_sched_domains(doms_cur);
+	err = build_sched_domains(doms_cur[0]);
 	register_sched_domain_sysctl();
 
 	return err;
@@ -8934,19 +8959,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * doms_new[] to the current sched domain partitioning, doms_cur[].
  * It destroys each deleted domain and builds each new domain.
  *
- * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
  * The masks don't intersect (don't overlap.) We should setup one
  * sched domain for each mask. CPUs not in any of the cpumasks will
  * not be load balanced. If the same cpumask appears both in the
  * current 'doms_cur' domains and in the new 'doms_new', we can leave
  * it as it is.
  *
- * The passed in 'doms_new' should be kmalloc'd. This routine takes
- * ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL &&
- * ndoms_new == 1, and partition_sched_domains() will fallback to
- * the single partition 'fallback_doms', it also forces the domains
- * to be rebuilt.
+ * The passed in 'doms_new' should be allocated using
+ * alloc_sched_domains.  This routine takes ownership of it and will
+ * free_sched_domains it when done with it. If the caller failed the
+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
+ * and partition_sched_domains() will fallback to the single partition
+ * 'fallback_doms', it also forces the domains to be rebuilt.
  *
  * If doms_new == NULL it will be replaced with cpu_online_mask.
  * ndoms_new == 0 is a special case for destroying existing domains,
@@ -8954,8 +8979,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  *
  * Call with hotplug lock held
  */
-/* FIXME: Change to struct cpumask *doms_new[] */
-void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
@@ -8974,40 +8998,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(&doms_cur[i], &doms_new[j])
+			if (cpumask_equal(doms_cur[i], doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* no match - a current sched domain not in new doms_new[] */
-		detach_destroy_domains(doms_cur + i);
+		detach_destroy_domains(doms_cur[i]);
 match1:
 		;
 	}
 
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
-		doms_new = fallback_doms;
-		cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
+		doms_new = &fallback_doms;
+		cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur && !new_topology; j++) {
-			if (cpumask_equal(&doms_new[i], &doms_cur[j])
+			if (cpumask_equal(doms_new[i], doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* no match - add a new doms_new */
-		__build_sched_domains(doms_new + i,
+		__build_sched_domains(doms_new[i],
 					dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}
 
 	/* Remember the new sched domains */
-	if (doms_cur != fallback_doms)
-		kfree(doms_cur);
+	if (doms_cur != &fallback_doms)
+		free_sched_domains(doms_cur, ndoms_cur);
 	kfree(dattr_cur);	/* kfree(NULL) is safe */
 	doms_cur = doms_new;
 	dattr_cur = dattr_new;
-- 
cgit v1.2.3


From 663e69592856df53ef52969482ef413a96bc4e06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 4 Nov 2009 14:22:21 +0100
Subject: irq: Remove unused debug_poll_all_shared_irqs()

commit 74296a8ed added this function for debug purposes, but it was
never used for anything. Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  6 ------
 kernel/irq/spurious.c     | 14 +-------------
 2 files changed, 1 insertion(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 7ca72b74eec7..75f3f00ac1e5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -603,12 +603,6 @@ static inline void init_irq_proc(void)
 }
 #endif
 
-#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ)
-extern void debug_poll_all_shared_irqs(void);
-#else
-static inline void debug_poll_all_shared_irqs(void) { }
-#endif
-
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 114e704760fe..8996b98f9eb2 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
 	return ok;
 }
 
-static void poll_all_shared_irqs(void)
+static void poll_spurious_irqs(unsigned long dummy)
 {
 	struct irq_desc *desc;
 	int i;
@@ -123,23 +123,11 @@ static void poll_all_shared_irqs(void)
 
 		try_one_irq(i, desc);
 	}
-}
-
-static void poll_spurious_irqs(unsigned long dummy)
-{
-	poll_all_shared_irqs();
 
 	mod_timer(&poll_spurious_irq_timer,
 		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
-#ifdef CONFIG_DEBUG_SHIRQ
-void debug_poll_all_shared_irqs(void)
-{
-	poll_all_shared_irqs();
-}
-#endif
-
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
-- 
cgit v1.2.3


From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Nov 2009 05:43:23 -0800
Subject: net: Introduce for_each_netdev_rcu() iterator

Adds RCU management to the list of netdevices.

Convert some for_each_netdev() users to RCU version, if
it can avoid read_lock-ing dev_base_lock

Ie:
	read_lock(&dev_base_loack);
	for_each_netdev(net, dev)
		some_action();
	read_unlock(&dev_base_lock);

becomes :

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		some_action();
	rcu_read_unlock();


Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 30 ++++++++++++++++--------------
 net/decnet/af_decnet.c    |  6 +++---
 net/decnet/dn_fib.c       |  6 +++---
 net/decnet/dn_route.c     |  6 +++---
 net/ipv4/devinet.c        | 30 +++++++++++-------------------
 net/ipv6/addrconf.c       | 20 +++++++-------------
 net/ipv6/anycast.c        |  6 +++---
 net/netrom/nr_route.c     | 15 ++++++++-------
 net/rose/rose_route.c     | 18 +++++++++---------
 net/sctp/protocol.c       |  6 +++---
 11 files changed, 68 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bcf1083857fc..5077de028317 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1081,6 +1081,8 @@ extern rwlock_t				dev_base_lock;		/* Device list lock */
 
 #define for_each_netdev(net, d)		\
 		list_for_each_entry(d, &(net)->dev_base_head, dev_list)
+#define for_each_netdev_rcu(net, d)		\
+		list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_safe(net, d, n)	\
 		list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_continue(net, d)		\
diff --git a/net/core/dev.c b/net/core/dev.c
index 76a1502efe67..bf629ac08b87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -175,7 +175,7 @@ static struct list_head ptype_all __read_mostly;	/* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -212,7 +212,7 @@ static int list_netdevice(struct net_device *dev)
 	ASSERT_RTNL();
 
 	write_lock_bh(&dev_base_lock);
-	list_add_tail(&dev->dev_list, &net->dev_base_head);
+	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 	hlist_add_head_rcu(&dev->index_hlist,
 			   dev_index_hash(net, dev->ifindex));
@@ -229,7 +229,7 @@ static void unlist_netdevice(struct net_device *dev)
 
 	/* Unlink dev from the device chain */
 	write_lock_bh(&dev_base_lock);
-	list_del(&dev->dev_list);
+	list_del_rcu(&dev->dev_list);
 	hlist_del_rcu(&dev->name_hlist);
 	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -799,15 +799,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
 			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -3077,18 +3077,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  *	in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
 	struct net *net = seq_file_net(seq);
 	loff_t off;
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
 	off = 1;
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (off++ == *pos)
 			return dev;
 
@@ -3097,16 +3097,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq_file_net(seq);
+	struct net_device *dev = (v == SEQ_START_TOKEN) ?
+				  first_net_device(seq_file_net(seq)) :
+				  next_net_device((struct net_device *)v);
+
 	++*pos;
-	return v == SEQ_START_TOKEN ?
-		first_net_device(net) : next_net_device((struct net_device *)v);
+	return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 664965c87e16..2e355841ca99 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -749,9 +749,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	if (!(saddr->sdn_flags & SDF_WILD)) {
 		if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
-			read_lock(&dev_base_lock);
+			rcu_read_lock();
 			ldev = NULL;
-			for_each_netdev(&init_net, dev) {
+			for_each_netdev_rcu(&init_net, dev) {
 				if (!dev->dn_ptr)
 					continue;
 				if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
@@ -759,7 +759,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 					break;
 				}
 			}
-			read_unlock(&dev_base_lock);
+			rcu_read_unlock();
 			if (ldev == NULL)
 				return -EADDRNOTAVAIL;
 		}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 27ea2e9b080a..fd641f65e092 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -607,8 +607,8 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
 	ASSERT_RTNL();
 
 	/* Scan device list */
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		dn_db = dev->dn_ptr;
 		if (dn_db == NULL)
 			continue;
@@ -619,7 +619,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
 			}
 		}
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	if (found_it == 0) {
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 57662cabaf9b..860286a3921b 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -908,8 +908,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 			dev_put(dev_out);
 			goto out;
 		}
-		read_lock(&dev_base_lock);
-		for_each_netdev(&init_net, dev) {
+		rcu_read_lock();
+		for_each_netdev_rcu(&init_net, dev) {
 			if (!dev->dn_ptr)
 				continue;
 			if (!dn_dev_islocal(dev, oldflp->fld_src))
@@ -922,7 +922,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 			dev_out = dev;
 			break;
 		}
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
 		if (dev_out == NULL)
 			goto out;
 		dev_hold(dev_out);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ccccaae50b20..8aa7a134c1f1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -876,19 +876,16 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 		if (!addr)
 			addr = ifa->ifa_local;
 	} endfor_ifa(in_dev);
-no_in_dev:
-	rcu_read_unlock();
 
+no_in_dev:
 	if (addr)
-		goto out;
+		goto out_unlock;
 
 	/* Not loopback addresses on loopback should be preferred
 	   in this case. It is importnat that lo is the first interface
 	   in dev_base list.
 	 */
-	read_lock(&dev_base_lock);
-	rcu_read_lock();
-	for_each_netdev(net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
 			continue;
 
@@ -896,12 +893,11 @@ no_in_dev:
 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
 			    ifa->ifa_scope <= scope) {
 				addr = ifa->ifa_local;
-				goto out_unlock_both;
+				goto out_unlock;
 			}
 		} endfor_ifa(in_dev);
 	}
-out_unlock_both:
-	read_unlock(&dev_base_lock);
+out_unlock:
 	rcu_read_unlock();
 out:
 	return addr;
@@ -962,9 +958,8 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 		return confirm_addr_indev(in_dev, dst, local, scope);
 
 	net = dev_net(in_dev->dev);
-	read_lock(&dev_base_lock);
 	rcu_read_lock();
-	for_each_netdev(net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		if ((in_dev = __in_dev_get_rcu(dev))) {
 			addr = confirm_addr_indev(in_dev, dst, local, scope);
 			if (addr)
@@ -972,7 +967,6 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 		}
 	}
 	rcu_read_unlock();
-	read_unlock(&dev_base_lock);
 
 	return addr;
 }
@@ -1240,18 +1234,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		struct in_device *in_dev;
-		rcu_read_lock();
+
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev && !test_bit(i, in_dev->cnf.state))
 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
+/* called with RTNL locked */
 static void inet_forward_change(struct net *net)
 {
 	struct net_device *dev;
@@ -1260,7 +1254,6 @@ static void inet_forward_change(struct net *net)
 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
 
-	read_lock(&dev_base_lock);
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
 		if (on)
@@ -1271,7 +1264,6 @@ static void inet_forward_change(struct net *net)
 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
 		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 918648409612..024bba30de21 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
-		rcu_read_lock();
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 			if (changed)
 				dev_forward_change(idev);
 		}
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
@@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 	hiscore->rule = -1;
 	hiscore->ifa = NULL;
 
-	read_lock(&dev_base_lock);
 	rcu_read_lock();
 
-	for_each_netdev(net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		struct inet6_dev *idev;
 
 		/* Candidate Source Address (section 4)
@@ -1235,7 +1232,6 @@ try_nextdev:
 		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
-	read_unlock(&dev_base_lock);
 
 	if (!hiscore->ifa)
 		return -EADDRNOTAVAIL;
@@ -4052,9 +4048,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
-		rcu_read_lock();
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -4062,9 +4057,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
 			if (changed)
 				dev_disable_change(idev);
 		}
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1ae58bec1de0..2f00ca83f049 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 
 	if (dev)
 		return ipv6_chk_acast_dev(dev, addr);
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev)
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
 		if (ipv6_chk_acast_dev(dev, addr)) {
 			found = 1;
 			break;
 		}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return found;
 }
 
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 4eb1ac9a7679..aacba76070fc 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -597,15 +597,15 @@ struct net_device *nr_dev_first(void)
 {
 	struct net_device *dev, *first = NULL;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
 			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
 				first = dev;
 	}
 	if (first)
 		dev_hold(first);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	return first;
 }
@@ -617,16 +617,17 @@ struct net_device *nr_dev_get(ax25_address *addr)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
-		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM &&
+		    ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
 			dev_hold(dev);
 			goto out;
 		}
 	}
 	dev = NULL;
 out:
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 9478d9b3d977..d936226916f2 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -600,13 +600,13 @@ struct net_device *rose_dev_first(void)
 {
 	struct net_device *dev, *first = NULL;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
 			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
 				first = dev;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	return first;
 }
@@ -618,8 +618,8 @@ struct net_device *rose_dev_get(rose_address *addr)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
 			dev_hold(dev);
 			goto out;
@@ -627,7 +627,7 @@ struct net_device *rose_dev_get(rose_address *addr)
 	}
 	dev = NULL;
 out:
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 
@@ -635,14 +635,14 @@ static int rose_dev_exists(rose_address *addr)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
 			goto out;
 	}
 	dev = NULL;
 out:
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev != NULL;
 }
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d9f4cc2c7869..fe44c57101de 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -205,14 +205,14 @@ static void sctp_get_local_addr_list(void)
 	struct list_head *pos;
 	struct sctp_af *af;
 
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		__list_for_each(pos, &sctp_address_families) {
 			af = list_entry(pos, struct sctp_af, list);
 			af->copy_addrlist(&sctp_local_addr_list, dev);
 		}
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 /* Free the existing local addresses.  */
-- 
cgit v1.2.3


From 89e1838f5f2c2af80268a096b9a687643b0d0846 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 21 Sep 2009 10:46:22 +0200
Subject: change default: by default, use socket buffer auto tuning

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 9d067ce46960..51f47a586ad8 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -70,11 +70,11 @@
   /* I don't think that a tcp send buffer of more than 10M is usefull */
 #define DRBD_SNDBUF_SIZE_MIN  0
 #define DRBD_SNDBUF_SIZE_MAX  (10<<20)
-#define DRBD_SNDBUF_SIZE_DEF  (2*65535)
+#define DRBD_SNDBUF_SIZE_DEF  0
 
 #define DRBD_RCVBUF_SIZE_MIN  0
 #define DRBD_RCVBUF_SIZE_MAX  (10<<20)
-#define DRBD_RCVBUF_SIZE_DEF  (2*65535)
+#define DRBD_RCVBUF_SIZE_DEF  0
 
   /* @4k PageSize -> 128kB - 512MB */
 #define DRBD_MAX_BUFFERS_MIN  32
-- 
cgit v1.2.3


From ed814525f2e45188964c270fc3a5a0b644f7e4a9 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 27 Oct 2009 12:37:14 +0100
Subject: Now it is equal to DRBD release 8.3.5 without compat crap

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 233db5c18b86..18942ad115d9 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.3rc2"
+#define REL_VERSION "8.3.5"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 91
-- 
cgit v1.2.3


From d4ac42a582e46d7f86f0acb4253a310423c72c4c Mon Sep 17 00:00:00 2001
From: Kristoffer Glembo <kristoffer@gaisler.com>
Date: Wed, 4 Nov 2009 08:39:46 -0800
Subject: sparc: Support for GRLIB APBUART serial port

This patch adds support for the APBUART serial port from Aeroflex
Gaisler's IP library GRLIB. It is currently used in all LEON3 designs
(SPARC V8) but can be used on other platforms as well (which support OF).

Signed-off-by: Kristoffer Glembo <kristoffer@gaisler.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/serial/Kconfig      |  13 +
 drivers/serial/Makefile     |   1 +
 drivers/serial/apbuart.c    | 710 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/serial/apbuart.h    |  64 ++++
 include/linux/serial_core.h |   3 +
 5 files changed, 791 insertions(+)
 create mode 100644 drivers/serial/apbuart.c
 create mode 100644 drivers/serial/apbuart.h

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index e52257257279..50943ff78f4b 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1477,4 +1477,17 @@ config SERIAL_BCM63XX_CONSOLE
 	  If you have enabled the serial port on the bcm63xx CPU
 	  you can make it the console by answering Y to this option.
 
+config SERIAL_GRLIB_GAISLER_APBUART
+	tristate "GRLIB APBUART serial support"
+	depends on OF
+	---help---
+	Add support for the GRLIB APBUART serial port.
+
+config SERIAL_GRLIB_GAISLER_APBUART_CONSOLE
+	bool "Console on GRLIB APBUART serial port"
+	depends on SERIAL_GRLIB_GAISLER_APBUART=y
+	select SERIAL_CORE_CONSOLE
+	help
+	Support for running a console on the GRLIB APBUART
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d21d5dd5d048..5548fe7df61d 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -81,3 +81,4 @@ obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
 obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
+obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
diff --git a/drivers/serial/apbuart.c b/drivers/serial/apbuart.c
new file mode 100644
index 000000000000..8a343045131f
--- /dev/null
+++ b/drivers/serial/apbuart.c
@@ -0,0 +1,710 @@
+/*
+ *  Driver for GRLIB serial ports (APBUART)
+ *
+ *  Based on linux/drivers/serial/amba.c
+ *
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2003 Konrad Eisele <eiselekd@web.de>
+ *  Copyright (C) 2006 Daniel Hellstrom <daniel@gaisler.com>, Aeroflex Gaisler AB
+ *  Copyright (C) 2008 Gilead Kutnick <kutnickg@zin-tech.com>
+ *  Copyright (C) 2009 Kristoffer Glembo <kristoffer@gaisler.com>, Aeroflex Gaisler AB
+ */
+
+#if defined(CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/module.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/kthread.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/serial_core.h>
+#include <asm/irq.h>
+#include <asm/oplib.h>
+
+#include "apbuart.h"
+
+#define SERIAL_APBUART_MAJOR	TTY_MAJOR
+#define SERIAL_APBUART_MINOR	64
+#define UART_DUMMY_RSR_RX	0x8000	/* for ignore all read */
+
+static void apbuart_tx_chars(struct uart_port *port);
+
+static void apbuart_stop_tx(struct uart_port *port)
+{
+	unsigned int cr;
+
+	cr = UART_GET_CTRL(port);
+	cr &= ~UART_CTRL_TI;
+	UART_PUT_CTRL(port, cr);
+}
+
+static void apbuart_start_tx(struct uart_port *port)
+{
+	unsigned int cr;
+
+	cr = UART_GET_CTRL(port);
+	cr |= UART_CTRL_TI;
+	UART_PUT_CTRL(port, cr);
+
+	if (UART_GET_STATUS(port) & UART_STATUS_THE)
+		apbuart_tx_chars(port);
+}
+
+static void apbuart_stop_rx(struct uart_port *port)
+{
+	unsigned int cr;
+
+	cr = UART_GET_CTRL(port);
+	cr &= ~(UART_CTRL_RI);
+	UART_PUT_CTRL(port, cr);
+}
+
+static void apbuart_enable_ms(struct uart_port *port)
+{
+	/* No modem status change interrupts for APBUART */
+}
+
+static void apbuart_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = port->state->port.tty;
+	unsigned int status, ch, rsr, flag;
+	unsigned int max_chars = port->fifosize;
+
+	status = UART_GET_STATUS(port);
+
+	while (UART_RX_DATA(status) && (max_chars--)) {
+
+		ch = UART_GET_CHAR(port);
+		flag = TTY_NORMAL;
+
+		port->icount.rx++;
+
+		rsr = UART_GET_STATUS(port) | UART_DUMMY_RSR_RX;
+		UART_PUT_STATUS(port, 0);
+		if (rsr & UART_STATUS_ERR) {
+
+			if (rsr & UART_STATUS_BR) {
+				rsr &= ~(UART_STATUS_FE | UART_STATUS_PE);
+				port->icount.brk++;
+				if (uart_handle_break(port))
+					goto ignore_char;
+			} else if (rsr & UART_STATUS_PE) {
+				port->icount.parity++;
+			} else if (rsr & UART_STATUS_FE) {
+				port->icount.frame++;
+			}
+			if (rsr & UART_STATUS_OE)
+				port->icount.overrun++;
+
+			rsr &= port->read_status_mask;
+
+			if (rsr & UART_STATUS_PE)
+				flag = TTY_PARITY;
+			else if (rsr & UART_STATUS_FE)
+				flag = TTY_FRAME;
+		}
+
+		if (uart_handle_sysrq_char(port, ch))
+			goto ignore_char;
+
+		uart_insert_char(port, rsr, UART_STATUS_OE, ch, flag);
+
+
+	      ignore_char:
+		status = UART_GET_STATUS(port);
+	}
+
+	tty_flip_buffer_push(tty);
+}
+
+static void apbuart_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+	int count;
+
+	if (port->x_char) {
+		UART_PUT_CHAR(port, port->x_char);
+		port->icount.tx++;
+		port->x_char = 0;
+		return;
+	}
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+		apbuart_stop_tx(port);
+		return;
+	}
+
+	/* amba: fill FIFO */
+	count = port->fifosize >> 1;
+	do {
+		UART_PUT_CHAR(port, xmit->buf[xmit->tail]);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+		if (uart_circ_empty(xmit))
+			break;
+	} while (--count > 0);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(xmit))
+		apbuart_stop_tx(port);
+}
+
+static irqreturn_t apbuart_int(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	unsigned int status;
+
+	spin_lock(&port->lock);
+
+	status = UART_GET_STATUS(port);
+	if (status & UART_STATUS_DR)
+		apbuart_rx_chars(port);
+	if (status & UART_STATUS_THE)
+		apbuart_tx_chars(port);
+
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int apbuart_tx_empty(struct uart_port *port)
+{
+	unsigned int status = UART_GET_STATUS(port);
+	return status & UART_STATUS_THE ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int apbuart_get_mctrl(struct uart_port *port)
+{
+	/* The GRLIB APBUART handles flow control in hardware */
+	return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+static void apbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	/* The GRLIB APBUART handles flow control in hardware */
+}
+
+static void apbuart_break_ctl(struct uart_port *port, int break_state)
+{
+	/* We don't support sending break */
+}
+
+static int apbuart_startup(struct uart_port *port)
+{
+	int retval;
+	unsigned int cr;
+
+	/* Allocate the IRQ */
+	retval = request_irq(port->irq, apbuart_int, 0, "apbuart", port);
+	if (retval)
+		return retval;
+
+	/* Finally, enable interrupts */
+	cr = UART_GET_CTRL(port);
+	UART_PUT_CTRL(port,
+		      cr | UART_CTRL_RE | UART_CTRL_TE |
+		      UART_CTRL_RI | UART_CTRL_TI);
+
+	return 0;
+}
+
+static void apbuart_shutdown(struct uart_port *port)
+{
+	unsigned int cr;
+
+	/* disable all interrupts, disable the port */
+	cr = UART_GET_CTRL(port);
+	UART_PUT_CTRL(port,
+		      cr & ~(UART_CTRL_RE | UART_CTRL_TE |
+			     UART_CTRL_RI | UART_CTRL_TI));
+
+	/* Free the interrupt */
+	free_irq(port->irq, port);
+}
+
+static void apbuart_set_termios(struct uart_port *port,
+				struct ktermios *termios, struct ktermios *old)
+{
+	unsigned int cr;
+	unsigned long flags;
+	unsigned int baud, quot;
+
+	/* Ask the core to calculate the divisor for us. */
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+	if (baud == 0)
+		panic("invalid baudrate %i\n", port->uartclk / 16);
+
+	/* uart_get_divisor calc a *16 uart freq, apbuart is *8 */
+	quot = (uart_get_divisor(port, baud)) * 2;
+	cr = UART_GET_CTRL(port);
+	cr &= ~(UART_CTRL_PE | UART_CTRL_PS);
+
+	if (termios->c_cflag & PARENB) {
+		cr |= UART_CTRL_PE;
+		if ((termios->c_cflag & PARODD))
+			cr |= UART_CTRL_PS;
+	}
+
+	/* Enable flow control. */
+	if (termios->c_cflag & CRTSCTS)
+		cr |= UART_CTRL_FL;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* Update the per-port timeout. */
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	port->read_status_mask = UART_STATUS_OE;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= UART_STATUS_FE | UART_STATUS_PE;
+
+	/* Characters to ignore */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask |= UART_STATUS_FE | UART_STATUS_PE;
+
+	/* Ignore all characters if CREAD is not set. */
+	if ((termios->c_cflag & CREAD) == 0)
+		port->ignore_status_mask |= UART_DUMMY_RSR_RX;
+
+	/* Set baud rate */
+	quot -= 1;
+	UART_PUT_SCAL(port, quot);
+	UART_PUT_CTRL(port, cr);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *apbuart_type(struct uart_port *port)
+{
+	return port->type == PORT_APBUART ? "GRLIB/APBUART" : NULL;
+}
+
+static void apbuart_release_port(struct uart_port *port)
+{
+	release_mem_region(port->mapbase, 0x100);
+}
+
+static int apbuart_request_port(struct uart_port *port)
+{
+	return request_mem_region(port->mapbase, 0x100, "grlib-apbuart")
+	    != NULL ? 0 : -EBUSY;
+	return 0;
+}
+
+/* Configure/autoconfigure the port */
+static void apbuart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_APBUART;
+		apbuart_request_port(port);
+	}
+}
+
+/* Verify the new serial_struct (for TIOCSSERIAL) */
+static int apbuart_verify_port(struct uart_port *port,
+			       struct serial_struct *ser)
+{
+	int ret = 0;
+	if (ser->type != PORT_UNKNOWN && ser->type != PORT_APBUART)
+		ret = -EINVAL;
+	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+		ret = -EINVAL;
+	if (ser->baud_base < 9600)
+		ret = -EINVAL;
+	return ret;
+}
+
+static struct uart_ops grlib_apbuart_ops = {
+	.tx_empty = apbuart_tx_empty,
+	.set_mctrl = apbuart_set_mctrl,
+	.get_mctrl = apbuart_get_mctrl,
+	.stop_tx = apbuart_stop_tx,
+	.start_tx = apbuart_start_tx,
+	.stop_rx = apbuart_stop_rx,
+	.enable_ms = apbuart_enable_ms,
+	.break_ctl = apbuart_break_ctl,
+	.startup = apbuart_startup,
+	.shutdown = apbuart_shutdown,
+	.set_termios = apbuart_set_termios,
+	.type = apbuart_type,
+	.release_port = apbuart_release_port,
+	.request_port = apbuart_request_port,
+	.config_port = apbuart_config_port,
+	.verify_port = apbuart_verify_port,
+};
+
+static struct uart_port grlib_apbuart_ports[UART_NR];
+static struct device_node *grlib_apbuart_nodes[UART_NR];
+
+static int apbuart_scan_fifo_size(struct uart_port *port, int portnumber)
+{
+	int ctrl, loop = 0;
+	int status;
+	int fifosize;
+	unsigned long flags;
+
+	ctrl = UART_GET_CTRL(port);
+
+	/*
+	 * Enable the transceiver and wait for it to be ready to send data.
+	 * Clear interrupts so that this process will not be externally
+	 * interrupted in the middle (which can cause the transceiver to
+	 * drain prematurely).
+	 */
+
+	local_irq_save(flags);
+
+	UART_PUT_CTRL(port, ctrl | UART_CTRL_TE);
+
+	while (!UART_TX_READY(UART_GET_STATUS(port)))
+		loop++;
+
+	/*
+	 * Disable the transceiver so data isn't actually sent during the
+	 * actual test.
+	 */
+
+	UART_PUT_CTRL(port, ctrl & ~(UART_CTRL_TE));
+
+	fifosize = 1;
+	UART_PUT_CHAR(port, 0);
+
+	/*
+	 * So long as transmitting a character increments the tranceivier FIFO
+	 * length the FIFO must be at least that big. These bytes will
+	 * automatically drain off of the FIFO.
+	 */
+
+	status = UART_GET_STATUS(port);
+	while (((status >> 20) & 0x3F) == fifosize) {
+		fifosize++;
+		UART_PUT_CHAR(port, 0);
+		status = UART_GET_STATUS(port);
+	}
+
+	fifosize--;
+
+	UART_PUT_CTRL(port, ctrl);
+	local_irq_restore(flags);
+
+	if (fifosize == 0)
+		fifosize = 1;
+
+	return fifosize;
+}
+
+static void apbuart_flush_fifo(struct uart_port *port)
+{
+	int i;
+
+	for (i = 0; i < port->fifosize; i++)
+		UART_GET_CHAR(port);
+}
+
+
+/* ======================================================================== */
+/* Console driver, if enabled                                               */
+/* ======================================================================== */
+
+#ifdef CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE
+
+static void apbuart_console_putchar(struct uart_port *port, int ch)
+{
+	unsigned int status;
+	do {
+		status = UART_GET_STATUS(port);
+	} while (!UART_TX_READY(status));
+	UART_PUT_CHAR(port, ch);
+}
+
+static void
+apbuart_console_write(struct console *co, const char *s, unsigned int count)
+{
+	struct uart_port *port = &grlib_apbuart_ports[co->index];
+	unsigned int status, old_cr, new_cr;
+
+	/* First save the CR then disable the interrupts */
+	old_cr = UART_GET_CTRL(port);
+	new_cr = old_cr & ~(UART_CTRL_RI | UART_CTRL_TI);
+	UART_PUT_CTRL(port, new_cr);
+
+	uart_console_write(port, s, count, apbuart_console_putchar);
+
+	/*
+	 *      Finally, wait for transmitter to become empty
+	 *      and restore the TCR
+	 */
+	do {
+		status = UART_GET_STATUS(port);
+	} while (!UART_TX_READY(status));
+	UART_PUT_CTRL(port, old_cr);
+}
+
+static void __init
+apbuart_console_get_options(struct uart_port *port, int *baud,
+			    int *parity, int *bits)
+{
+	if (UART_GET_CTRL(port) & (UART_CTRL_RE | UART_CTRL_TE)) {
+
+		unsigned int quot, status;
+		status = UART_GET_STATUS(port);
+
+		*parity = 'n';
+		if (status & UART_CTRL_PE) {
+			if ((status & UART_CTRL_PS) == 0)
+				*parity = 'e';
+			else
+				*parity = 'o';
+		}
+
+		*bits = 8;
+		quot = UART_GET_SCAL(port) / 8;
+		*baud = port->uartclk / (16 * (quot + 1));
+	}
+}
+
+static int __init apbuart_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud = 38400;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	pr_debug("apbuart_console_setup co=%p, co->index=%i, options=%s\n",
+		 co, co->index, options);
+
+	/*
+	 * Check whether an invalid uart number has been specified, and
+	 * if so, search for the first available port that does have
+	 * console support.
+	 */
+	if (co->index >= grlib_apbuart_port_nr)
+		co->index = 0;
+
+	port = &grlib_apbuart_ports[co->index];
+
+	spin_lock_init(&port->lock);
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+	else
+		apbuart_console_get_options(port, &baud, &parity, &bits);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct uart_driver grlib_apbuart_driver;
+
+static struct console grlib_apbuart_console = {
+	.name = "ttyS",
+	.write = apbuart_console_write,
+	.device = uart_console_device,
+	.setup = apbuart_console_setup,
+	.flags = CON_PRINTBUFFER,
+	.index = -1,
+	.data = &grlib_apbuart_driver,
+};
+
+
+static void grlib_apbuart_configure(void);
+
+static int __init apbuart_console_init(void)
+{
+	grlib_apbuart_configure();
+	register_console(&grlib_apbuart_console);
+	return 0;
+}
+
+console_initcall(apbuart_console_init);
+
+#define APBUART_CONSOLE	(&grlib_apbuart_console)
+#else
+#define APBUART_CONSOLE	NULL
+#endif
+
+static struct uart_driver grlib_apbuart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "serial",
+	.dev_name = "ttyS",
+	.major = SERIAL_APBUART_MAJOR,
+	.minor = SERIAL_APBUART_MINOR,
+	.nr = UART_NR,
+	.cons = APBUART_CONSOLE,
+};
+
+
+/* ======================================================================== */
+/* OF Platform Driver                                                       */
+/* ======================================================================== */
+
+static int __devinit apbuart_probe(struct of_device *op,
+				   const struct of_device_id *match)
+{
+	int i = -1;
+	struct uart_port *port = NULL;
+
+	i = 0;
+	for (i = 0; i < grlib_apbuart_port_nr; i++) {
+		if (op->node == grlib_apbuart_nodes[i])
+			break;
+	}
+
+	port = &grlib_apbuart_ports[i];
+	port->dev = &op->dev;
+
+	uart_add_one_port(&grlib_apbuart_driver, (struct uart_port *) port);
+
+	apbuart_flush_fifo((struct uart_port *) port);
+
+	printk(KERN_INFO "grlib-apbuart at 0x%x, irq %d\n",
+	       port->mapbase, port->irq);
+	return 0;
+
+}
+
+static struct of_device_id __initdata apbuart_match[] = {
+	{
+	 .name = "GAISLER_APBUART",
+	 },
+	{},
+};
+
+static struct of_platform_driver grlib_apbuart_of_driver = {
+	.match_table = apbuart_match,
+	.probe = apbuart_probe,
+	.driver = {
+		   .owner = THIS_MODULE,
+		   .name = "grlib-apbuart",
+		   },
+};
+
+
+static void grlib_apbuart_configure(void)
+{
+	static int enum_done;
+	struct device_node *np;
+	struct uart_port *port = NULL;
+
+	int node;
+	int freq_khz;
+	int v = 0, d = 0;
+	unsigned int addr;
+	int irq, line;
+	struct amba_prom_registers *regs;
+
+	if (enum_done)
+		return;
+
+	/* Get bus frequency */
+	node = prom_getchild(prom_root_node);
+	freq_khz = prom_getint(node, "clock-frequency");
+
+	line = 0;
+	for_each_matching_node(np, apbuart_match) {
+
+		int *vendor = (int *) of_get_property(np, "vendor", NULL);
+		int *device = (int *) of_get_property(np, "device", NULL);
+		int *irqs = (int *) of_get_property(np, "interrupts", NULL);
+		regs = (struct amba_prom_registers *)
+		    of_get_property(np, "reg", NULL);
+
+		if (vendor)
+			v = *vendor;
+		if (device)
+			d = *device;
+
+		if (!irqs || !regs)
+			return;
+
+		grlib_apbuart_nodes[line] = np;
+
+		addr = regs->phys_addr;
+		irq = *irqs;
+
+		port = &grlib_apbuart_ports[line];
+
+		port->mapbase = addr;
+		port->membase = ioremap(addr, sizeof(struct grlib_apbuart_regs_map));
+		port->irq = irq;
+		port->iotype = UPIO_MEM;
+		port->ops = &grlib_apbuart_ops;
+		port->flags = UPF_BOOT_AUTOCONF;
+		port->line = line;
+		port->uartclk = freq_khz * 1000;
+		port->fifosize = apbuart_scan_fifo_size((struct uart_port *) port, line);
+		line++;
+
+		/* We support maximum UART_NR uarts ... */
+		if (line == UART_NR)
+			break;
+
+	}
+
+	enum_done = 1;
+
+	grlib_apbuart_driver.nr = grlib_apbuart_port_nr = line;
+}
+
+static int __init grlib_apbuart_init(void)
+{
+	int ret;
+
+	/* Find all APBUARTS in device the tree and initialize their ports */
+	grlib_apbuart_configure();
+
+	printk(KERN_INFO "Serial: GRLIB APBUART driver\n");
+
+	ret = uart_register_driver(&grlib_apbuart_driver);
+
+	if (ret) {
+		printk(KERN_ERR "%s: uart_register_driver failed (%i)\n",
+		       __FILE__, ret);
+		return ret;
+	}
+
+	ret = of_register_driver(&grlib_apbuart_of_driver, &of_platform_bus_type);
+
+	if (ret) {
+		printk(KERN_ERR
+		       "%s: of_register_platform_driver failed (%i)\n",
+		       __FILE__, ret);
+		uart_unregister_driver(&grlib_apbuart_driver);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit grlib_apbuart_exit(void)
+{
+	int i;
+
+	for (i = 0; i < grlib_apbuart_port_nr; i++)
+		uart_remove_one_port(&grlib_apbuart_driver,
+				     &grlib_apbuart_ports[i]);
+
+	uart_unregister_driver(&grlib_apbuart_driver);
+
+}
+
+module_init(grlib_apbuart_init);
+module_exit(grlib_apbuart_exit);
+
+MODULE_AUTHOR("Aeroflex Gaisler AB");
+MODULE_DESCRIPTION("GRLIB APBUART serial driver");
+MODULE_VERSION("2.1");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/apbuart.h b/drivers/serial/apbuart.h
new file mode 100644
index 000000000000..5faf87c8d2bc
--- /dev/null
+++ b/drivers/serial/apbuart.h
@@ -0,0 +1,64 @@
+#ifndef __GRLIB_APBUART_H__
+#define __GRLIB_APBUART_H__
+
+#include <asm/io.h>
+
+#define UART_NR		8
+static int grlib_apbuart_port_nr;
+
+struct grlib_apbuart_regs_map {
+	u32 data;
+	u32 status;
+	u32 ctrl;
+	u32 scaler;
+};
+
+struct amba_prom_registers {
+	unsigned int phys_addr;
+	unsigned int reg_size;
+};
+
+/*
+ *  The following defines the bits in the APBUART Status Registers.
+ */
+#define UART_STATUS_DR   0x00000001	/* Data Ready */
+#define UART_STATUS_TSE  0x00000002	/* TX Send Register Empty */
+#define UART_STATUS_THE  0x00000004	/* TX Hold Register Empty */
+#define UART_STATUS_BR   0x00000008	/* Break Error */
+#define UART_STATUS_OE   0x00000010	/* RX Overrun Error */
+#define UART_STATUS_PE   0x00000020	/* RX Parity Error */
+#define UART_STATUS_FE   0x00000040	/* RX Framing Error */
+#define UART_STATUS_ERR  0x00000078	/* Error Mask */
+
+/*
+ *  The following defines the bits in the APBUART Ctrl Registers.
+ */
+#define UART_CTRL_RE     0x00000001	/* Receiver enable */
+#define UART_CTRL_TE     0x00000002	/* Transmitter enable */
+#define UART_CTRL_RI     0x00000004	/* Receiver interrupt enable */
+#define UART_CTRL_TI     0x00000008	/* Transmitter irq */
+#define UART_CTRL_PS     0x00000010	/* Parity select */
+#define UART_CTRL_PE     0x00000020	/* Parity enable */
+#define UART_CTRL_FL     0x00000040	/* Flow control enable */
+#define UART_CTRL_LB     0x00000080	/* Loopback enable */
+
+#define APBBASE(port) ((struct grlib_apbuart_regs_map *)((port)->membase))
+
+#define APBBASE_DATA_P(port)	(&(APBBASE(port)->data))
+#define APBBASE_STATUS_P(port)	(&(APBBASE(port)->status))
+#define APBBASE_CTRL_P(port)	(&(APBBASE(port)->ctrl))
+#define APBBASE_SCALAR_P(port)	(&(APBBASE(port)->scaler))
+
+#define UART_GET_CHAR(port)	(__raw_readl(APBBASE_DATA_P(port)))
+#define UART_PUT_CHAR(port, v)	(__raw_writel(v, APBBASE_DATA_P(port)))
+#define UART_GET_STATUS(port)	(__raw_readl(APBBASE_STATUS_P(port)))
+#define UART_PUT_STATUS(port, v)(__raw_writel(v, APBBASE_STATUS_P(port)))
+#define UART_GET_CTRL(port)	(__raw_readl(APBBASE_CTRL_P(port)))
+#define UART_PUT_CTRL(port, v)	(__raw_writel(v, APBBASE_CTRL_P(port)))
+#define UART_GET_SCAL(port)	(__raw_readl(APBBASE_SCALAR_P(port)))
+#define UART_PUT_SCAL(port, v)	(__raw_writel(v, APBBASE_SCALAR_P(port)))
+
+#define UART_RX_DATA(s)		(((s) & UART_STATUS_DR) != 0)
+#define UART_TX_READY(s)	(((s) & UART_STATUS_THE) != 0)
+
+#endif /* __GRLIB_APBUART_H__ */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index db532ce288be..8c3dd36fe91a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -179,6 +179,9 @@
 /* BCM63xx family SoCs */
 #define PORT_BCM63XX	89
 
+/* Aeroflex Gaisler GRLIB APBUART */
+#define PORT_APBUART    90
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From ac1aa47b131416a6ff37eb1005a0a1d2541aad6c Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Mon, 26 Oct 2009 13:20:44 -0700
Subject: PCI: determine CLS more intelligently

Till now, CLS has been determined either by arch code or as
L1_CACHE_BYTES.  Only x86 and ia64 set CLS explicitly and x86 doesn't
always get it right.  On most configurations, the chance is that
firmware configures the correct value during boot.

This patch makes pci_init() determine CLS by looking at what firmware
has configured.  It scans all devices and if all non-zero values
agree, the value is used.  If none is configured or there is a
disagreement, pci_dfl_cache_line_size is used.  arch can set the dfl
value (via PCI_CACHE_LINE_BYTES or pci_dfl_cache_line_size) or
override the actual one.

ia64, x86 and sparc64 updated to set the default cls instead of the
actual one.

While at it, declare pci_cache_line_size and pci_dfl_cache_line_size
in pci.h and drop private declarations from arch code.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Miller <davem@davemloft.net>
Acked-by: Greg KH <gregkh@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/pci/pci.c   |  9 +++------
 arch/x86/pci/common.c |  8 +++-----
 drivers/pci/pci.c     | 21 +++++++++++++--------
 drivers/pci/quirks.c  | 28 ++++++++++++++++++++++++++++
 include/linux/pci.h   |  2 ++
 5 files changed, 49 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index c0fca2c1c858..d60e7195b7dd 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -720,9 +720,6 @@ int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	return ret;
 }
 
-/* It's defined in drivers/pci/pci.c */
-extern u8 pci_cache_line_size;
-
 /**
  * set_pci_cacheline_size - determine cacheline size for PCI devices
  *
@@ -731,7 +728,7 @@ extern u8 pci_cache_line_size;
  *
  * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
  */
-static void __init set_pci_cacheline_size(void)
+static void __init set_pci_dfl_cacheline_size(void)
 {
 	unsigned long levels, unique_caches;
 	long status;
@@ -751,7 +748,7 @@ static void __init set_pci_cacheline_size(void)
 			"(status=%ld)\n", __func__, status);
 		return;
 	}
-	pci_cache_line_size = (1 << cci.pcci_line_size) / 4;
+	pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
 }
 
 u64 ia64_dma_get_required_mask(struct device *dev)
@@ -782,7 +779,7 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
 static int __init pcibios_init(void)
 {
-	set_pci_cacheline_size();
+	set_pci_dfl_cacheline_size();
 	return 0;
 }
 
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1331fcf26143..fbeec31316cf 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 	return bus;
 }
 
-extern u8 pci_cache_line_size;
-
 int __init pcibios_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -426,11 +424,11 @@ int __init pcibios_init(void)
 	 * and P4. It's also good for 386/486s (which actually have 16)
 	 * as quite a few PCI devices do not support smaller values.
 	 */
-	pci_cache_line_size = 32 >> 2;
+	pci_dfl_cache_line_size = 32 >> 2;
 	if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
-		pci_cache_line_size = 64 >> 2;	/* K7 & K8 */
+		pci_dfl_cache_line_size = 64 >> 2;	/* K7 & K8 */
 	else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
-		pci_cache_line_size = 128 >> 2;	/* P4 */
+		pci_dfl_cache_line_size = 128 >> 2;	/* P4 */
 
 	pcibios_resource_survey();
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4e4c295a049f..1f9a7a03847b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -47,6 +47,19 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
+#ifndef PCI_CACHE_LINE_BYTES
+#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES
+#endif
+
+/*
+ * The default CLS is used if arch didn't set CLS explicitly and not
+ * all pci devices agree on the same value.  Arch can override either
+ * the dfl or actual value as it sees fit.  Don't forget this is
+ * measured in 32-bit words, not bytes.
+ */
+u8 pci_dfl_cache_line_size __initdata = PCI_CACHE_LINE_BYTES >> 2;
+u8 pci_cache_line_size;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -1883,14 +1896,6 @@ void pci_clear_mwi(struct pci_dev *dev)
 
 #else
 
-#ifndef PCI_CACHE_LINE_BYTES
-#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES
-#endif
-
-/* This can be overridden by arch code. */
-/* Don't forget this is measured in 32-bit words, not bytes */
-u8 pci_cache_line_size = PCI_CACHE_LINE_BYTES / 4;
-
 /**
  * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
  * @dev: the PCI device for which MWI is to be enabled
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 245d2cdb4765..1812ae7698de 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2595,9 +2595,37 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
 static int __init pci_apply_final_quirks(void)
 {
 	struct pci_dev *dev = NULL;
+	u8 cls = 0;
+	u8 tmp;
+
+	if (pci_cache_line_size)
+		printk(KERN_DEBUG "PCI: CLS %u bytes\n",
+		       pci_cache_line_size << 2);
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_fixup_device(pci_fixup_final, dev);
+		/*
+		 * If arch hasn't set it explicitly yet, use the CLS
+		 * value shared by all PCI devices.  If there's a
+		 * mismatch, fall back to the default value.
+		 */
+		if (!pci_cache_line_size) {
+			pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &tmp);
+			if (!cls)
+				cls = tmp;
+			if (!tmp || cls == tmp)
+				continue;
+
+			printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), "
+			       "using %u bytes\n", cls << 2, tmp << 2,
+			       pci_dfl_cache_line_size << 2);
+			pci_cache_line_size = pci_dfl_cache_line_size;
+		}
+	}
+	if (!pci_cache_line_size) {
+		printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n",
+		       cls << 2, pci_dfl_cache_line_size << 2);
+		pci_cache_line_size = cls;
 	}
 
 	return 0;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f5c7cd343e56..b849861d78e6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1246,6 +1246,8 @@ extern int pci_pci_problems;
 
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
+extern u8 pci_dfl_cache_line_size;
+extern u8 pci_cache_line_size;
 
 extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;
-- 
cgit v1.2.3


From 15ea76d407d560f985224b65fe59c9db01692a0d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 22 Sep 2009 17:34:48 +0900
Subject: pccard: configure CLS on attach

For non hotplug PCI devices, the system firmware usually configures
CLS correctly.  For pccard devices system firmware can't do it and
Linux PCI layer doesn't do it either.  Unfortunately this leads to
poor performance for certain devices (sata_sil).  Unless MWI, which
requires separate configuration, is to be used, CLS doesn't affect
correctness, so the configuration should be harmless.

This patch makes pci_set_cacheline_size() always built and export it
and make pccard call it during attach.

Please note that some other PCI hotplug drivers (shpchp and pciehp)
also configure CLS on hotplug.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Daniel Ritz <daniel.ritz@gmx.ch>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Cc: Axel Birndt <towerlexa@gmx.de>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c        | 40 ++++++++++++++++++++--------------------
 drivers/pcmcia/cardbus.c | 23 +++++++++++++++--------
 include/linux/pci.h      |  1 +
 3 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 01337b7a215f..d1afbae5b1fb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1875,23 +1875,6 @@ void pci_clear_master(struct pci_dev *dev)
 	__pci_set_master(dev, false);
 }
 
-#ifdef PCI_DISABLE_MWI
-int pci_set_mwi(struct pci_dev *dev)
-{
-	return 0;
-}
-
-int pci_try_set_mwi(struct pci_dev *dev)
-{
-	return 0;
-}
-
-void pci_clear_mwi(struct pci_dev *dev)
-{
-}
-
-#else
-
 /**
  * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
  * @dev: the PCI device for which MWI is to be enabled
@@ -1902,13 +1885,12 @@ void pci_clear_mwi(struct pci_dev *dev)
  *
  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
  */
-static int
-pci_set_cacheline_size(struct pci_dev *dev)
+int pci_set_cacheline_size(struct pci_dev *dev)
 {
 	u8 cacheline_size;
 
 	if (!pci_cache_line_size)
-		return -EINVAL;		/* The system doesn't support MWI. */
+		return -EINVAL;
 
 	/* Validate current setting: the PCI_CACHE_LINE_SIZE must be
 	   equal to or multiple of the right value. */
@@ -1929,6 +1911,24 @@ pci_set_cacheline_size(struct pci_dev *dev)
 
 	return -EINVAL;
 }
+EXPORT_SYMBOL_GPL(pci_set_cacheline_size);
+
+#ifdef PCI_DISABLE_MWI
+int pci_set_mwi(struct pci_dev *dev)
+{
+	return 0;
+}
+
+int pci_try_set_mwi(struct pci_dev *dev)
+{
+	return 0;
+}
+
+void pci_clear_mwi(struct pci_dev *dev)
+{
+}
+
+#else
 
 /**
  * pci_set_mwi - enables memory-write-invalidate PCI transaction
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index db77e1f3309a..98789c031a7c 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -184,26 +184,33 @@ fail:
     
 =====================================================================*/
 
-/*
- * Since there is only one interrupt available to CardBus
- * devices, all devices downstream of this device must
- * be using this IRQ.
- */
-static void cardbus_assign_irqs(struct pci_bus *bus, int irq)
+static void cardbus_config_irq_and_cls(struct pci_bus *bus, int irq)
 {
 	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		u8 irq_pin;
 
+		/*
+		 * Since there is only one interrupt available to
+		 * CardBus devices, all devices downstream of this
+		 * device must be using this IRQ.
+		 */
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin);
 		if (irq_pin) {
 			dev->irq = irq;
 			pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
 		}
 
+		/*
+		 * Some controllers transfer very slowly with 0 CLS.
+		 * Configure it.  This may fail as CLS configuration
+		 * is mandatory only for MWI.
+		 */
+		pci_set_cacheline_size(dev);
+
 		if (dev->subordinate)
-			cardbus_assign_irqs(dev->subordinate, irq);
+			cardbus_config_irq_and_cls(dev->subordinate, irq);
 	}
 }
 
@@ -228,7 +235,7 @@ int __ref cb_alloc(struct pcmcia_socket * s)
 	 */
 	pci_bus_size_bridges(bus);
 	pci_bus_assign_resources(bus);
-	cardbus_assign_irqs(bus, s->pci_irq);
+	cardbus_config_irq_and_cls(bus, s->pci_irq);
 
 	/* socket specific tune function */
 	if (s->tune_bridge)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b849861d78e6..da4128f6e916 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -701,6 +701,7 @@ void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
 void pci_clear_master(struct pci_dev *dev);
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
+int pci_set_cacheline_size(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
 int __must_check pci_set_mwi(struct pci_dev *dev);
 int pci_try_set_mwi(struct pci_dev *dev);
-- 
cgit v1.2.3


From ae21ee65e8bc228416bbcc8a1da01c56a847a60c Mon Sep 17 00:00:00 2001
From: Allen Kay <allen.m.kay@intel.com>
Date: Wed, 7 Oct 2009 10:27:17 -0700
Subject: PCI: acs p2p upsteram forwarding enabling

Note: dom0 checking in v4 has been separated out into 2/2.

This patch enables P2P upstream forwarding in ACS capable PCIe switches.
It solves two potential problems in virtualization environment where a PCIe
device is assigned to a guest domain using a HW iommu such as VT-d:

1) Unintentional failure caused by guest physical address programmed
   into the device's DMA that happens to match the memory address range
   of other downstream ports in the same PCIe switch.  This causes the PCI
   transaction to go to the matching downstream port instead of go to the
   root complex to get translated by VT-d as it should be.

2) Malicious guest software intentionally attacks another downstream
   PCIe device by programming the DMA address into the assigned device
   that matches memory address range of the downstream PCIe port.

We are in process of implementing device filtering software in KVM/XEN
management software to allow device assignment of PCIe devices behind a PCIe
switch only if it has ACS capability and with the P2P upstream forwarding bits
enabled.  This patch is intended to work for both KVM and Xen environments.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Reviewed-by: Mathew Wilcox <willy@linux.intel.com>
Reviewed-by: Chris Wright <chris@sous-sol.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c        | 35 +++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h        |  2 ++
 drivers/pci/probe.c      |  5 +++++
 include/linux/pci_regs.h | 13 +++++++++++++
 4 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4859669f0ab5..557218222826 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1545,6 +1545,41 @@ void pci_enable_ari(struct pci_dev *dev)
 	bridge->ari_enabled = 1;
 }
 
+/**
+ * pci_enable_acs - enable ACS if hardware support it
+ * @dev: the PCI device
+ */
+void pci_enable_acs(struct pci_dev *dev)
+{
+	int pos;
+	u16 cap;
+	u16 ctrl;
+
+	if (!dev->is_pcie)
+		return;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+	if (!pos)
+		return;
+
+	pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap);
+	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
+
+	/* Source Validation */
+	ctrl |= (cap & PCI_ACS_SV);
+
+	/* P2P Request Redirect */
+	ctrl |= (cap & PCI_ACS_RR);
+
+	/* P2P Completion Redirect */
+	ctrl |= (cap & PCI_ACS_CR);
+
+	/* Upstream Forwarding */
+	ctrl |= (cap & PCI_ACS_UF);
+
+	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+}
+
 /**
  * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
  * @dev: the PCI device
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d92d1954a2fb..33ed8e0aba1e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -311,4 +311,6 @@ static inline int pci_resource_alignment(struct pci_dev *dev,
 	return resource_alignment(res);
 }
 
+extern void pci_enable_acs(struct pci_dev *dev);
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2adb47574d86..aac5b156a5c5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/cpumask.h>
 #include <linux/pci-aspm.h>
+#include <linux/iommu.h>
 #include "pci.h"
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
@@ -1004,6 +1005,10 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Single Root I/O Virtualization */
 	pci_iov_init(dev);
+
+	/* Enable ACS P2P upstream forwarding */
+	if (iommu_found())
+		pci_enable_acs(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index dd0bed4f1cf0..d798770f08cd 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -502,6 +502,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_ACS	13
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
@@ -662,4 +663,16 @@
 #define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
 #define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
 
+/* Access Control Service */
+#define PCI_ACS_CAP		0x04	/* ACS Capability Register */
+#define  PCI_ACS_SV		0x01	/* Source Validation */
+#define  PCI_ACS_TB		0x02	/* Translation Blocking */
+#define  PCI_ACS_RR		0x04	/* P2P Request Redirect */
+#define  PCI_ACS_CR		0x08	/* P2P Completion Redirect */
+#define  PCI_ACS_UF		0x10	/* Upstream Forwarding */
+#define  PCI_ACS_EC		0x20	/* P2P Egress Control */
+#define  PCI_ACS_DT		0x40	/* Direct Translated P2P */
+#define PCI_ACS_CTRL		0x06	/* ACS Control Register */
+#define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
+
 #endif /* LINUX_PCI_REGS_H */
-- 
cgit v1.2.3


From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Sun, 25 Oct 2009 15:37:58 +0100
Subject: signal: Fix alternate signal stack check

All architectures in the kernel increment/decrement the stack pointer
before storing values on the stack.

On architectures which have the stack grow down sas_ss_sp == sp is not
on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is
on the alternate signal stack.

On architectures which have the stack grow up sas_ss_sp == sp is on
the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not
on the alternate signal stack.

The current implementation fails for architectures which have the
stack grow down on the corner case where sas_ss_sp == sp.This was
reported as Debian bug #544905 on AMD64.
Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c

The test case creates the following stack scenario:
   0xn0300	stack top
   0xn0200	alt stack pointer top (when switching to alt stack)
   0xn01ff	alt stack end
   0xn0100	alt stack start == stack pointer

If the signal is sent the stack pointer is pointing to the base
address of the alt stack and the kernel erroneously decides that it
has already switched to the alternate stack because of the current
check for "sp - sas_ss_sp < sas_ss_size"

On parisc (stack grows up) the scenario would be:
   0xn0200	stack pointer
   0xn01ff	alt stack end
   0xn0100	alt stack start = alt stack pointer base
   		    	  	  (when switching to alt stack)
   0xn0000	stack base

This is handled correctly by the current implementation.

[ tglx: Modified for archs which have the stack grow up (parisc) which
  	would fail with the correct implementation for stack grows
  	down. Added a check for sp >= current->sas_ss_sp which is
  	strictly not necessary but makes the code symetric for both
  	variants ]

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: stable@kernel.org
LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..0f67914a43c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info)
 	return info <= SEND_SIG_FORCED;
 }
 
-/* True if we are on the alternate signal stack.  */
-
+/*
+ * True if we are on the alternate signal stack.
+ */
 static inline int on_sig_stack(unsigned long sp)
 {
-	return (sp - current->sas_ss_sp < current->sas_ss_size);
+#ifdef CONFIG_STACK_GROWSUP
+	return sp >= current->sas_ss_sp &&
+		sp - current->sas_ss_sp < current->sas_ss_size;
+#else
+	return sp > current->sas_ss_sp &&
+		sp - current->sas_ss_sp <= current->sas_ss_size;
+#endif
 }
 
 static inline int sas_ss_flags(unsigned long sp)
-- 
cgit v1.2.3


From d94d9fee9fa4e66a0b91640a694b8b10177075b3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Nov 2009 09:50:58 -0800
Subject: net: cleanup include/linux

This cleanup patch puts struct/union/enum opening braces,
in first line to ease grep games.

struct something
{

becomes :

struct something {

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dn.h                             |   9 +-
 include/linux/errqueue.h                       |   6 +-
 include/linux/fib_rules.h                      |   9 +-
 include/linux/filter.h                         |   6 +-
 include/linux/gen_stats.h                      |  15 ++--
 include/linux/if.h                             |  15 ++--
 include/linux/if_addr.h                        |   9 +-
 include/linux/if_addrlabel.h                   |   6 +-
 include/linux/if_arcnet.h                      |  18 ++--
 include/linux/if_arp.h                         |   3 +-
 include/linux/if_bonding.h                     |   3 +-
 include/linux/if_bridge.h                      |   9 +-
 include/linux/if_ec.h                          |  12 +--
 include/linux/if_fddi.h                        |  22 ++---
 include/linux/if_hippi.h                       |  15 ++--
 include/linux/if_link.h                        |  27 ++----
 include/linux/if_packet.h                      |  27 ++----
 include/linux/if_plip.h                        |   3 +-
 include/linux/if_pppol2tp.h                    |   3 +-
 include/linux/if_tunnel.h                      |   6 +-
 include/linux/igmp.h                           |  15 ++--
 include/linux/in.h                             |  18 ++--
 include/linux/in6.h                            |   9 +-
 include/linux/inetdevice.h                     |   9 +-
 include/linux/ip_vs.h                          |   3 +-
 include/linux/mroute.h                         |  18 ++--
 include/linux/mroute6.h                        |  15 ++--
 include/linux/neighbour.h                      |  18 ++--
 include/linux/netdevice.h                      |  30 +++----
 include/linux/netfilter.h                      |   6 +-
 include/linux/netfilter/nf_conntrack_common.h  |   6 +-
 include/linux/netfilter/nf_conntrack_ftp.h     |   3 +-
 include/linux/netfilter/nf_conntrack_sctp.h    |   3 +-
 include/linux/netfilter/nf_conntrack_tcp.h     |   3 +-
 include/linux/netfilter/nfnetlink.h            |   6 +-
 include/linux/netfilter/nfnetlink_compat.h     |   3 +-
 include/linux/netfilter/x_tables.h             |  45 ++++------
 include/linux/netfilter/xt_connbytes.h         |   3 +-
 include/linux/netfilter/xt_esp.h               |   3 +-
 include/linux/netfilter/xt_multiport.h         |   9 +-
 include/linux/netfilter/xt_policy.h            |  18 ++--
 include/linux/netfilter/xt_state.h             |   3 +-
 include/linux/netfilter/xt_string.h            |   3 +-
 include/linux/netfilter/xt_tcpudp.h            |   6 +-
 include/linux/netfilter_arp/arp_tables.h       |  21 ++---
 include/linux/netfilter_bridge/ebt_802_3.h     |   3 +-
 include/linux/netfilter_bridge/ebt_among.h     |   9 +-
 include/linux/netfilter_bridge/ebt_arpreply.h  |   3 +-
 include/linux/netfilter_bridge/ebt_ip.h        |   3 +-
 include/linux/netfilter_bridge/ebt_ip6.h       |   3 +-
 include/linux/netfilter_bridge/ebt_limit.h     |   3 +-
 include/linux/netfilter_bridge/ebt_log.h       |   3 +-
 include/linux/netfilter_bridge/ebt_mark_m.h    |   3 +-
 include/linux/netfilter_bridge/ebt_mark_t.h    |   3 +-
 include/linux/netfilter_bridge/ebt_nat.h       |   3 +-
 include/linux/netfilter_bridge/ebt_pkttype.h   |   3 +-
 include/linux/netfilter_bridge/ebt_redirect.h  |   3 +-
 include/linux/netfilter_bridge/ebt_stp.h       |   6 +-
 include/linux/netfilter_bridge/ebtables.h      |  39 +++------
 include/linux/netfilter_ipv4/ip_tables.h       |  27 ++----
 include/linux/netfilter_ipv4/ipt_SAME.h        |   3 +-
 include/linux/netfilter_ipv4/ipt_ah.h          |   3 +-
 include/linux/netfilter_ipv6/ip6_tables.h      |  27 ++----
 include/linux/netfilter_ipv6/ip6t_ah.h         |   3 +-
 include/linux/netfilter_ipv6/ip6t_frag.h       |   3 +-
 include/linux/netfilter_ipv6/ip6t_ipv6header.h |   3 +-
 include/linux/netfilter_ipv6/ip6t_mh.h         |   3 +-
 include/linux/netfilter_ipv6/ip6t_opts.h       |   3 +-
 include/linux/netfilter_ipv6/ip6t_rt.h         |   3 +-
 include/linux/netlink.h                        |  24 ++----
 include/linux/pkt_cls.h                        |  84 +++++++------------
 include/linux/pkt_sched.h                      | 111 +++++++++----------------
 include/linux/route.h                          |   3 +-
 include/linux/rtnetlink.h                      |  57 +++++--------
 include/linux/skbuff.h                         |   3 +-
 include/linux/tc_act/tc_defact.h               |   6 +-
 include/linux/tc_act/tc_gact.h                 |   9 +-
 include/linux/tc_act/tc_ipt.h                  |   3 +-
 include/linux/tc_act/tc_mirred.h               |   6 +-
 include/linux/tc_act/tc_nat.h                  |   6 +-
 include/linux/tc_act/tc_pedit.h                |   9 +-
 include/linux/tc_ematch/tc_em_cmp.h            |   6 +-
 include/linux/tc_ematch/tc_em_meta.h           |  15 ++--
 include/linux/tc_ematch/tc_em_nbyte.h          |   3 +-
 include/linux/tc_ematch/tc_em_text.h           |   3 +-
 include/linux/tcp.h                            |   6 +-
 include/linux/xfrm.h                           |  27 ++----
 87 files changed, 351 insertions(+), 694 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dn.h b/include/linux/dn.h
index fe9990823193..9c50445462d9 100644
--- a/include/linux/dn.h
+++ b/include/linux/dn.h
@@ -71,14 +71,12 @@
 /* Structures */
 
 
-struct dn_naddr 
-{
+struct dn_naddr {
 	__le16		a_len;
 	__u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */
 };
 
-struct sockaddr_dn
-{
+struct sockaddr_dn {
 	__u16		sdn_family;
 	__u8		sdn_flags;
 	__u8		sdn_objnum;
@@ -101,8 +99,7 @@ struct optdata_dn {
         __u8   opt_data[16];   /* User data              */
 };
 
-struct accessdata_dn
-{
+struct accessdata_dn {
 	__u8		acc_accl;
 	__u8		acc_acc[DN_MAXACCL];
 	__u8 		acc_passl;
diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index ec12cc74366f..034072cea853 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-struct sock_extended_err
-{
+struct sock_extended_err {
 	__u32	ee_errno;	
 	__u8	ee_origin;
 	__u8	ee_type;
@@ -31,8 +30,7 @@ struct sock_extended_err
 
 #define SKB_EXT_ERR(skb) ((struct sock_exterr_skb *) ((skb)->cb))
 
-struct sock_exterr_skb
-{
+struct sock_exterr_skb {
 	union {
 		struct inet_skb_parm	h4;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 87b606b63f1e..c7e5b700bb91 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -13,8 +13,7 @@
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
 
-struct fib_rule_hdr
-{
+struct fib_rule_hdr {
 	__u8		family;
 	__u8		dst_len;
 	__u8		src_len;
@@ -28,8 +27,7 @@ struct fib_rule_hdr
 	__u32		flags;
 };
 
-enum
-{
+enum {
 	FRA_UNSPEC,
 	FRA_DST,	/* destination address */
 	FRA_SRC,	/* source address */
@@ -52,8 +50,7 @@ enum
 
 #define FRA_MAX (__FRA_MAX - 1)
 
-enum
-{
+enum {
 	FR_ACT_UNSPEC,
 	FR_ACT_TO_TBL,		/* Pass to fixed table */
 	FR_ACT_GOTO,		/* Jump to another rule */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bb3b4352978a..29a0e3db9f43 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -23,16 +23,14 @@
  *	the BPF code definitions which need to match so you can share filters
  */
  
-struct sock_filter	/* Filter block */
-{
+struct sock_filter {	/* Filter block */
 	__u16	code;   /* Actual filter code */
 	__u8	jt;	/* Jump true */
 	__u8	jf;	/* Jump false */
 	__u32	k;      /* Generic multiuse field */
 };
 
-struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
-{
+struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 	unsigned short		len;	/* Number of filter blocks */
 	struct sock_filter __user *filter;
 };
diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
index 710e901085d0..552c8a0a12d1 100644
--- a/include/linux/gen_stats.h
+++ b/include/linux/gen_stats.h
@@ -18,13 +18,11 @@ enum {
  * @bytes: number of seen bytes
  * @packets: number of seen packets
  */
-struct gnet_stats_basic
-{
+struct gnet_stats_basic {
 	__u64	bytes;
 	__u32	packets;
 };
-struct gnet_stats_basic_packed
-{
+struct gnet_stats_basic_packed {
 	__u64	bytes;
 	__u32	packets;
 } __attribute__ ((packed));
@@ -34,8 +32,7 @@ struct gnet_stats_basic_packed
  * @bps: current byte rate
  * @pps: current packet rate
  */
-struct gnet_stats_rate_est
-{
+struct gnet_stats_rate_est {
 	__u32	bps;
 	__u32	pps;
 };
@@ -48,8 +45,7 @@ struct gnet_stats_rate_est
  * @requeues: number of requeues
  * @overlimits: number of enqueues over the limit
  */
-struct gnet_stats_queue
-{
+struct gnet_stats_queue {
 	__u32	qlen;
 	__u32	backlog;
 	__u32	drops;
@@ -62,8 +58,7 @@ struct gnet_stats_queue
  * @interval: sampling period
  * @ewma_log: the log of measurement window weight
  */
-struct gnet_estimator
-{
+struct gnet_estimator {
 	signed char	interval;
 	unsigned char	ewma_log;
 };
diff --git a/include/linux/if.h b/include/linux/if.h
index b9a6229f3be7..3b2a46bf8f8d 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -125,8 +125,7 @@ enum {
  *	being very small might be worth keeping for clean configuration.
  */
 
-struct ifmap 
-{
+struct ifmap {
 	unsigned long mem_start;
 	unsigned long mem_end;
 	unsigned short base_addr; 
@@ -136,8 +135,7 @@ struct ifmap
 	/* 3 bytes spare */
 };
 
-struct if_settings
-{
+struct if_settings {
 	unsigned int type;	/* Type of physical device or protocol */
 	unsigned int size;	/* Size of the data allocated by the caller */
 	union {
@@ -161,8 +159,7 @@ struct if_settings
  * remainder may be interface specific.
  */
 
-struct ifreq 
-{
+struct ifreq {
 #define IFHWADDRLEN	6
 	union
 	{
@@ -211,11 +208,9 @@ struct ifreq
  * must know all networks accessible).
  */
 
-struct ifconf 
-{
+struct ifconf  {
 	int	ifc_len;			/* size of buffer	*/
-	union 
-	{
+	union {
 		char __user *ifcu_buf;
 		struct ifreq __user *ifcu_req;
 	} ifc_ifcu;
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index fd9740466757..23357ab81a77 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 #include <linux/netlink.h>
 
-struct ifaddrmsg
-{
+struct ifaddrmsg {
 	__u8		ifa_family;
 	__u8		ifa_prefixlen;	/* The prefix length		*/
 	__u8		ifa_flags;	/* Flags			*/
@@ -20,8 +19,7 @@ struct ifaddrmsg
  * but for point-to-point IFA_ADDRESS is DESTINATION address,
  * local address is supplied in IFA_LOCAL attribute.
  */
-enum
-{
+enum {
 	IFA_UNSPEC,
 	IFA_ADDRESS,
 	IFA_LOCAL,
@@ -47,8 +45,7 @@ enum
 #define IFA_F_TENTATIVE		0x40
 #define IFA_F_PERMANENT		0x80
 
-struct ifa_cacheinfo
-{
+struct ifa_cacheinfo {
 	__u32	ifa_prefered;
 	__u32	ifa_valid;
 	__u32	cstamp; /* created timestamp, hundredths of seconds */
diff --git a/include/linux/if_addrlabel.h b/include/linux/if_addrlabel.h
index 89571f65d6de..54580c298187 100644
--- a/include/linux/if_addrlabel.h
+++ b/include/linux/if_addrlabel.h
@@ -12,8 +12,7 @@
 
 #include <linux/types.h>
 
-struct ifaddrlblmsg
-{
+struct ifaddrlblmsg {
 	__u8		ifal_family;		/* Address family */
 	__u8		__ifal_reserved;	/* Reserved */
 	__u8		ifal_prefixlen;		/* Prefix length */
@@ -22,8 +21,7 @@ struct ifaddrlblmsg
 	__u32		ifal_seq;		/* sequence number */
 };
 
-enum
-{
+enum {
 	IFAL_ADDRESS = 1,
 	IFAL_LABEL = 2,
 	__IFAL_MAX
diff --git a/include/linux/if_arcnet.h b/include/linux/if_arcnet.h
index 0835debab115..46e34bd0e783 100644
--- a/include/linux/if_arcnet.h
+++ b/include/linux/if_arcnet.h
@@ -56,8 +56,7 @@
 /*
  * The RFC1201-specific components of an arcnet packet header.
  */
-struct arc_rfc1201
-{
+struct arc_rfc1201 {
     __u8  proto;		/* protocol ID field - varies		*/
     __u8  split_flag;	/* for use with split packets		*/
     __be16   sequence;		/* sequence number			*/
@@ -69,8 +68,7 @@ struct arc_rfc1201
 /*
  * The RFC1051-specific components.
  */
-struct arc_rfc1051
-{
+struct arc_rfc1051 {
     __u8 proto;		/* ARC_P_RFC1051_ARP/RFC1051_IP	*/
     __u8 payload[0];		/* 507 bytes			*/
 };
@@ -81,8 +79,7 @@ struct arc_rfc1051
  * The ethernet-encap-specific components.  We have a real ethernet header
  * and some data.
  */
-struct arc_eth_encap
-{
+struct arc_eth_encap {
     __u8 proto;		/* Always ARC_P_ETHER			*/
     struct ethhdr eth;		/* standard ethernet header (yuck!)	*/
     __u8 payload[0];		/* 493 bytes				*/
@@ -90,8 +87,7 @@ struct arc_eth_encap
 #define ETH_ENCAP_HDR_SIZE 14
 
 
-struct arc_cap
-{
+struct arc_cap {
 	__u8 proto;
 	__u8 cookie[sizeof(int)];   /* Actually NOT sent over the network */
 	union {
@@ -108,8 +104,7 @@ struct arc_cap
  * the _end_ of the 512-byte buffer.  We hide this complexity inside the
  * driver.
  */
-struct arc_hardware
-{
+struct arc_hardware {
     __u8  source,		/* source ARCnet - filled in automagically */
              dest,		/* destination ARCnet - 0 for broadcast    */
     	     offset[2];		/* offset bytes (some weird semantics)     */
@@ -120,8 +115,7 @@ struct arc_hardware
  * This is an ARCnet frame header, as seen by the kernel (and userspace,
  * when you do a raw packet capture).
  */
-struct archdr
-{
+struct archdr {
     /* hardware requirements */
     struct arc_hardware hard;
      
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 282eb37e2dec..e80b7f88f7c6 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -133,8 +133,7 @@ struct arpreq_old {
  *	This structure defines an ethernet arp header.
  */
 
-struct arphdr
-{
+struct arphdr {
 	__be16		ar_hrd;		/* format of hardware address	*/
 	__be16		ar_pro;		/* format of protocol address	*/
 	unsigned char	ar_hln;		/* length of hardware address	*/
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 65c2d247068b..cd525fae3c98 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -94,8 +94,7 @@ typedef struct ifbond {
 	__s32 miimon;
 } ifbond;
 
-typedef struct ifslave
-{
+typedef struct ifslave {
 	__s32 slave_id; /* Used as an IN param to the BOND_SLAVE_INFO_QUERY ioctl */
 	char slave_name[IFNAMSIZ];
 	__s8 link;
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6badb3e2c4e4..938b7e81df95 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -49,8 +49,7 @@
 #define BR_STATE_FORWARDING 3
 #define BR_STATE_BLOCKING 4
 
-struct __bridge_info
-{
+struct __bridge_info {
 	__u64 designated_root;
 	__u64 bridge_id;
 	__u32 root_path_cost;
@@ -72,8 +71,7 @@ struct __bridge_info
 	__u32 gc_timer_value;
 };
 
-struct __port_info
-{
+struct __port_info {
 	__u64 designated_root;
 	__u64 designated_bridge;
 	__u16 port_id;
@@ -89,8 +87,7 @@ struct __port_info
 	__u32 hold_timer_value;
 };
 
-struct __fdb_entry
-{
+struct __fdb_entry {
 	__u8 mac_addr[6];
 	__u8 port_no;
 	__u8 is_local;
diff --git a/include/linux/if_ec.h b/include/linux/if_ec.h
index e7499aa79783..d85f9f48129f 100644
--- a/include/linux/if_ec.h
+++ b/include/linux/if_ec.h
@@ -5,14 +5,12 @@
 
 /* User visible stuff. Glibc provides its own but libc5 folk will use these */
 
-struct ec_addr
-{
+struct ec_addr {
   unsigned char station;		/* Station number.  */
   unsigned char net;			/* Network number.  */
 };
 
-struct sockaddr_ec
-{
+struct sockaddr_ec {
   unsigned short sec_family;
   unsigned char port;			/* Port number.  */
   unsigned char cb;			/* Control/flag byte.  */
@@ -37,8 +35,7 @@ struct sockaddr_ec
 #define EC_HLEN				6
 
 /* This is what an Econet frame looks like on the wire. */
-struct ec_framehdr 
-{
+struct ec_framehdr {
   unsigned char dst_stn;
   unsigned char dst_net;
   unsigned char src_stn;
@@ -62,8 +59,7 @@ static inline struct econet_sock *ec_sk(const struct sock *sk)
 	return (struct econet_sock *)sk;
 }
 
-struct ec_device
-{
+struct ec_device {
   unsigned char station, net;		/* Econet protocol address */
 };
 
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index 45de1046dbbf..5459c5c09930 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -63,36 +63,32 @@
 #define FDDI_UI_CMD			0x03
 
 /* Define 802.2 Type 1 header */
-struct fddi_8022_1_hdr
-	{
+struct fddi_8022_1_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl;					/* control byte #1 */
-	} __attribute__ ((packed));
+} __attribute__ ((packed));
 
 /* Define 802.2 Type 2 header */
-struct fddi_8022_2_hdr
-	{
+struct fddi_8022_2_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl_1;					/* control byte #1 */
 	__u8	ctrl_2;					/* control byte #2 */
-	} __attribute__ ((packed));
+} __attribute__ ((packed));
 
 /* Define 802.2 SNAP header */
 #define FDDI_K_OUI_LEN	3
-struct fddi_snap_hdr
-	{
+struct fddi_snap_hdr {
 	__u8	dsap;					/* always 0xAA */
 	__u8	ssap;					/* always 0xAA */
 	__u8	ctrl;					/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
 	__be16	ethertype;				/* packet type ID field */
-	} __attribute__ ((packed));
+} __attribute__ ((packed));
 
 /* Define FDDI LLC frame header */
-struct fddihdr
-	{
+struct fddihdr {
 	__u8	fc;						/* frame control */
 	__u8	daddr[FDDI_K_ALEN];		/* destination address */
 	__u8	saddr[FDDI_K_ALEN];		/* source address */
@@ -102,7 +98,7 @@ struct fddihdr
 		struct fddi_8022_2_hdr		llc_8022_2;
 		struct fddi_snap_hdr		llc_snap;
 		} hdr;
-	} __attribute__ ((packed));
+} __attribute__ ((packed));
 
 #ifdef __KERNEL__
 #include <linux/netdevice.h>
@@ -197,7 +193,7 @@ struct fddi_statistics {
 	__u32	port_pc_withhold[2];
 	__u32	port_ler_flag[2];
 	__u32	port_hardware_present[2];
-	};
+};
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_IF_FDDI_H */
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 4a7c9940b080..8d038eb8db5c 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -51,8 +51,7 @@
  *	HIPPI statistics collection data. 
  */
  
-struct hipnet_statistics
-{
+struct hipnet_statistics {
 	int	rx_packets;		/* total packets received	*/
 	int	tx_packets;		/* total packets transmitted	*/
 	int	rx_errors;		/* bad packets received		*/
@@ -77,8 +76,7 @@ struct hipnet_statistics
 };
 
 
-struct hippi_fp_hdr
-{
+struct hippi_fp_hdr {
 #if 0
 	__u8		ulp;				/* must contain 4 */
 #if defined (__BIG_ENDIAN_BITFIELD)
@@ -108,8 +106,7 @@ struct hippi_fp_hdr
 	__be32		d2_size;
 } __attribute__ ((packed));
 
-struct hippi_le_hdr
-{
+struct hippi_le_hdr {
 #if defined (__BIG_ENDIAN_BITFIELD)
 	__u8		fc:3;
 	__u8		double_wide:1;
@@ -139,8 +136,7 @@ struct hippi_le_hdr
  * Looks like the dsap and ssap fields have been swapped by mistake in
  * RFC 2067 "IP over HIPPI".
  */
-struct hippi_snap_hdr
-{
+struct hippi_snap_hdr {
 	__u8	dsap;			/* always 0xAA */
 	__u8	ssap;			/* always 0xAA */
 	__u8	ctrl;			/* always 0x03 */
@@ -148,8 +144,7 @@ struct hippi_snap_hdr
 	__be16	ethertype;		/* packet type ID field */
 } __attribute__ ((packed));
 
-struct hippi_hdr
-{
+struct hippi_hdr {
 	struct hippi_fp_hdr	fp;
 	struct hippi_le_hdr	le;
 	struct hippi_snap_hdr	snap;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 176c5182c515..1d3b2425f661 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -5,8 +5,7 @@
 #include <linux/netlink.h>
 
 /* The struct should be in sync with struct net_device_stats */
-struct rtnl_link_stats
-{
+struct rtnl_link_stats {
 	__u32	rx_packets;		/* total packets received	*/
 	__u32	tx_packets;		/* total packets transmitted	*/
 	__u32	rx_bytes;		/* total bytes received 	*/
@@ -39,8 +38,7 @@ struct rtnl_link_stats
 };
 
 /* The struct should be in sync with struct ifmap */
-struct rtnl_link_ifmap
-{
+struct rtnl_link_ifmap {
 	__u64	mem_start;
 	__u64	mem_end;
 	__u64	base_addr;
@@ -49,8 +47,7 @@ struct rtnl_link_ifmap
 	__u8	port;
 };
 
-enum
-{
+enum {
 	IFLA_UNSPEC,
 	IFLA_ADDRESS,
 	IFLA_BROADCAST,
@@ -123,8 +120,7 @@ enum
  */
 
 /* Subtype attributes for IFLA_PROTINFO */
-enum
-{
+enum {
 	IFLA_INET6_UNSPEC,
 	IFLA_INET6_FLAGS,	/* link flags			*/
 	IFLA_INET6_CONF,	/* sysctl parameters		*/
@@ -137,16 +133,14 @@ enum
 
 #define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
 
-struct ifla_cacheinfo
-{
+struct ifla_cacheinfo {
 	__u32	max_reasm_len;
 	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
 	__u32	reachable_time;
 	__u32	retrans_time;
 };
 
-enum
-{
+enum {
 	IFLA_INFO_UNSPEC,
 	IFLA_INFO_KIND,
 	IFLA_INFO_DATA,
@@ -158,8 +152,7 @@ enum
 
 /* VLAN section */
 
-enum
-{
+enum {
 	IFLA_VLAN_UNSPEC,
 	IFLA_VLAN_ID,
 	IFLA_VLAN_FLAGS,
@@ -175,8 +168,7 @@ struct ifla_vlan_flags {
 	__u32	mask;
 };
 
-enum
-{
+enum {
 	IFLA_VLAN_QOS_UNSPEC,
 	IFLA_VLAN_QOS_MAPPING,
 	__IFLA_VLAN_QOS_MAX
@@ -184,8 +176,7 @@ enum
 
 #define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
 
-struct ifla_vlan_qos_mapping
-{
+struct ifla_vlan_qos_mapping {
 	__u32 from;
 	__u32 to;
 };
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index dea7d6b7cf98..4021d47cc437 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -3,15 +3,13 @@
 
 #include <linux/types.h>
 
-struct sockaddr_pkt
-{
+struct sockaddr_pkt {
 	unsigned short spkt_family;
 	unsigned char spkt_device[14];
 	__be16 spkt_protocol;
 };
 
-struct sockaddr_ll
-{
+struct sockaddr_ll {
 	unsigned short	sll_family;
 	__be16		sll_protocol;
 	int		sll_ifindex;
@@ -49,14 +47,12 @@ struct sockaddr_ll
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
 
-struct tpacket_stats
-{
+struct tpacket_stats {
 	unsigned int	tp_packets;
 	unsigned int	tp_drops;
 };
 
-struct tpacket_auxdata
-{
+struct tpacket_auxdata {
 	__u32		tp_status;
 	__u32		tp_len;
 	__u32		tp_snaplen;
@@ -78,8 +74,7 @@ struct tpacket_auxdata
 #define TP_STATUS_SENDING	0x2
 #define TP_STATUS_WRONG_FORMAT	0x4
 
-struct tpacket_hdr
-{
+struct tpacket_hdr {
 	unsigned long	tp_status;
 	unsigned int	tp_len;
 	unsigned int	tp_snaplen;
@@ -93,8 +88,7 @@ struct tpacket_hdr
 #define TPACKET_ALIGN(x)	(((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
 #define TPACKET_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
 
-struct tpacket2_hdr
-{
+struct tpacket2_hdr {
 	__u32		tp_status;
 	__u32		tp_len;
 	__u32		tp_snaplen;
@@ -107,8 +101,7 @@ struct tpacket2_hdr
 
 #define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
 
-enum tpacket_versions
-{
+enum tpacket_versions {
 	TPACKET_V1,
 	TPACKET_V2,
 };
@@ -126,16 +119,14 @@ enum tpacket_versions
    - Pad to align to TPACKET_ALIGNMENT=16
  */
 
-struct tpacket_req
-{
+struct tpacket_req {
 	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
 	unsigned int	tp_block_nr;	/* Number of blocks */
 	unsigned int	tp_frame_size;	/* Size of frame */
 	unsigned int	tp_frame_nr;	/* Total number of frames */
 };
 
-struct packet_mreq
-{
+struct packet_mreq {
 	int		mr_ifindex;
 	unsigned short	mr_type;
 	unsigned short	mr_alen;
diff --git a/include/linux/if_plip.h b/include/linux/if_plip.h
index 153a649915a2..6298c7e88b2b 100644
--- a/include/linux/if_plip.h
+++ b/include/linux/if_plip.h
@@ -15,8 +15,7 @@
 
 #define	SIOCDEVPLIP	SIOCDEVPRIVATE
 
-struct plipconf
-{
+struct plipconf {
 	unsigned short pcmd;
 	unsigned long  nibble;
 	unsigned long  trigger;
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index 3a14b088c8ec..c58baea4a25b 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -24,8 +24,7 @@
 /* Structure used to connect() the socket to a particular tunnel UDP
  * socket.
  */
-struct pppol2tp_addr
-{
+struct pppol2tp_addr {
 	__kernel_pid_t	pid;		/* pid that owns the fd.
 					 * 0 => current */
 	int	fd;			/* FD of UDP socket to use */
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 8d76cb4c86fa..1822d635be6b 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -30,8 +30,7 @@
 #define GRE_FLAGS	__cpu_to_be16(0x00F8)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
-struct ip_tunnel_parm
-{
+struct ip_tunnel_parm {
 	char			name[IFNAMSIZ];
 	int			link;
 	__be16			i_flags;
@@ -63,8 +62,7 @@ struct ip_tunnel_6rd {
 	__u16			relay_prefixlen;
 };
 
-enum
-{
+enum {
 	IFLA_GRE_UNSPEC,
 	IFLA_GRE_LINK,
 	IFLA_GRE_IFLAGS,
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index fe158e0e20e6..724c27e5d173 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -27,8 +27,7 @@
  *	Header in on cable format
  */
 
-struct igmphdr
-{
+struct igmphdr {
 	__u8 type;
 	__u8 code;		/* For newer IGMP */
 	__sum16 csum;
@@ -151,8 +150,7 @@ static inline struct igmpv3_query *
 extern int sysctl_igmp_max_memberships;
 extern int sysctl_igmp_max_msf;
 
-struct ip_sf_socklist
-{
+struct ip_sf_socklist {
 	unsigned int		sl_max;
 	unsigned int		sl_count;
 	__be32			sl_addr[0];
@@ -167,16 +165,14 @@ struct ip_sf_socklist
    this list never used in fast path code
  */
 
-struct ip_mc_socklist
-{
+struct ip_mc_socklist {
 	struct ip_mc_socklist	*next;
 	struct ip_mreqn		multi;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip_sf_socklist	*sflist;
 };
 
-struct ip_sf_list
-{
+struct ip_sf_list {
 	struct ip_sf_list	*sf_next;
 	__be32			sf_inaddr;
 	unsigned long		sf_count[2];	/* include/exclude counts */
@@ -185,8 +181,7 @@ struct ip_sf_list
 	unsigned char		sf_crcount;	/* retrans. left to send */
 };
 
-struct ip_mc_list
-{
+struct ip_mc_list {
 	struct in_device	*interface;
 	__be32			multiaddr;
 	struct ip_sf_list	*sources;
diff --git a/include/linux/in.h b/include/linux/in.h
index cf196da04ec9..b615649db129 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -118,14 +118,12 @@ struct in_addr {
 
 /* Request struct for multicast socket ops */
 
-struct ip_mreq 
-{
+struct ip_mreq  {
 	struct in_addr imr_multiaddr;	/* IP multicast address of group */
 	struct in_addr imr_interface;	/* local IP address of interface */
 };
 
-struct ip_mreqn
-{
+struct ip_mreqn {
 	struct in_addr	imr_multiaddr;		/* IP multicast address of group */
 	struct in_addr	imr_address;		/* local IP address of interface */
 	int		imr_ifindex;		/* Interface index */
@@ -149,21 +147,18 @@ struct ip_msfilter {
 	(sizeof(struct ip_msfilter) - sizeof(__u32) \
 	+ (numsrc) * sizeof(__u32))
 
-struct group_req
-{
+struct group_req {
 	__u32				 gr_interface;	/* interface index */
 	struct __kernel_sockaddr_storage gr_group;	/* group address */
 };
 
-struct group_source_req
-{
+struct group_source_req {
 	__u32				 gsr_interface;	/* interface index */
 	struct __kernel_sockaddr_storage gsr_group;	/* group address */
 	struct __kernel_sockaddr_storage gsr_source;	/* source address */
 };
 
-struct group_filter
-{
+struct group_filter {
 	__u32				 gf_interface;	/* interface index */
 	struct __kernel_sockaddr_storage gf_group;	/* multicast address */
 	__u32				 gf_fmode;	/* filter mode */
@@ -175,8 +170,7 @@ struct group_filter
 	(sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
 	+ (numsrc) * sizeof(struct __kernel_sockaddr_storage))
 
-struct in_pktinfo
-{
+struct in_pktinfo {
 	int		ipi_ifindex;
 	struct in_addr	ipi_spec_dst;
 	struct in_addr	ipi_addr;
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 718bf21c5754..dfa29168e6ab 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -27,10 +27,8 @@
  *	IPv6 address structure
  */
 
-struct in6_addr
-{
-	union 
-	{
+struct in6_addr {
+	union {
 		__u8		u6_addr8[16];
 		__be16		u6_addr16[8];
 		__be32		u6_addr32[4];
@@ -75,8 +73,7 @@ struct ipv6_mreq {
 
 #define ipv6mr_acaddr	ipv6mr_multiaddr
 
-struct in6_flowlabel_req
-{
+struct in6_flowlabel_req {
 	struct in6_addr	flr_dst;
 	__be32	flr_label;
 	__u8	flr_action;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ad27c7da8798..eecfa559bfb4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -10,15 +10,13 @@
 #include <linux/timer.h>
 #include <linux/sysctl.h>
 
-struct ipv4_devconf
-{
+struct ipv4_devconf {
 	void	*sysctl;
 	int	data[__NET_IPV4_CONF_MAX - 1];
 	DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1);
 };
 
-struct in_device
-{
+struct in_device {
 	struct net_device	*dev;
 	atomic_t		refcnt;
 	int			dead;
@@ -110,8 +108,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_ARP_IGNORE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_IGNORE)
 #define IN_DEV_ARP_NOTIFY(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_NOTIFY)
 
-struct in_ifaddr
-{
+struct in_ifaddr {
 	struct in_ifaddr	*ifa_next;
 	struct in_device	*ifa_dev;
 	struct rcu_head		rcu_head;
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 148265e63e8d..dfc170362842 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -127,8 +127,7 @@ struct ip_vs_dest_user {
 /*
  *	IPVS statistics object (for user space)
  */
-struct ip_vs_stats_user
-{
+struct ip_vs_stats_user {
 	__u32                   conns;          /* connections scheduled */
 	__u32                   inpkts;         /* incoming packets */
 	__u32                   outpkts;        /* outgoing packets */
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index d5f69151f692..c5f3d53548e2 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -76,8 +76,7 @@ struct vifctl {
  *	Cache manipulation structures for mrouted and PIMd
  */
  
-struct mfcctl
-{
+struct mfcctl {
 	struct in_addr mfcc_origin;		/* Origin of mcast	*/
 	struct in_addr mfcc_mcastgrp;		/* Group in question	*/
 	vifi_t	mfcc_parent;			/* Where it arrived	*/
@@ -92,8 +91,7 @@ struct mfcctl
  *	Group count retrieval for mrouted
  */
  
-struct sioc_sg_req
-{
+struct sioc_sg_req {
 	struct in_addr src;
 	struct in_addr grp;
 	unsigned long pktcnt;
@@ -105,8 +103,7 @@ struct sioc_sg_req
  *	To get vif packet counts
  */
 
-struct sioc_vif_req
-{
+struct sioc_vif_req {
 	vifi_t	vifi;		/* Which iface */
 	unsigned long icount;	/* In packets */
 	unsigned long ocount;	/* Out packets */
@@ -119,8 +116,7 @@ struct sioc_vif_req
  *	data. Magically happens to be like an IP packet as per the original
  */
  
-struct igmpmsg
-{
+struct igmpmsg {
 	__u32 unused1,unused2;
 	unsigned char im_msgtype;		/* What is this */
 	unsigned char im_mbz;			/* Must be zero */
@@ -181,8 +177,7 @@ static inline int ip_mr_init(void)
 }
 #endif
 
-struct vif_device
-{
+struct vif_device {
 	struct net_device 	*dev;			/* Device we are using */
 	unsigned long	bytes_in,bytes_out;
 	unsigned long	pkt_in,pkt_out;		/* Statistics 			*/
@@ -195,8 +190,7 @@ struct vif_device
 
 #define VIFF_STATIC 0x8000
 
-struct mfc_cache 
-{
+struct mfc_cache {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
 #ifdef CONFIG_NET_NS
 	struct net *mfc_net;
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index b191865a6ca3..2caa1a8e525d 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -75,8 +75,7 @@ struct mif6ctl {
  *	Cache manipulation structures for mrouted and PIMd
  */
 
-struct mf6cctl
-{
+struct mf6cctl {
 	struct sockaddr_in6 mf6cc_origin;		/* Origin of mcast	*/
 	struct sockaddr_in6 mf6cc_mcastgrp;		/* Group in question	*/
 	mifi_t	mf6cc_parent;			/* Where it arrived	*/
@@ -87,8 +86,7 @@ struct mf6cctl
  *	Group count retrieval for pim6sd
  */
 
-struct sioc_sg_req6
-{
+struct sioc_sg_req6 {
 	struct sockaddr_in6 src;
 	struct sockaddr_in6 grp;
 	unsigned long pktcnt;
@@ -100,8 +98,7 @@ struct sioc_sg_req6
  *	To get vif packet counts
  */
 
-struct sioc_mif_req6
-{
+struct sioc_mif_req6 {
 	mifi_t	mifi;		/* Which iface */
 	unsigned long icount;	/* In packets */
 	unsigned long ocount;	/* Out packets */
@@ -172,8 +169,7 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
-struct mif_device
-{
+struct mif_device {
 	struct net_device 	*dev;			/* Device we are using */
 	unsigned long	bytes_in,bytes_out;
 	unsigned long	pkt_in,pkt_out;		/* Statistics 			*/
@@ -185,8 +181,7 @@ struct mif_device
 
 #define VIFF_STATIC 0x8000
 
-struct mfc6_cache
-{
+struct mfc6_cache {
 	struct mfc6_cache *next;		/* Next entry on cache line 	*/
 #ifdef CONFIG_NET_NS
 	struct net *mfc6_net;
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index 12c9de138451..a7003b7a695d 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 #include <linux/netlink.h>
 
-struct ndmsg
-{
+struct ndmsg {
 	__u8		ndm_family;
 	__u8		ndm_pad1;
 	__u16		ndm_pad2;
@@ -15,8 +14,7 @@ struct ndmsg
 	__u8		ndm_type;
 };
 
-enum
-{
+enum {
 	NDA_UNSPEC,
 	NDA_DST,
 	NDA_LLADDR,
@@ -56,8 +54,7 @@ enum
    NUD_PERMANENT is also cannot be deleted by garbage collectors.
  */
 
-struct nda_cacheinfo
-{
+struct nda_cacheinfo {
 	__u32		ndm_confirmed;
 	__u32		ndm_used;
 	__u32		ndm_updated;
@@ -89,8 +86,7 @@ struct nda_cacheinfo
  * device.
  ****/
 
-struct ndt_stats
-{
+struct ndt_stats {
 	__u64		ndts_allocs;
 	__u64		ndts_destroys;
 	__u64		ndts_hash_grows;
@@ -124,15 +120,13 @@ enum {
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
 
-struct ndtmsg
-{
+struct ndtmsg {
 	__u8		ndtm_family;
 	__u8		ndtm_pad1;
 	__u16		ndtm_pad2;
 };
 
-struct ndt_config
-{
+struct ndt_config {
 	__u16		ndtc_key_len;
 	__u16		ndtc_entry_size;
 	__u32		ndtc_entries;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5077de028317..465add6c43e3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -125,8 +125,7 @@ typedef enum netdev_tx netdev_tx_t;
  *	with byte counters.
  */
 
-struct net_device_stats
-{
+struct net_device_stats {
 	unsigned long	rx_packets;		/* total packets received	*/
 	unsigned long	tx_packets;		/* total packets transmitted	*/
 	unsigned long	rx_bytes;		/* total bytes received 	*/
@@ -179,8 +178,7 @@ struct neighbour;
 struct neigh_parms;
 struct sk_buff;
 
-struct netif_rx_stats
-{
+struct netif_rx_stats {
 	unsigned total;
 	unsigned dropped;
 	unsigned time_squeeze;
@@ -189,8 +187,7 @@ struct netif_rx_stats
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
 
-struct dev_addr_list
-{
+struct dev_addr_list {
 	struct dev_addr_list	*next;
 	u8			da_addr[MAX_ADDR_LEN];
 	u8			da_addrlen;
@@ -227,8 +224,7 @@ struct netdev_hw_addr_list {
 	int			count;
 };
 
-struct hh_cache
-{
+struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
 /*
@@ -291,8 +287,7 @@ struct header_ops {
  * code.
  */
 
-enum netdev_state_t
-{
+enum netdev_state_t {
 	__LINK_STATE_START,
 	__LINK_STATE_PRESENT,
 	__LINK_STATE_NOCARRIER,
@@ -341,8 +336,7 @@ struct napi_struct {
 	struct sk_buff		*skb;
 };
 
-enum
-{
+enum {
 	NAPI_STATE_SCHED,	/* Poll is scheduled */
 	NAPI_STATE_DISABLE,	/* Disable pending */
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
@@ -458,8 +452,7 @@ static inline void napi_synchronize(const struct napi_struct *n)
 # define napi_synchronize(n)	barrier()
 #endif
 
-enum netdev_queue_state_t
-{
+enum netdev_queue_state_t {
 	__QUEUE_STATE_XOFF,
 	__QUEUE_STATE_FROZEN,
 };
@@ -653,8 +646,7 @@ struct net_device_ops {
  *	moves out.
  */
 
-struct net_device
-{
+struct net_device {
 
 	/*
 	 * This is the first field of the "visible" part of this structure
@@ -1229,8 +1221,7 @@ static inline int unregister_gifconf(unsigned int family)
  * Incoming packets are placed on per-cpu queues so that
  * no locking is needed.
  */
-struct softnet_data
-{
+struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
@@ -1627,7 +1618,8 @@ static inline int netif_dormant(const struct net_device *dev)
  *
  * Check if carrier is operational
  */
-static inline int netif_oper_up(const struct net_device *dev) {
+static inline int netif_oper_up(const struct net_device *dev)
+{
 	return (dev->operstate == IF_OPER_UP ||
 		dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
 }
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 6132b5e6d9d3..48c54960773c 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -93,8 +93,7 @@ typedef unsigned int nf_hookfn(unsigned int hooknum,
 			       const struct net_device *out,
 			       int (*okfn)(struct sk_buff *));
 
-struct nf_hook_ops
-{
+struct nf_hook_ops {
 	struct list_head list;
 
 	/* User fills in from here down. */
@@ -106,8 +105,7 @@ struct nf_hook_ops
 	int priority;
 };
 
-struct nf_sockopt_ops
-{
+struct nf_sockopt_ops {
 	struct list_head list;
 
 	u_int8_t pf;
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index a8248ee422b7..a374787ed9b0 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -3,8 +3,7 @@
 /* Connection state tracking for netfilter.  This is separated from,
    but required by, the NAT layer; it can also be used by an iptables
    extension. */
-enum ip_conntrack_info
-{
+enum ip_conntrack_info {
 	/* Part of an established connection (either direction). */
 	IP_CT_ESTABLISHED,
 
@@ -76,8 +75,7 @@ enum ip_conntrack_status {
 };
 
 #ifdef __KERNEL__
-struct ip_conntrack_stat
-{
+struct ip_conntrack_stat {
 	unsigned int searched;
 	unsigned int found;
 	unsigned int new;
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index 47727d7546ea..3e3aa08980c3 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -3,8 +3,7 @@
 /* FTP tracking. */
 
 /* This enum is exposed to userspace */
-enum nf_ct_ftp_type
-{
+enum nf_ct_ftp_type {
 	/* PORT command from client */
 	NF_CT_FTP_PORT,
 	/* PASV response from server */
diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h
index 768f78c4ac53..ceeefe6681b5 100644
--- a/include/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/linux/netfilter/nf_conntrack_sctp.h
@@ -16,8 +16,7 @@ enum sctp_conntrack {
 	SCTP_CONNTRACK_MAX
 };
 
-struct ip_ct_sctp
-{
+struct ip_ct_sctp {
 	enum sctp_conntrack state;
 
 	__be32 vtag[IP_CT_DIR_MAX];
diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 4352feed2377..f6d97f64d7a0 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -55,8 +55,7 @@ struct ip_ct_tcp_state {
 	u_int8_t	flags;		/* per direction options */
 };
 
-struct ip_ct_tcp
-{
+struct ip_ct_tcp {
 	struct ip_ct_tcp_state seen[2];	/* connection parameters per direction */
 	u_int8_t	state;		/* state of the connection (enum tcp_conntrack) */
 	/* For detecting stale connections */
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 9f00da287f2c..49d321f3ccd2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -55,8 +55,7 @@ struct nfgenmsg {
 #include <linux/capability.h>
 #include <net/netlink.h>
 
-struct nfnl_callback
-{
+struct nfnl_callback {
 	int (*call)(struct sock *nl, struct sk_buff *skb, 
 		    const struct nlmsghdr *nlh,
 		    const struct nlattr * const cda[]);
@@ -64,8 +63,7 @@ struct nfnl_callback
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
 
-struct nfnetlink_subsystem
-{
+struct nfnetlink_subsystem {
 	const char *name;
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
 	__u8 cb_count;			/* number of callbacks */
diff --git a/include/linux/netfilter/nfnetlink_compat.h b/include/linux/netfilter/nfnetlink_compat.h
index eda55cabceec..ffb95036bbd4 100644
--- a/include/linux/netfilter/nfnetlink_compat.h
+++ b/include/linux/netfilter/nfnetlink_compat.h
@@ -21,8 +21,7 @@
  * ! nfnetlink use the same attributes methods. - J. Schulist.
  */
 
-struct nfattr
-{
+struct nfattr {
 	__u16 nfa_len;
 	__u16 nfa_type;	/* we use 15 bits for the type, and the highest
 				 * bit to indicate whether the payload is nested */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 812cb153cabb..378f27ae7772 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -6,8 +6,7 @@
 #define XT_FUNCTION_MAXNAMELEN 30
 #define XT_TABLE_MAXNAMELEN 32
 
-struct xt_entry_match
-{
+struct xt_entry_match {
 	union {
 		struct {
 			__u16 match_size;
@@ -31,8 +30,7 @@ struct xt_entry_match
 	unsigned char data[0];
 };
 
-struct xt_entry_target
-{
+struct xt_entry_target {
 	union {
 		struct {
 			__u16 target_size;
@@ -64,16 +62,14 @@ struct xt_entry_target
 	},								       \
 }
 
-struct xt_standard_target
-{
+struct xt_standard_target {
 	struct xt_entry_target target;
 	int verdict;
 };
 
 /* The argument to IPT_SO_GET_REVISION_*.  Returns highest revision
  * kernel supports, if >= revision. */
-struct xt_get_revision
-{
+struct xt_get_revision {
 	char name[XT_FUNCTION_MAXNAMELEN-1];
 
 	__u8 revision;
@@ -90,8 +86,7 @@ struct xt_get_revision
  * ip6t_entry and arpt_entry.  This sucks, and it is a hack.  It will be my
  * personal pleasure to remove it -HW
  */
-struct _xt_align
-{
+struct _xt_align {
 	__u8 u8;
 	__u16 u16;
 	__u32 u32;
@@ -109,14 +104,12 @@ struct _xt_align
 #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
 
-struct xt_counters
-{
+struct xt_counters {
 	__u64 pcnt, bcnt;			/* Packet and byte counters */
 };
 
 /* The argument to IPT_SO_ADD_COUNTERS. */
-struct xt_counters_info
-{
+struct xt_counters_info {
 	/* Which table. */
 	char name[XT_TABLE_MAXNAMELEN];
 
@@ -269,8 +262,7 @@ struct xt_tgdtor_param {
 	u_int8_t family;
 };
 
-struct xt_match
-{
+struct xt_match {
 	struct list_head list;
 
 	const char name[XT_FUNCTION_MAXNAMELEN-1];
@@ -310,8 +302,7 @@ struct xt_match
 };
 
 /* Registration hooks for targets. */
-struct xt_target
-{
+struct xt_target {
 	struct list_head list;
 
 	const char name[XT_FUNCTION_MAXNAMELEN-1];
@@ -349,8 +340,7 @@ struct xt_target
 };
 
 /* Furniture shopping... */
-struct xt_table
-{
+struct xt_table {
 	struct list_head list;
 
 	/* What hooks you will enter on */
@@ -371,8 +361,7 @@ struct xt_table
 #include <linux/netfilter_ipv4.h>
 
 /* The table itself */
-struct xt_table_info
-{
+struct xt_table_info {
 	/* Size per table */
 	unsigned int size;
 	/* Number of entries: FIXME. --RR */
@@ -528,8 +517,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
-struct compat_xt_entry_match
-{
+struct compat_xt_entry_match {
 	union {
 		struct {
 			u_int16_t match_size;
@@ -545,8 +533,7 @@ struct compat_xt_entry_match
 	unsigned char data[0];
 };
 
-struct compat_xt_entry_target
-{
+struct compat_xt_entry_target {
 	union {
 		struct {
 			u_int16_t target_size;
@@ -566,8 +553,7 @@ struct compat_xt_entry_target
  * need to change whole approach in order to calculate align as function of
  * current task alignment */
 
-struct compat_xt_counters
-{
+struct compat_xt_counters {
 #if defined(CONFIG_X86_64) || defined(CONFIG_IA64)
 	u_int32_t cnt[4];
 #else
@@ -575,8 +561,7 @@ struct compat_xt_counters
 #endif
 };
 
-struct compat_xt_counters_info
-{
+struct compat_xt_counters_info {
 	char name[XT_TABLE_MAXNAMELEN];
 	compat_uint_t num_counters;
 	struct compat_xt_counters counters[0];
diff --git a/include/linux/netfilter/xt_connbytes.h b/include/linux/netfilter/xt_connbytes.h
index 52bd6153b996..92fcbb0d193e 100644
--- a/include/linux/netfilter/xt_connbytes.h
+++ b/include/linux/netfilter/xt_connbytes.h
@@ -15,8 +15,7 @@ enum xt_connbytes_direction {
 	XT_CONNBYTES_DIR_BOTH,
 };
 
-struct xt_connbytes_info
-{
+struct xt_connbytes_info {
 	struct {
 		aligned_u64 from;	/* count to be matched */
 		aligned_u64 to;		/* count to be matched */
diff --git a/include/linux/netfilter/xt_esp.h b/include/linux/netfilter/xt_esp.h
index ef6fa4747d0a..ee6882408000 100644
--- a/include/linux/netfilter/xt_esp.h
+++ b/include/linux/netfilter/xt_esp.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-struct xt_esp
-{
+struct xt_esp {
 	__u32 spis[2];	/* Security Parameter Index */
 	__u8  invflags;	/* Inverse flags */
 };
diff --git a/include/linux/netfilter/xt_multiport.h b/include/linux/netfilter/xt_multiport.h
index 185db499fcbc..5b7e72dfffc5 100644
--- a/include/linux/netfilter/xt_multiport.h
+++ b/include/linux/netfilter/xt_multiport.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-enum xt_multiport_flags
-{
+enum xt_multiport_flags {
 	XT_MULTIPORT_SOURCE,
 	XT_MULTIPORT_DESTINATION,
 	XT_MULTIPORT_EITHER
@@ -13,15 +12,13 @@ enum xt_multiport_flags
 #define XT_MULTI_PORTS	15
 
 /* Must fit inside union xt_matchinfo: 16 bytes */
-struct xt_multiport
-{
+struct xt_multiport {
 	__u8 flags;				/* Type of comparison */
 	__u8 count;				/* Number of ports */
 	__u16 ports[XT_MULTI_PORTS];	/* Ports */
 };
 
-struct xt_multiport_v1
-{
+struct xt_multiport_v1 {
 	__u8 flags;				/* Type of comparison */
 	__u8 count;				/* Number of ports */
 	__u16 ports[XT_MULTI_PORTS];	/* Ports */
diff --git a/include/linux/netfilter/xt_policy.h b/include/linux/netfilter/xt_policy.h
index 7bb64e7c853d..be8ead05c316 100644
--- a/include/linux/netfilter/xt_policy.h
+++ b/include/linux/netfilter/xt_policy.h
@@ -5,22 +5,19 @@
 
 #define XT_POLICY_MAX_ELEM	4
 
-enum xt_policy_flags
-{
+enum xt_policy_flags {
 	XT_POLICY_MATCH_IN	= 0x1,
 	XT_POLICY_MATCH_OUT	= 0x2,
 	XT_POLICY_MATCH_NONE	= 0x4,
 	XT_POLICY_MATCH_STRICT	= 0x8,
 };
 
-enum xt_policy_modes
-{
+enum xt_policy_modes {
 	XT_POLICY_MODE_TRANSPORT,
 	XT_POLICY_MODE_TUNNEL
 };
 
-struct xt_policy_spec
-{
+struct xt_policy_spec {
 	__u8	saddr:1,
 			daddr:1,
 			proto:1,
@@ -30,15 +27,13 @@ struct xt_policy_spec
 };
 
 #ifndef __KERNEL__
-union xt_policy_addr
-{
+union xt_policy_addr {
 	struct in_addr	a4;
 	struct in6_addr	a6;
 };
 #endif
 
-struct xt_policy_elem
-{
+struct xt_policy_elem {
 	union {
 #ifdef __KERNEL__
 		struct {
@@ -65,8 +60,7 @@ struct xt_policy_elem
 	struct xt_policy_spec	invert;
 };
 
-struct xt_policy_info
-{
+struct xt_policy_info {
 	struct xt_policy_elem pol[XT_POLICY_MAX_ELEM];
 	__u16 flags;
 	__u16 len;
diff --git a/include/linux/netfilter/xt_state.h b/include/linux/netfilter/xt_state.h
index c06f32edee07..7b32de886613 100644
--- a/include/linux/netfilter/xt_state.h
+++ b/include/linux/netfilter/xt_state.h
@@ -6,8 +6,7 @@
 
 #define XT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1))
 
-struct xt_state_info
-{
+struct xt_state_info {
 	unsigned int statemask;
 };
 #endif /*_XT_STATE_H*/
diff --git a/include/linux/netfilter/xt_string.h b/include/linux/netfilter/xt_string.h
index ecbb95fc89ed..235347c02eab 100644
--- a/include/linux/netfilter/xt_string.h
+++ b/include/linux/netfilter/xt_string.h
@@ -11,8 +11,7 @@ enum {
 	XT_STRING_FLAG_IGNORECASE	= 0x02
 };
 
-struct xt_string_info
-{
+struct xt_string_info {
 	__u16 from_offset;
 	__u16 to_offset;
 	char	  algo[XT_STRING_MAX_ALGO_NAME_SIZE];
diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/linux/netfilter/xt_tcpudp.h
index a490a0bc1d29..38aa7b399021 100644
--- a/include/linux/netfilter/xt_tcpudp.h
+++ b/include/linux/netfilter/xt_tcpudp.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 
 /* TCP matching stuff */
-struct xt_tcp
-{
+struct xt_tcp {
 	__u16 spts[2];			/* Source port range. */
 	__u16 dpts[2];			/* Destination port range. */
 	__u8 option;			/* TCP Option iff non-zero*/
@@ -22,8 +21,7 @@ struct xt_tcp
 #define XT_TCP_INV_MASK		0x0F	/* All possible flags. */
 
 /* UDP matching stuff */
-struct xt_udp
-{
+struct xt_udp {
 	__u16 spts[2];			/* Source port range. */
 	__u16 dpts[2];			/* Destination port range. */
 	__u8 invflags;			/* Inverse flags */
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 6fe3e6aa10db..f2336523a9df 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -132,8 +132,7 @@ struct arpt_entry
 #define ARPT_RETURN XT_RETURN
 
 /* The argument to ARPT_SO_GET_INFO */
-struct arpt_getinfo
-{
+struct arpt_getinfo {
 	/* Which table: caller fills this in. */
 	char name[ARPT_TABLE_MAXNAMELEN];
 
@@ -155,8 +154,7 @@ struct arpt_getinfo
 };
 
 /* The argument to ARPT_SO_SET_REPLACE. */
-struct arpt_replace
-{
+struct arpt_replace {
 	/* Which table. */
 	char name[ARPT_TABLE_MAXNAMELEN];
 
@@ -191,8 +189,7 @@ struct arpt_replace
 #define arpt_counters xt_counters
 
 /* The argument to ARPT_SO_GET_ENTRIES. */
-struct arpt_get_entries
-{
+struct arpt_get_entries {
 	/* Which table: user fills this in. */
 	char name[ARPT_TABLE_MAXNAMELEN];
 
@@ -224,20 +221,17 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e
 #ifdef __KERNEL__
 
 /* Standard entry. */
-struct arpt_standard
-{
+struct arpt_standard {
 	struct arpt_entry entry;
 	struct arpt_standard_target target;
 };
 
-struct arpt_error_target
-{
+struct arpt_error_target {
 	struct arpt_entry_target target;
 	char errorname[ARPT_FUNCTION_MAXNAMELEN];
 };
 
-struct arpt_error
-{
+struct arpt_error {
 	struct arpt_entry entry;
 	struct arpt_error_target target;
 };
@@ -279,8 +273,7 @@ extern unsigned int arpt_do_table(struct sk_buff *skb,
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
-struct compat_arpt_entry
-{
+struct compat_arpt_entry {
 	struct arpt_arp arp;
 	u_int16_t target_offset;
 	u_int16_t next_offset;
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index a11b0c2017fd..c73ef0b18bdc 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -58,8 +58,7 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 }
 #endif
 
-struct ebt_802_3_info 
-{
+struct ebt_802_3_info {
 	uint8_t  sap;
 	__be16 type;
 	uint8_t  bitmask;
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h
index 7654069233ca..0009558609a7 100644
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -29,14 +29,12 @@
  * Yes, it is a memory overhead, but in 2003 AD, who cares?
  */
 
-struct ebt_mac_wormhash_tuple
-{
+struct ebt_mac_wormhash_tuple {
 	uint32_t cmp[2];
 	__be32 ip;
 };
 
-struct ebt_mac_wormhash
-{
+struct ebt_mac_wormhash {
 	int table[257];
 	int poolsize;
 	struct ebt_mac_wormhash_tuple pool[0];
@@ -45,8 +43,7 @@ struct ebt_mac_wormhash
 #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \
 		+ (x)->poolsize * sizeof(struct ebt_mac_wormhash_tuple) : 0)
 
-struct ebt_among_info
-{
+struct ebt_among_info {
 	int wh_dst_ofs;
 	int wh_src_ofs;
 	int bitmask;
diff --git a/include/linux/netfilter_bridge/ebt_arpreply.h b/include/linux/netfilter_bridge/ebt_arpreply.h
index 96a8339960e0..7e77896e1fbf 100644
--- a/include/linux/netfilter_bridge/ebt_arpreply.h
+++ b/include/linux/netfilter_bridge/ebt_arpreply.h
@@ -1,8 +1,7 @@
 #ifndef __LINUX_BRIDGE_EBT_ARPREPLY_H
 #define __LINUX_BRIDGE_EBT_ARPREPLY_H
 
-struct ebt_arpreply_info
-{
+struct ebt_arpreply_info {
 	unsigned char mac[ETH_ALEN];
 	int target;
 };
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h
index d6847475bf2e..6a708fb92241 100644
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -26,8 +26,7 @@
 #define EBT_IP_MATCH "ip"
 
 /* the same values are used for the invflags */
-struct ebt_ip_info
-{
+struct ebt_ip_info {
 	__be32 saddr;
 	__be32 daddr;
 	__be32 smsk;
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
index 2273c3ae33ca..e5de98701519 100644
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -23,8 +23,7 @@
 #define EBT_IP6_MATCH "ip6"
 
 /* the same values are used for the invflags */
-struct ebt_ip6_info
-{
+struct ebt_ip6_info {
 	struct in6_addr saddr;
 	struct in6_addr daddr;
 	struct in6_addr smsk;
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h
index d8b65000afe4..4bf76b751676 100644
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -9,8 +9,7 @@
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
    seconds, or one every 59 hours. */
 
-struct ebt_limit_info
-{
+struct ebt_limit_info {
 	u_int32_t avg;    /* Average secs between packets * scale */
 	u_int32_t burst;  /* Period multiplier for upper limit. */
 
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index b76e653157e5..cc2cdfb764bc 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -9,8 +9,7 @@
 #define EBT_LOG_PREFIX_SIZE 30
 #define EBT_LOG_WATCHER "log"
 
-struct ebt_log_info
-{
+struct ebt_log_info {
 	uint8_t loglevel;
 	uint8_t prefix[EBT_LOG_PREFIX_SIZE];
 	uint32_t bitmask;
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h
index 301524ff1065..9ceb10ec0ed6 100644
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -4,8 +4,7 @@
 #define EBT_MARK_AND 0x01
 #define EBT_MARK_OR 0x02
 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
-struct ebt_mark_m_info
-{
+struct ebt_mark_m_info {
 	unsigned long mark, mask;
 	uint8_t invert;
 	uint8_t bitmask;
diff --git a/include/linux/netfilter_bridge/ebt_mark_t.h b/include/linux/netfilter_bridge/ebt_mark_t.h
index 6270f6f33693..7d5a268a4311 100644
--- a/include/linux/netfilter_bridge/ebt_mark_t.h
+++ b/include/linux/netfilter_bridge/ebt_mark_t.h
@@ -13,8 +13,7 @@
 #define MARK_AND_VALUE (0xffffffd0)
 #define MARK_XOR_VALUE (0xffffffc0)
 
-struct ebt_mark_t_info
-{
+struct ebt_mark_t_info {
 	unsigned long mark;
 	/* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */
 	int target;
diff --git a/include/linux/netfilter_bridge/ebt_nat.h b/include/linux/netfilter_bridge/ebt_nat.h
index 435b886a51aa..5e74e3b03bd6 100644
--- a/include/linux/netfilter_bridge/ebt_nat.h
+++ b/include/linux/netfilter_bridge/ebt_nat.h
@@ -2,8 +2,7 @@
 #define __LINUX_BRIDGE_EBT_NAT_H
 
 #define NAT_ARP_BIT  (0x00000010)
-struct ebt_nat_info
-{
+struct ebt_nat_info {
 	unsigned char mac[ETH_ALEN];
 	/* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */
 	int target;
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h
index 0d64bbb29c66..51a799840931 100644
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -1,8 +1,7 @@
 #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H
 #define __LINUX_BRIDGE_EBT_PKTTYPE_H
 
-struct ebt_pkttype_info
-{
+struct ebt_pkttype_info {
 	uint8_t pkt_type;
 	uint8_t invert;
 };
diff --git a/include/linux/netfilter_bridge/ebt_redirect.h b/include/linux/netfilter_bridge/ebt_redirect.h
index 5c67990fce39..dd9622ce8488 100644
--- a/include/linux/netfilter_bridge/ebt_redirect.h
+++ b/include/linux/netfilter_bridge/ebt_redirect.h
@@ -1,8 +1,7 @@
 #ifndef __LINUX_BRIDGE_EBT_REDIRECT_H
 #define __LINUX_BRIDGE_EBT_REDIRECT_H
 
-struct ebt_redirect_info
-{
+struct ebt_redirect_info {
 	/* EBT_ACCEPT, EBT_DROP, EBT_CONTINUE or EBT_RETURN */
 	int target;
 };
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h
index e5fd67850f4d..e503a0aa2728 100644
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -20,8 +20,7 @@
 
 #define EBT_STP_MATCH "stp"
 
-struct ebt_stp_config_info
-{
+struct ebt_stp_config_info {
 	uint8_t flags;
 	uint16_t root_priol, root_priou;
 	char root_addr[6], root_addrmsk[6];
@@ -35,8 +34,7 @@ struct ebt_stp_config_info
 	uint16_t forward_delayl, forward_delayu;
 };
 
-struct ebt_stp_info
-{
+struct ebt_stp_info {
 	uint8_t type;
 	struct ebt_stp_config_info config;
 	uint16_t bitmask;
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index ea281e6a2048..3cc40c131cc3 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -34,14 +34,12 @@
 struct xt_match;
 struct xt_target;
 
-struct ebt_counter
-{
+struct ebt_counter {
 	uint64_t pcnt;
 	uint64_t bcnt;
 };
 
-struct ebt_replace
-{
+struct ebt_replace {
 	char name[EBT_TABLE_MAXNAMELEN];
 	unsigned int valid_hooks;
 	/* nr of rules in the table */
@@ -57,8 +55,7 @@ struct ebt_replace
 	char __user *entries;
 };
 
-struct ebt_replace_kernel
-{
+struct ebt_replace_kernel {
 	char name[EBT_TABLE_MAXNAMELEN];
 	unsigned int valid_hooks;
 	/* nr of rules in the table */
@@ -120,8 +117,7 @@ struct ebt_entries {
 #define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \
    | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST)
 
-struct ebt_entry_match
-{
+struct ebt_entry_match {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
 		struct xt_match *match;
@@ -131,8 +127,7 @@ struct ebt_entry_match
 	unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
 };
 
-struct ebt_entry_watcher
-{
+struct ebt_entry_watcher {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
 		struct xt_target *watcher;
@@ -142,8 +137,7 @@ struct ebt_entry_watcher
 	unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
 };
 
-struct ebt_entry_target
-{
+struct ebt_entry_target {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
 		struct xt_target *target;
@@ -154,8 +148,7 @@ struct ebt_entry_target
 };
 
 #define EBT_STANDARD_TARGET "standard"
-struct ebt_standard_target
-{
+struct ebt_standard_target {
 	struct ebt_entry_target target;
 	int verdict;
 };
@@ -206,8 +199,7 @@ struct ebt_entry {
 #define EBT_MATCH 0
 #define EBT_NOMATCH 1
 
-struct ebt_match
-{
+struct ebt_match {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	bool (*match)(const struct sk_buff *skb, const struct net_device *in,
@@ -224,8 +216,7 @@ struct ebt_match
 	struct module *me;
 };
 
-struct ebt_watcher
-{
+struct ebt_watcher {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	unsigned int (*target)(struct sk_buff *skb,
@@ -242,8 +233,7 @@ struct ebt_watcher
 	struct module *me;
 };
 
-struct ebt_target
-{
+struct ebt_target {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	/* returns one of the standard EBT_* verdicts */
@@ -262,15 +252,13 @@ struct ebt_target
 };
 
 /* used for jumping from and into user defined chains (udc) */
-struct ebt_chainstack
-{
+struct ebt_chainstack {
 	struct ebt_entries *chaininfo; /* pointer to chain data */
 	struct ebt_entry *e; /* pointer to entry data */
 	unsigned int n; /* n'th entry */
 };
 
-struct ebt_table_info
-{
+struct ebt_table_info {
 	/* total size of the entries */
 	unsigned int entries_size;
 	unsigned int nentries;
@@ -282,8 +270,7 @@ struct ebt_table_info
 	struct ebt_counter counters[0] ____cacheline_aligned;
 };
 
-struct ebt_table
-{
+struct ebt_table {
 	struct list_head list;
 	char name[EBT_TABLE_MAXNAMELEN];
 	struct ebt_replace_kernel *table;
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 61fafc868a7b..27b3f5807305 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -76,8 +76,7 @@ struct ipt_ip {
 /* This structure defines each of the firewall rules.  Consists of 3
    parts which are 1) general IP header stuff 2) match specific
    stuff 3) the target to perform if the rule matches */
-struct ipt_entry
-{
+struct ipt_entry {
 	struct ipt_ip ip;
 
 	/* Mark with fields that we care about. */
@@ -135,8 +134,7 @@ struct ipt_entry
 #define IPT_UDP_INV_MASK	XT_UDP_INV_MASK
 
 /* ICMP matching stuff */
-struct ipt_icmp
-{
+struct ipt_icmp {
 	u_int8_t type;				/* type to match */
 	u_int8_t code[2];			/* range of code */
 	u_int8_t invflags;			/* Inverse flags */
@@ -146,8 +144,7 @@ struct ipt_icmp
 #define IPT_ICMP_INV	0x01	/* Invert the sense of type/code test */
 
 /* The argument to IPT_SO_GET_INFO */
-struct ipt_getinfo
-{
+struct ipt_getinfo {
 	/* Which table: caller fills this in. */
 	char name[IPT_TABLE_MAXNAMELEN];
 
@@ -169,8 +166,7 @@ struct ipt_getinfo
 };
 
 /* The argument to IPT_SO_SET_REPLACE. */
-struct ipt_replace
-{
+struct ipt_replace {
 	/* Which table. */
 	char name[IPT_TABLE_MAXNAMELEN];
 
@@ -204,8 +200,7 @@ struct ipt_replace
 #define ipt_counters_info xt_counters_info
 
 /* The argument to IPT_SO_GET_ENTRIES. */
-struct ipt_get_entries
-{
+struct ipt_get_entries {
 	/* Which table: user fills this in. */
 	char name[IPT_TABLE_MAXNAMELEN];
 
@@ -250,20 +245,17 @@ extern struct xt_table *ipt_register_table(struct net *net,
 extern void ipt_unregister_table(struct xt_table *table);
 
 /* Standard entry. */
-struct ipt_standard
-{
+struct ipt_standard {
 	struct ipt_entry entry;
 	struct ipt_standard_target target;
 };
 
-struct ipt_error_target
-{
+struct ipt_error_target {
 	struct ipt_entry_target target;
 	char errorname[IPT_FUNCTION_MAXNAMELEN];
 };
 
-struct ipt_error
-{
+struct ipt_error {
 	struct ipt_entry entry;
 	struct ipt_error_target target;
 };
@@ -301,8 +293,7 @@ extern unsigned int ipt_do_table(struct sk_buff *skb,
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
-struct compat_ipt_entry
-{
+struct compat_ipt_entry {
 	struct ipt_ip ip;
 	compat_uint_t nfcache;
 	u_int16_t target_offset;
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index be6e682a85ec..2529660c5b38 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -5,8 +5,7 @@
 
 #define IPT_SAME_NODST		0x01
 
-struct ipt_same_info
-{
+struct ipt_same_info {
 	unsigned char info;
 	u_int32_t rangesize;
 	u_int32_t ipnum;
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h
index 7b9a2ac7adb9..2e555b4d05e3 100644
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -1,8 +1,7 @@
 #ifndef _IPT_AH_H
 #define _IPT_AH_H
 
-struct ipt_ah
-{
+struct ipt_ah {
 	u_int32_t spis[2];			/* Security Parameter Index */
 	u_int8_t  invflags;			/* Inverse flags */
 };
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index a64e1451ac38..b31050d20ae4 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -88,8 +88,7 @@ struct ip6t_ip6 {
 /* This structure defines each of the firewall rules.  Consists of 3
    parts which are 1) general IP header stuff 2) match specific
    stuff 3) the target to perform if the rule matches */
-struct ip6t_entry
-{
+struct ip6t_entry {
 	struct ip6t_ip6 ipv6;
 
 	/* Mark with fields that we care about. */
@@ -111,20 +110,17 @@ struct ip6t_entry
 };
 
 /* Standard entry */
-struct ip6t_standard
-{
+struct ip6t_standard {
 	struct ip6t_entry entry;
 	struct ip6t_standard_target target;
 };
 
-struct ip6t_error_target
-{
+struct ip6t_error_target {
 	struct ip6t_entry_target target;
 	char errorname[IP6T_FUNCTION_MAXNAMELEN];
 };
 
-struct ip6t_error
-{
+struct ip6t_error {
 	struct ip6t_entry entry;
 	struct ip6t_error_target target;
 };
@@ -195,8 +191,7 @@ struct ip6t_error
 #define IP6T_UDP_INV_MASK	XT_UDP_INV_MASK
 
 /* ICMP matching stuff */
-struct ip6t_icmp
-{
+struct ip6t_icmp {
 	u_int8_t type;				/* type to match */
 	u_int8_t code[2];			/* range of code */
 	u_int8_t invflags;			/* Inverse flags */
@@ -206,8 +201,7 @@ struct ip6t_icmp
 #define IP6T_ICMP_INV	0x01	/* Invert the sense of type/code test */
 
 /* The argument to IP6T_SO_GET_INFO */
-struct ip6t_getinfo
-{
+struct ip6t_getinfo {
 	/* Which table: caller fills this in. */
 	char name[IP6T_TABLE_MAXNAMELEN];
 
@@ -229,8 +223,7 @@ struct ip6t_getinfo
 };
 
 /* The argument to IP6T_SO_SET_REPLACE. */
-struct ip6t_replace
-{
+struct ip6t_replace {
 	/* Which table. */
 	char name[IP6T_TABLE_MAXNAMELEN];
 
@@ -264,8 +257,7 @@ struct ip6t_replace
 #define ip6t_counters_info xt_counters_info
 
 /* The argument to IP6T_SO_GET_ENTRIES. */
-struct ip6t_get_entries
-{
+struct ip6t_get_entries {
 	/* Which table: user fills this in. */
 	char name[IP6T_TABLE_MAXNAMELEN];
 
@@ -330,8 +322,7 @@ extern int ip6_masked_addrcmp(const struct in6_addr *addr1,
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
-struct compat_ip6t_entry
-{
+struct compat_ip6t_entry {
 	struct ip6t_ip6 ipv6;
 	compat_uint_t nfcache;
 	u_int16_t target_offset;
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h
index 8531879eb464..17a745cfb2c7 100644
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -1,8 +1,7 @@
 #ifndef _IP6T_AH_H
 #define _IP6T_AH_H
 
-struct ip6t_ah
-{
+struct ip6t_ah {
 	u_int32_t spis[2];			/* Security Parameter Index */
 	u_int32_t hdrlen;			/* Header Length */
 	u_int8_t  hdrres;			/* Test of the Reserved Filed */
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h
index 66070a0d6dfc..3724d0850920 100644
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -1,8 +1,7 @@
 #ifndef _IP6T_FRAG_H
 #define _IP6T_FRAG_H
 
-struct ip6t_frag
-{
+struct ip6t_frag {
 	u_int32_t ids[2];			/* Security Parameter Index */
 	u_int32_t hdrlen;			/* Header Length */
 	u_int8_t  flags;			/*  */
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
index 51c53fc9c44a..01dfd445596a 100644
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -8,8 +8,7 @@ on whether they contain certain headers */
 #ifndef __IPV6HEADER_H
 #define __IPV6HEADER_H
 
-struct ip6t_ipv6header_info
-{
+struct ip6t_ipv6header_info {
 	u_int8_t matchflags;
 	u_int8_t invflags;
 	u_int8_t modeflag;
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
index b9ca9a5f74d0..18549bca2d1f 100644
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -2,8 +2,7 @@
 #define _IP6T_MH_H
 
 /* MH matching stuff */
-struct ip6t_mh
-{
+struct ip6t_mh {
 	u_int8_t types[2];	/* MH type range */
 	u_int8_t invflags;	/* Inverse flags */
 };
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h
index a07e36380ae8..62d89bcd9f9c 100644
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -3,8 +3,7 @@
 
 #define IP6T_OPTS_OPTSNR 16
 
-struct ip6t_opts
-{
+struct ip6t_opts {
 	u_int32_t hdrlen;			/* Header Length */
 	u_int8_t flags;				/*  */
 	u_int8_t invflags;			/* Inverse flags */
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h
index 52156023e8db..ab91bfd2cd00 100644
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -5,8 +5,7 @@
 
 #define IP6T_RT_HOPS 16
 
-struct ip6t_rt
-{
+struct ip6t_rt {
 	u_int32_t rt_type;			/* Routing Type */
 	u_int32_t segsleft[2];			/* Segments Left */
 	u_int32_t hdrlen;			/* Header Length */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index ab5d3126831f..fde27c017326 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -29,16 +29,14 @@
 
 struct net;
 
-struct sockaddr_nl
-{
+struct sockaddr_nl {
 	sa_family_t	nl_family;	/* AF_NETLINK	*/
 	unsigned short	nl_pad;		/* zero		*/
 	__u32		nl_pid;		/* port ID	*/
        	__u32		nl_groups;	/* multicast groups mask */
 };
 
-struct nlmsghdr
-{
+struct nlmsghdr {
 	__u32		nlmsg_len;	/* Length of message including header */
 	__u16		nlmsg_type;	/* Message content */
 	__u16		nlmsg_flags;	/* Additional flags */
@@ -94,8 +92,7 @@ struct nlmsghdr
 
 #define NLMSG_MIN_TYPE		0x10	/* < 0x10: reserved control messages */
 
-struct nlmsgerr
-{
+struct nlmsgerr {
 	int		error;
 	struct nlmsghdr msg;
 };
@@ -106,8 +103,7 @@ struct nlmsgerr
 #define NETLINK_BROADCAST_ERROR	4
 #define NETLINK_NO_ENOBUFS	5
 
-struct nl_pktinfo
-{
+struct nl_pktinfo {
 	__u32	group;
 };
 
@@ -127,8 +123,7 @@ enum {
  *  <-------------- nlattr->nla_len -------------->
  */
 
-struct nlattr
-{
+struct nlattr {
 	__u16           nla_len;
 	__u16           nla_type;
 };
@@ -161,8 +156,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 	return (struct nlmsghdr *)skb->data;
 }
 
-struct netlink_skb_parms
-{
+struct netlink_skb_parms {
 	struct ucred		creds;		/* Skb credentials	*/
 	__u32			pid;
 	__u32			dst_group;
@@ -220,8 +214,7 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
 #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
 
 
-struct netlink_callback
-{
+struct netlink_callback {
 	struct sk_buff		*skb;
 	const struct nlmsghdr	*nlh;
 	int			(*dump)(struct sk_buff * skb,
@@ -231,8 +224,7 @@ struct netlink_callback
 	long			args[6];
 };
 
-struct netlink_notify
-{
+struct netlink_notify {
 	struct net *net;
 	int pid;
 	int protocol;
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 3c842edff388..7f6ba8658abe 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -75,8 +75,7 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
 #define SET_TC_AT(v,n)   ((V_TC_AT(n)) | (v & ~M_TC_AT))
 
 /* Action attributes */
-enum
-{
+enum {
 	TCA_ACT_UNSPEC,
 	TCA_ACT_KIND,
 	TCA_ACT_OPTIONS,
@@ -108,8 +107,7 @@ enum
 #define TC_ACT_JUMP		0x10000000
 
 /* Action type identifiers*/
-enum
-{
+enum {
 	TCA_ID_UNSPEC=0,
 	TCA_ID_POLICE=1,
 	/* other actions go here */
@@ -118,8 +116,7 @@ enum
 
 #define TCA_ID_MAX __TCA_ID_MAX
 
-struct tc_police
-{
+struct tc_police {
 	__u32			index;
 	int			action;
 #define TC_POLICE_UNSPEC	TC_ACT_UNSPEC
@@ -138,15 +135,13 @@ struct tc_police
 	__u32			capab;
 };
 
-struct tcf_t
-{
+struct tcf_t {
 	__u64   install;
 	__u64   lastuse;
 	__u64   expires;
 };
 
-struct tc_cnt
-{
+struct tc_cnt {
 	int                   refcnt; 
 	int                   bindcnt;
 };
@@ -158,8 +153,7 @@ struct tc_cnt
 	int                   refcnt; \
 	int                   bindcnt
 
-enum
-{
+enum {
 	TCA_POLICE_UNSPEC,
 	TCA_POLICE_TBF,
 	TCA_POLICE_RATE,
@@ -182,8 +176,7 @@ enum
 #define TC_U32_UNSPEC	0
 #define TC_U32_ROOT	(0xFFF00000)
 
-enum
-{
+enum {
 	TCA_U32_UNSPEC,
 	TCA_U32_CLASSID,
 	TCA_U32_HASH,
@@ -200,16 +193,14 @@ enum
 
 #define TCA_U32_MAX (__TCA_U32_MAX - 1)
 
-struct tc_u32_key
-{
+struct tc_u32_key {
 	__be32		mask;
 	__be32		val;
 	int		off;
 	int		offmask;
 };
 
-struct tc_u32_sel
-{
+struct tc_u32_sel {
 	unsigned char		flags;
 	unsigned char		offshift;
 	unsigned char		nkeys;
@@ -223,15 +214,13 @@ struct tc_u32_sel
 	struct tc_u32_key	keys[0];
 };
 
-struct tc_u32_mark
-{
+struct tc_u32_mark {
 	__u32		val;
 	__u32		mask;
 	__u32		success;
 };
 
-struct tc_u32_pcnt
-{
+struct tc_u32_pcnt {
 	__u64 rcnt;
 	__u64 rhit;
 	__u64 kcnts[0];
@@ -249,8 +238,7 @@ struct tc_u32_pcnt
 
 /* RSVP filter */
 
-enum
-{
+enum {
 	TCA_RSVP_UNSPEC,
 	TCA_RSVP_CLASSID,
 	TCA_RSVP_DST,
@@ -263,15 +251,13 @@ enum
 
 #define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
 
-struct tc_rsvp_gpi
-{
+struct tc_rsvp_gpi {
 	__u32	key;
 	__u32	mask;
 	int	offset;
 };
 
-struct tc_rsvp_pinfo
-{
+struct tc_rsvp_pinfo {
 	struct tc_rsvp_gpi dpi;
 	struct tc_rsvp_gpi spi;
 	__u8	protocol;
@@ -282,8 +268,7 @@ struct tc_rsvp_pinfo
 
 /* ROUTE filter */
 
-enum
-{
+enum {
 	TCA_ROUTE4_UNSPEC,
 	TCA_ROUTE4_CLASSID,
 	TCA_ROUTE4_TO,
@@ -299,8 +284,7 @@ enum
 
 /* FW filter */
 
-enum
-{
+enum {
 	TCA_FW_UNSPEC,
 	TCA_FW_CLASSID,
 	TCA_FW_POLICE,
@@ -314,8 +298,7 @@ enum
 
 /* TC index filter */
 
-enum
-{
+enum {
 	TCA_TCINDEX_UNSPEC,
 	TCA_TCINDEX_HASH,
 	TCA_TCINDEX_MASK,
@@ -331,8 +314,7 @@ enum
 
 /* Flow filter */
 
-enum
-{
+enum {
 	FLOW_KEY_SRC,
 	FLOW_KEY_DST,
 	FLOW_KEY_PROTO,
@@ -355,14 +337,12 @@ enum
 
 #define FLOW_KEY_MAX	(__FLOW_KEY_MAX - 1)
 
-enum
-{
+enum {
 	FLOW_MODE_MAP,
 	FLOW_MODE_HASH,
 };
 
-enum
-{
+enum {
 	TCA_FLOW_UNSPEC,
 	TCA_FLOW_KEYS,
 	TCA_FLOW_MODE,
@@ -383,8 +363,7 @@ enum
 
 /* Basic filter */
 
-enum
-{
+enum {
 	TCA_BASIC_UNSPEC,
 	TCA_BASIC_CLASSID,
 	TCA_BASIC_EMATCHES,
@@ -398,8 +377,7 @@ enum
 
 /* Cgroup classifier */
 
-enum
-{
+enum {
 	TCA_CGROUP_UNSPEC,
 	TCA_CGROUP_ACT,
 	TCA_CGROUP_POLICE,
@@ -411,14 +389,12 @@ enum
 
 /* Extended Matches */
 
-struct tcf_ematch_tree_hdr
-{
+struct tcf_ematch_tree_hdr {
 	__u16		nmatches;
 	__u16		progid;
 };
 
-enum
-{
+enum {
 	TCA_EMATCH_TREE_UNSPEC,
 	TCA_EMATCH_TREE_HDR,
 	TCA_EMATCH_TREE_LIST,
@@ -426,8 +402,7 @@ enum
 };
 #define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
 
-struct tcf_ematch_hdr
-{
+struct tcf_ematch_hdr {
 	__u16		matchid;
 	__u16		kind;
 	__u16		flags;
@@ -457,8 +432,7 @@ struct tcf_ematch_hdr
 #define TCF_EM_REL_MASK	3
 #define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
 
-enum
-{
+enum {
 	TCF_LAYER_LINK,
 	TCF_LAYER_NETWORK,
 	TCF_LAYER_TRANSPORT,
@@ -479,13 +453,11 @@ enum
 #define        TCF_EM_VLAN		6
 #define	TCF_EM_MAX		6
 
-enum
-{
+enum {
 	TCF_EM_PROG_TC
 };
 
-enum
-{
+enum {
 	TCF_EM_OPND_EQ,
 	TCF_EM_OPND_GT,
 	TCF_EM_OPND_LT
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d51a2b3e221e..2cfa4bc8dea6 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -29,8 +29,7 @@
    Particular schedulers may have also their private records.
  */
 
-struct tc_stats
-{
+struct tc_stats {
 	__u64	bytes;			/* NUmber of enqueues bytes */
 	__u32	packets;		/* Number of enqueued packets	*/
 	__u32	drops;			/* Packets dropped because of lack of resources */
@@ -42,8 +41,7 @@ struct tc_stats
 	__u32	backlog;
 };
 
-struct tc_estimator
-{
+struct tc_estimator {
 	signed char	interval;
 	unsigned char	ewma_log;
 };
@@ -75,8 +73,7 @@ struct tc_estimator
 #define TC_H_ROOT	(0xFFFFFFFFU)
 #define TC_H_INGRESS    (0xFFFFFFF1U)
 
-struct tc_ratespec
-{
+struct tc_ratespec {
 	unsigned char	cell_log;
 	unsigned char	__reserved;
 	unsigned short	overhead;
@@ -109,8 +106,7 @@ enum {
 
 /* FIFO section */
 
-struct tc_fifo_qopt
-{
+struct tc_fifo_qopt {
 	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
 };
 
@@ -119,8 +115,7 @@ struct tc_fifo_qopt
 #define TCQ_PRIO_BANDS	16
 #define TCQ_MIN_PRIO_BANDS 2
 
-struct tc_prio_qopt
-{
+struct tc_prio_qopt {
 	int	bands;			/* Number of bands */
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
@@ -134,8 +129,7 @@ struct tc_multiq_qopt {
 
 /* TBF section */
 
-struct tc_tbf_qopt
-{
+struct tc_tbf_qopt {
 	struct tc_ratespec rate;
 	struct tc_ratespec peakrate;
 	__u32		limit;
@@ -143,8 +137,7 @@ struct tc_tbf_qopt
 	__u32		mtu;
 };
 
-enum
-{
+enum {
 	TCA_TBF_UNSPEC,
 	TCA_TBF_PARMS,
 	TCA_TBF_RTAB,
@@ -161,8 +154,7 @@ enum
 
 /* SFQ section */
 
-struct tc_sfq_qopt
-{
+struct tc_sfq_qopt {
 	unsigned	quantum;	/* Bytes per round allocated to flow */
 	int		perturb_period;	/* Period of hash perturbation */
 	__u32		limit;		/* Maximal packets in queue */
@@ -170,8 +162,7 @@ struct tc_sfq_qopt
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
-struct tc_sfq_xstats
-{
+struct tc_sfq_xstats {
 	__s32		allot;
 };
 
@@ -186,8 +177,7 @@ struct tc_sfq_xstats
 
 /* RED section */
 
-enum
-{
+enum {
 	TCA_RED_UNSPEC,
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
@@ -196,8 +186,7 @@ enum
 
 #define TCA_RED_MAX (__TCA_RED_MAX - 1)
 
-struct tc_red_qopt
-{
+struct tc_red_qopt {
 	__u32		limit;		/* HARD maximal queue length (bytes)	*/
 	__u32		qth_min;	/* Min average length threshold (bytes) */
 	__u32		qth_max;	/* Max average length threshold (bytes) */
@@ -209,8 +198,7 @@ struct tc_red_qopt
 #define TC_RED_HARDDROP	2
 };
 
-struct tc_red_xstats
-{
+struct tc_red_xstats {
 	__u32           early;          /* Early drops */
 	__u32           pdrop;          /* Drops due to queue limits */
 	__u32           other;          /* Drops due to drop() calls */
@@ -221,8 +209,7 @@ struct tc_red_xstats
 
 #define MAX_DPs 16
 
-enum
-{
+enum {
        TCA_GRED_UNSPEC,
        TCA_GRED_PARMS,
        TCA_GRED_STAB,
@@ -232,8 +219,7 @@ enum
 
 #define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
 
-struct tc_gred_qopt
-{
+struct tc_gred_qopt {
 	__u32		limit;        /* HARD maximal queue length (bytes)    */
 	__u32		qth_min;      /* Min average length threshold (bytes) */
 	__u32		qth_max;      /* Max average length threshold (bytes) */
@@ -253,8 +239,7 @@ struct tc_gred_qopt
 };
 
 /* gred setup */
-struct tc_gred_sopt
-{
+struct tc_gred_sopt {
 	__u32		DPs;
 	__u32		def_DP;
 	__u8		grio;
@@ -267,8 +252,7 @@ struct tc_gred_sopt
 #define TC_HTB_MAXDEPTH		8
 #define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */
 
-struct tc_htb_opt
-{
+struct tc_htb_opt {
 	struct tc_ratespec 	rate;
 	struct tc_ratespec 	ceil;
 	__u32	buffer;
@@ -277,8 +261,7 @@ struct tc_htb_opt
 	__u32	level;		/* out only */
 	__u32	prio;
 };
-struct tc_htb_glob
-{
+struct tc_htb_glob {
 	__u32 version;		/* to match HTB/TC */
     	__u32 rate2quantum;	/* bps->quantum divisor */
     	__u32 defcls;		/* default class number */
@@ -287,8 +270,7 @@ struct tc_htb_glob
 	/* stats */
 	__u32 direct_pkts; /* count of non shapped packets */
 };
-enum
-{
+enum {
 	TCA_HTB_UNSPEC,
 	TCA_HTB_PARMS,
 	TCA_HTB_INIT,
@@ -299,8 +281,7 @@ enum
 
 #define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
 
-struct tc_htb_xstats
-{
+struct tc_htb_xstats {
 	__u32 lends;
 	__u32 borrows;
 	__u32 giants;	/* too big packets (rate will not be accurate) */
@@ -310,28 +291,24 @@ struct tc_htb_xstats
 
 /* HFSC section */
 
-struct tc_hfsc_qopt
-{
+struct tc_hfsc_qopt {
 	__u16	defcls;		/* default class */
 };
 
-struct tc_service_curve
-{
+struct tc_service_curve {
 	__u32	m1;		/* slope of the first segment in bps */
 	__u32	d;		/* x-projection of the first segment in us */
 	__u32	m2;		/* slope of the second segment in bps */
 };
 
-struct tc_hfsc_stats
-{
+struct tc_hfsc_stats {
 	__u64	work;		/* total work done */
 	__u64	rtwork;		/* work done by real-time criteria */
 	__u32	period;		/* current period */
 	__u32	level;		/* class level in hierarchy */
 };
 
-enum
-{
+enum {
 	TCA_HFSC_UNSPEC,
 	TCA_HFSC_RSC,
 	TCA_HFSC_FSC,
@@ -348,8 +325,7 @@ enum
 #define TC_CBQ_MAXLEVEL		8
 #define TC_CBQ_DEF_EWMA		5
 
-struct tc_cbq_lssopt
-{
+struct tc_cbq_lssopt {
 	unsigned char	change;
 	unsigned char	flags;
 #define TCF_CBQ_LSS_BOUNDED	1
@@ -368,8 +344,7 @@ struct tc_cbq_lssopt
 	__u32		avpkt;
 };
 
-struct tc_cbq_wrropt
-{
+struct tc_cbq_wrropt {
 	unsigned char	flags;
 	unsigned char	priority;
 	unsigned char	cpriority;
@@ -378,8 +353,7 @@ struct tc_cbq_wrropt
 	__u32		weight;
 };
 
-struct tc_cbq_ovl
-{
+struct tc_cbq_ovl {
 	unsigned char	strategy;
 #define	TC_CBQ_OVL_CLASSIC	0
 #define	TC_CBQ_OVL_DELAY	1
@@ -391,30 +365,26 @@ struct tc_cbq_ovl
 	__u32		penalty;
 };
 
-struct tc_cbq_police
-{
+struct tc_cbq_police {
 	unsigned char	police;
 	unsigned char	__res1;
 	unsigned short	__res2;
 };
 
-struct tc_cbq_fopt
-{
+struct tc_cbq_fopt {
 	__u32		split;
 	__u32		defmap;
 	__u32		defchange;
 };
 
-struct tc_cbq_xstats
-{
+struct tc_cbq_xstats {
 	__u32		borrows;
 	__u32		overactions;
 	__s32		avgidle;
 	__s32		undertime;
 };
 
-enum
-{
+enum {
 	TCA_CBQ_UNSPEC,
 	TCA_CBQ_LSSOPT,
 	TCA_CBQ_WRROPT,
@@ -459,8 +429,7 @@ enum {
 
 /* Network emulator */
 
-enum
-{
+enum {
 	TCA_NETEM_UNSPEC,
 	TCA_NETEM_CORR,
 	TCA_NETEM_DELAY_DIST,
@@ -471,8 +440,7 @@ enum
 
 #define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
 
-struct tc_netem_qopt
-{
+struct tc_netem_qopt {
 	__u32	latency;	/* added delay (us) */
 	__u32   limit;		/* fifo limit (packets) */
 	__u32	loss;		/* random packet loss (0=none ~0=100%) */
@@ -481,21 +449,18 @@ struct tc_netem_qopt
 	__u32	jitter;		/* random jitter in latency (us) */
 };
 
-struct tc_netem_corr
-{
+struct tc_netem_corr {
 	__u32	delay_corr;	/* delay correlation */
 	__u32	loss_corr;	/* packet loss correlation */
 	__u32	dup_corr;	/* duplicate correlation  */
 };
 
-struct tc_netem_reorder
-{
+struct tc_netem_reorder {
 	__u32	probability;
 	__u32	correlation;
 };
 
-struct tc_netem_corrupt
-{
+struct tc_netem_corrupt {
 	__u32	probability;
 	__u32	correlation;
 };
@@ -504,8 +469,7 @@ struct tc_netem_corrupt
 
 /* DRR */
 
-enum
-{
+enum {
 	TCA_DRR_UNSPEC,
 	TCA_DRR_QUANTUM,
 	__TCA_DRR_MAX
@@ -513,8 +477,7 @@ enum
 
 #define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)
 
-struct tc_drr_stats
-{
+struct tc_drr_stats {
 	__u32	deficit;
 };
 
diff --git a/include/linux/route.h b/include/linux/route.h
index f7ed35d5e653..6600708311c8 100644
--- a/include/linux/route.h
+++ b/include/linux/route.h
@@ -27,8 +27,7 @@
 #include <linux/compiler.h>
 
 /* This structure gets passed by the SIOCADDRT and SIOCDELRT calls. */
-struct rtentry 
-{
+struct rtentry {
 	unsigned long	rt_pad1;
 	struct sockaddr	rt_dst;		/* target address		*/
 	struct sockaddr	rt_gateway;	/* gateway addr (RTF_GATEWAY)	*/
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e78b60cd65a4..14fc906ed602 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -127,8 +127,7 @@ enum {
    with attribute type.
  */
 
-struct rtattr
-{
+struct rtattr {
 	unsigned short	rta_len;
 	unsigned short	rta_type;
 };
@@ -154,8 +153,7 @@ struct rtattr
  *		Definitions used in routing table administration.
  ****/
 
-struct rtmsg
-{
+struct rtmsg {
 	unsigned char		rtm_family;
 	unsigned char		rtm_dst_len;
 	unsigned char		rtm_src_len;
@@ -171,8 +169,7 @@ struct rtmsg
 
 /* rtm_type */
 
-enum
-{
+enum {
 	RTN_UNSPEC,
 	RTN_UNICAST,		/* Gateway or direct route	*/
 	RTN_LOCAL,		/* Accept locally		*/
@@ -230,8 +227,7 @@ enum
    could be assigned a value between UNIVERSE and LINK.
 */
 
-enum rt_scope_t
-{
+enum rt_scope_t {
 	RT_SCOPE_UNIVERSE=0,
 /* User defined values  */
 	RT_SCOPE_SITE=200,
@@ -249,8 +245,7 @@ enum rt_scope_t
 
 /* Reserved table identifiers */
 
-enum rt_class_t
-{
+enum rt_class_t {
 	RT_TABLE_UNSPEC=0,
 /* User defined values */
 	RT_TABLE_COMPAT=252,
@@ -263,8 +258,7 @@ enum rt_class_t
 
 /* Routing message attributes */
 
-enum rtattr_type_t
-{
+enum rtattr_type_t {
 	RTA_UNSPEC,
 	RTA_DST,
 	RTA_SRC,
@@ -298,8 +292,7 @@ enum rtattr_type_t
  * and rtt for different paths from multipath.
  */
 
-struct rtnexthop
-{
+struct rtnexthop {
 	unsigned short		rtnh_len;
 	unsigned char		rtnh_flags;
 	unsigned char		rtnh_hops;
@@ -325,8 +318,7 @@ struct rtnexthop
 
 /* RTM_CACHEINFO */
 
-struct rta_cacheinfo
-{
+struct rta_cacheinfo {
 	__u32	rta_clntref;
 	__u32	rta_lastuse;
 	__s32	rta_expires;
@@ -341,8 +333,7 @@ struct rta_cacheinfo
 
 /* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
 
-enum
-{
+enum {
 	RTAX_UNSPEC,
 #define RTAX_UNSPEC RTAX_UNSPEC
 	RTAX_LOCK,
@@ -383,8 +374,7 @@ enum
 #define RTAX_FEATURE_NO_WSCALE	0x00000010
 #define RTAX_FEATURE_NO_DSACK	0x00000020
 
-struct rta_session
-{
+struct rta_session {
 	__u8	proto;
 	__u8	pad1;
 	__u16	pad2;
@@ -409,8 +399,7 @@ struct rta_session
  *		General form of address family dependent message.
  ****/
 
-struct rtgenmsg
-{
+struct rtgenmsg {
 	unsigned char		rtgen_family;
 };
 
@@ -423,8 +412,7 @@ struct rtgenmsg
  * on network protocol.
  */
 
-struct ifinfomsg
-{
+struct ifinfomsg {
 	unsigned char	ifi_family;
 	unsigned char	__ifi_pad;
 	unsigned short	ifi_type;		/* ARPHRD_* */
@@ -437,8 +425,7 @@ struct ifinfomsg
  *		prefix information 
  ****/
 
-struct prefixmsg
-{
+struct prefixmsg {
 	unsigned char	prefix_family;
 	unsigned char	prefix_pad1;
 	unsigned short	prefix_pad2;
@@ -459,8 +446,7 @@ enum
 
 #define PREFIX_MAX	(__PREFIX_MAX - 1)
 
-struct prefix_cacheinfo
-{
+struct prefix_cacheinfo {
 	__u32	preferred_time;
 	__u32	valid_time;
 };
@@ -470,8 +456,7 @@ struct prefix_cacheinfo
  *		Traffic control messages.
  ****/
 
-struct tcmsg
-{
+struct tcmsg {
 	unsigned char	tcm_family;
 	unsigned char	tcm__pad1;
 	unsigned short	tcm__pad2;
@@ -481,8 +466,7 @@ struct tcmsg
 	__u32		tcm_info;
 };
 
-enum
-{
+enum {
 	TCA_UNSPEC,
 	TCA_KIND,
 	TCA_OPTIONS,
@@ -504,8 +488,7 @@ enum
  *		Neighbor Discovery userland options
  ****/
 
-struct nduseroptmsg
-{
+struct nduseroptmsg {
 	unsigned char	nduseropt_family;
 	unsigned char	nduseropt_pad1;
 	unsigned short	nduseropt_opts_len;	/* Total length of options */
@@ -517,8 +500,7 @@ struct nduseroptmsg
 	/* Followed by one or more ND options */
 };
 
-enum
-{
+enum {
 	NDUSEROPT_UNSPEC,
 	NDUSEROPT_SRCADDR,
 	__NDUSEROPT_MAX
@@ -600,8 +582,7 @@ enum rtnetlink_groups {
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
 
 /* TC action piece */
-struct tcamsg
-{
+struct tcamsg {
 	unsigned char	tca_family;
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0c68fbd6faac..d0448c5d1ffc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -491,8 +491,7 @@ extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			int len,int odd, struct sk_buff *skb),
 			void *from, int length);
 
-struct skb_seq_state
-{
+struct skb_seq_state {
 	__u32		lower_offset;
 	__u32		upper_offset;
 	__u32		frag_idx;
diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h
index 964f473af0f0..6f65d07c7ce2 100644
--- a/include/linux/tc_act/tc_defact.h
+++ b/include/linux/tc_act/tc_defact.h
@@ -3,13 +3,11 @@
 
 #include <linux/pkt_cls.h>
 
-struct tc_defact
-{
+struct tc_defact {
 	tc_gen;
 };
                                                                                 
-enum
-{
+enum {
 	TCA_DEF_UNSPEC,
 	TCA_DEF_TM,
 	TCA_DEF_PARMS,
diff --git a/include/linux/tc_act/tc_gact.h b/include/linux/tc_act/tc_gact.h
index e895c0a39629..f7bf94eed510 100644
--- a/include/linux/tc_act/tc_gact.h
+++ b/include/linux/tc_act/tc_gact.h
@@ -5,14 +5,12 @@
 #include <linux/pkt_cls.h>
 
 #define TCA_ACT_GACT 5
-struct tc_gact
-{
+struct tc_gact {
 	tc_gen;
 
 };
 
-struct tc_gact_p
-{
+struct tc_gact_p {
 #define PGACT_NONE              0
 #define PGACT_NETRAND           1
 #define PGACT_DETERM            2
@@ -22,8 +20,7 @@ struct tc_gact_p
 	int                   paction;
 };
  
-enum
-{
+enum {
 	TCA_GACT_UNSPEC,
 	TCA_GACT_TM,
 	TCA_GACT_PARMS,
diff --git a/include/linux/tc_act/tc_ipt.h b/include/linux/tc_act/tc_ipt.h
index 4b6f7b6c7a79..a2335563d21f 100644
--- a/include/linux/tc_act/tc_ipt.h
+++ b/include/linux/tc_act/tc_ipt.h
@@ -5,8 +5,7 @@
 
 #define TCA_ACT_IPT 6
 
-enum
-{
+enum {
 	TCA_IPT_UNSPEC,
 	TCA_IPT_TABLE,
 	TCA_IPT_HOOK,
diff --git a/include/linux/tc_act/tc_mirred.h b/include/linux/tc_act/tc_mirred.h
index 0a99ab60d610..7561750e8fd6 100644
--- a/include/linux/tc_act/tc_mirred.h
+++ b/include/linux/tc_act/tc_mirred.h
@@ -10,15 +10,13 @@
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
 #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
                                                                                 
-struct tc_mirred
-{
+struct tc_mirred {
 	tc_gen;
 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
 	__u32                   ifindex;  /* ifindex of egress port */
 };
                                                                                 
-enum
-{
+enum {
 	TCA_MIRRED_UNSPEC,
 	TCA_MIRRED_TM,
 	TCA_MIRRED_PARMS,
diff --git a/include/linux/tc_act/tc_nat.h b/include/linux/tc_act/tc_nat.h
index e7cf31e8ba79..6663aeba0b9a 100644
--- a/include/linux/tc_act/tc_nat.h
+++ b/include/linux/tc_act/tc_nat.h
@@ -6,8 +6,7 @@
 
 #define TCA_ACT_NAT 9
 
-enum
-{
+enum {
 	TCA_NAT_UNSPEC,
 	TCA_NAT_PARMS,
 	TCA_NAT_TM,
@@ -17,8 +16,7 @@ enum
 
 #define TCA_NAT_FLAG_EGRESS 1
 
-struct tc_nat
-{
+struct tc_nat {
 	tc_gen;
 	__be32 old_addr;
 	__be32 new_addr;
diff --git a/include/linux/tc_act/tc_pedit.h b/include/linux/tc_act/tc_pedit.h
index 54ce9064115a..716cfabcd5b2 100644
--- a/include/linux/tc_act/tc_pedit.h
+++ b/include/linux/tc_act/tc_pedit.h
@@ -6,8 +6,7 @@
 
 #define TCA_ACT_PEDIT 7
 
-enum
-{
+enum {
 	TCA_PEDIT_UNSPEC,
 	TCA_PEDIT_TM,
 	TCA_PEDIT_PARMS,
@@ -15,8 +14,7 @@ enum
 };
 #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1)
                                                                                 
-struct tc_pedit_key
-{
+struct tc_pedit_key {
 	__u32           mask;  /* AND */
 	__u32           val;   /*XOR */
 	__u32           off;  /*offset */
@@ -25,8 +23,7 @@ struct tc_pedit_key
 	__u32           shift;
 };
                                                                                 
-struct tc_pedit_sel
-{
+struct tc_pedit_sel {
 	tc_gen;
 	unsigned char           nkeys;
 	unsigned char           flags;
diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/linux/tc_ematch/tc_em_cmp.h
index 38e7f7b25ec2..f34bb1bae083 100644
--- a/include/linux/tc_ematch/tc_em_cmp.h
+++ b/include/linux/tc_ematch/tc_em_cmp.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-struct tcf_em_cmp
-{
+struct tcf_em_cmp {
 	__u32		val;
 	__u32		mask;
 	__u16		off;
@@ -15,8 +14,7 @@ struct tcf_em_cmp
 	__u8		opnd:4;
 };
 
-enum
-{
+enum {
 	TCF_EM_ALIGN_U8  = 1,
 	TCF_EM_ALIGN_U16 = 2,
 	TCF_EM_ALIGN_U32 = 4
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
index dcfb733fa1f6..0864206ec1a3 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/linux/tc_ematch/tc_em_meta.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-enum
-{
+enum {
 	TCA_EM_META_UNSPEC,
 	TCA_EM_META_HDR,
 	TCA_EM_META_LVALUE,
@@ -14,8 +13,7 @@ enum
 };
 #define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1)
 
-struct tcf_meta_val
-{
+struct tcf_meta_val {
 	__u16			kind;
 	__u8			shift;
 	__u8			op;
@@ -26,16 +24,14 @@ struct tcf_meta_val
 #define TCF_META_ID_MASK	0x7ff
 #define TCF_META_ID(kind)	((kind) & TCF_META_ID_MASK)
 
-enum
-{
+enum {
 	TCF_META_TYPE_VAR,
 	TCF_META_TYPE_INT,
 	__TCF_META_TYPE_MAX
 };
 #define TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1)
 
-enum
-{
+enum {
 	TCF_META_ID_VALUE,
 	TCF_META_ID_RANDOM,
 	TCF_META_ID_LOADAVG_0,
@@ -87,8 +83,7 @@ enum
 };
 #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
 
-struct tcf_meta_hdr
-{
+struct tcf_meta_hdr {
 	struct tcf_meta_val	left;
 	struct tcf_meta_val	right;
 };
diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/linux/tc_ematch/tc_em_nbyte.h
index 9ed8c2e58488..7172cfb999c1 100644
--- a/include/linux/tc_ematch/tc_em_nbyte.h
+++ b/include/linux/tc_ematch/tc_em_nbyte.h
@@ -4,8 +4,7 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-struct tcf_em_nbyte
-{
+struct tcf_em_nbyte {
 	__u16		off;
 	__u16		len:12;
 	__u8		layer:4;
diff --git a/include/linux/tc_ematch/tc_em_text.h b/include/linux/tc_ematch/tc_em_text.h
index d12a73a225fc..5aac4045ba88 100644
--- a/include/linux/tc_ematch/tc_em_text.h
+++ b/include/linux/tc_ematch/tc_em_text.h
@@ -6,8 +6,7 @@
 
 #define TC_EM_TEXT_ALGOSIZ	16
 
-struct tcf_em_text
-{
+struct tcf_em_text {
 	char		algo[TC_EM_TEXT_ALGOSIZ];
 	__u16		from_offset;
 	__u16		to_offset;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7c21fe..eeecb8547a2a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -102,8 +102,7 @@ enum {
 #define TCPI_OPT_WSCALE		4
 #define TCPI_OPT_ECN		8
 
-enum tcp_ca_state
-{
+enum tcp_ca_state {
 	TCP_CA_Open = 0,
 #define TCPF_CA_Open	(1<<TCP_CA_Open)
 	TCP_CA_Disorder = 1,
@@ -116,8 +115,7 @@ enum tcp_ca_state
 #define TCPF_CA_Loss	(1<<TCP_CA_Loss)
 };
 
-struct tcp_info
-{
+struct tcp_info {
 	__u8	tcpi_state;
 	__u8	tcpi_ca_state;
 	__u8	tcpi_retransmits;
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 2d4ec15abaca..3246f0e66bcc 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -10,8 +10,7 @@
 /* Structure to encapsulate addresses. I do not want to use
  * "standard" structure. My apologies.
  */
-typedef union
-{
+typedef union {
 	__be32		a4;
 	__be32		a6[4];
 } xfrm_address_t;
@@ -20,8 +19,7 @@ typedef union
  * the state by (spi,daddr,ah/esp) or to store information about
  * spi, protocol and tunnel address on output.
  */
-struct xfrm_id
-{
+struct xfrm_id {
 	xfrm_address_t	daddr;
 	__be32		spi;
 	__u8		proto;
@@ -45,8 +43,7 @@ struct xfrm_sec_ctx {
 
 /* Selector, used as selector both on policy rules (SPD) and SAs. */
 
-struct xfrm_selector
-{
+struct xfrm_selector {
 	xfrm_address_t	daddr;
 	xfrm_address_t	saddr;
 	__be16	dport;
@@ -63,8 +60,7 @@ struct xfrm_selector
 
 #define XFRM_INF (~(__u64)0)
 
-struct xfrm_lifetime_cfg
-{
+struct xfrm_lifetime_cfg {
 	__u64	soft_byte_limit;
 	__u64	hard_byte_limit;
 	__u64	soft_packet_limit;
@@ -75,16 +71,14 @@ struct xfrm_lifetime_cfg
 	__u64	hard_use_expires_seconds;
 };
 
-struct xfrm_lifetime_cur
-{
+struct xfrm_lifetime_cur {
 	__u64	bytes;
 	__u64	packets;
 	__u64	add_time;
 	__u64	use_time;
 };
 
-struct xfrm_replay_state
-{
+struct xfrm_replay_state {
 	__u32	oseq;
 	__u32	seq;
 	__u32	bitmap;
@@ -109,16 +103,14 @@ struct xfrm_stats {
 	__u32	integrity_failed;
 };
 
-enum
-{
+enum {
 	XFRM_POLICY_TYPE_MAIN	= 0,
 	XFRM_POLICY_TYPE_SUB	= 1,
 	XFRM_POLICY_TYPE_MAX	= 2,
 	XFRM_POLICY_TYPE_ANY	= 255
 };
 
-enum
-{
+enum {
 	XFRM_POLICY_IN	= 0,
 	XFRM_POLICY_OUT	= 1,
 	XFRM_POLICY_FWD	= 2,
@@ -126,8 +118,7 @@ enum
 	XFRM_POLICY_MAX	= 3
 };
 
-enum
-{
+enum {
 	XFRM_SHARE_ANY,		/* No limitations */
 	XFRM_SHARE_SESSION,	/* For this session only */
 	XFRM_SHARE_USER,	/* For this user only */
-- 
cgit v1.2.3


From 0584396157ad2d008e2cc76b4ed6254151183a25 Mon Sep 17 00:00:00 2001
From: Matt Domsch <Matt_Domsch@dell.com>
Date: Mon, 2 Nov 2009 11:51:24 -0600
Subject: PCI: PCIe AER: honor ACPI HEST FIRMWARE FIRST mode

Feedback from Hidetoshi Seto and Kenji Kaneshige incorporated.  This
correctly handles PCI-X bridges, PCIe root ports and endpoints, and
prints debug messages when invalid/reserved types are found in the
HEST.  PCI devices not in domain/segment 0 are not represented in
HEST, thus will be ignored.

Today, the PCIe Advanced Error Reporting (AER) driver attaches itself
to every PCIe root port for which BIOS reports it should, via ACPI
_OSC.

However, _OSC alone is insufficient for newer BIOSes.  Part of ACPI
4.0 is the new APEI (ACPI Platform Error Interfaces) which is a way
for OS and BIOS to handshake over which errors for which components
each will handle.  One table in ACPI 4.0 is the Hardware Error Source
Table (HEST), where BIOS can define that errors for certain PCIe
devices (or all devices), should be handled by BIOS ("Firmware First
mode"), rather than be handled by the OS.

Dell PowerEdge 11G server BIOS defines Firmware First mode in HEST, so
that it may manage such errors, log them to the System Event Log, and
possibly take other actions.  The aer driver should honor this, and
not attach itself to devices noted as such.

Furthermore, Kenji Kaneshige reminded us to disallow changing the AER
registers when respecting Firmware First mode.  Platform firmware is
expected to manage these, and if changes to them are allowed, it could
break that firmware's behavior.

The HEST parsing code may be replaced in the future by a more
feature-rich implementation.  This patch provides the minimum needed
to prevent breakage until that implementation is available.

Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/Makefile              |   1 +
 drivers/acpi/hest.c                | 135 +++++++++++++++++++++++++++++++++++++
 drivers/pci/pcie/aer/aerdrv_core.c |  24 ++++++-
 drivers/pci/probe.c                |   8 +++
 include/acpi/acpi_hest.h           |  12 ++++
 include/linux/pci.h                |   1 +
 6 files changed, 179 insertions(+), 2 deletions(-)
 create mode 100644 drivers/acpi/hest.c
 create mode 100644 include/acpi/acpi_hest.h

(limited to 'include/linux')

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 7702118509a0..c7b10b4298e9 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -19,6 +19,7 @@ obj-y				+= acpi.o \
 
 # All the builtin files are in the "acpi." module_param namespace.
 acpi-y				+= osl.o utils.o reboot.o
+acpi-y				+= hest.o
 
 # sleep related files
 acpi-y				+= wakeup.o
diff --git a/drivers/acpi/hest.c b/drivers/acpi/hest.c
new file mode 100644
index 000000000000..4bb18c980ac6
--- /dev/null
+++ b/drivers/acpi/hest.c
@@ -0,0 +1,135 @@
+#include <linux/acpi.h>
+#include <linux/pci.h>
+
+#define PREFIX "ACPI: "
+
+static inline unsigned long parse_acpi_hest_ia_machine_check(struct acpi_hest_ia_machine_check *p)
+{
+	return sizeof(*p) +
+		(sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks);
+}
+
+static inline unsigned long parse_acpi_hest_ia_corrected(struct acpi_hest_ia_corrected *p)
+{
+	return sizeof(*p) +
+		(sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks);
+}
+
+static inline unsigned long parse_acpi_hest_ia_nmi(struct acpi_hest_ia_nmi *p)
+{
+	return sizeof(*p);
+}
+
+static inline unsigned long parse_acpi_hest_generic(struct acpi_hest_generic *p)
+{
+	return sizeof(*p);
+}
+
+static inline unsigned int hest_match_pci(struct acpi_hest_aer_common *p, struct pci_dev *pci)
+{
+	return	(0           == pci_domain_nr(pci->bus) &&
+		 p->bus      == pci->bus->number &&
+		 p->device   == PCI_SLOT(pci->devfn) &&
+		 p->function == PCI_FUNC(pci->devfn));
+}
+
+static unsigned long parse_acpi_hest_aer(void *hdr, int type, struct pci_dev *pci, int *firmware_first)
+{
+	struct acpi_hest_aer_common *p = hdr + sizeof(struct acpi_hest_header);
+	unsigned long rc=0;
+	u8 pcie_type = 0;
+	u8 bridge = 0;
+	switch (type) {
+	case ACPI_HEST_TYPE_AER_ROOT_PORT:
+		rc = sizeof(struct acpi_hest_aer_root);
+		pcie_type = PCI_EXP_TYPE_ROOT_PORT;
+		break;
+	case ACPI_HEST_TYPE_AER_ENDPOINT:
+		rc = sizeof(struct acpi_hest_aer);
+		pcie_type = PCI_EXP_TYPE_ENDPOINT;
+		break;
+	case ACPI_HEST_TYPE_AER_BRIDGE:
+		rc = sizeof(struct acpi_hest_aer_bridge);
+		if ((pci->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+			bridge = 1;
+		break;
+	}
+
+	if (p->flags & ACPI_HEST_GLOBAL) {
+		if ((pci->is_pcie && (pci->pcie_type == pcie_type)) || bridge)
+			*firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+	}
+	else
+		if (hest_match_pci(p, pci))
+			*firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+	return rc;
+}
+
+static int acpi_hest_firmware_first(struct acpi_table_header *stdheader, struct pci_dev *pci)
+{
+	struct acpi_table_hest *hest = (struct acpi_table_hest *)stdheader;
+	void *p = (void *)hest + sizeof(*hest); /* defined by the ACPI 4.0 spec */
+	struct acpi_hest_header *hdr = p;
+
+	int i;
+	int firmware_first = 0;
+	static unsigned char printed_unused = 0;
+	static unsigned char printed_reserved = 0;
+
+	for (i=0, hdr=p; p < (((void *)hest) + hest->header.length) && i < hest->error_source_count; i++) {
+		switch (hdr->type) {
+		case ACPI_HEST_TYPE_IA32_CHECK:
+			p += parse_acpi_hest_ia_machine_check(p);
+			break;
+		case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
+			p += parse_acpi_hest_ia_corrected(p);
+			break;
+		case ACPI_HEST_TYPE_IA32_NMI:
+			p += parse_acpi_hest_ia_nmi(p);
+			break;
+		/* These three should never appear */
+		case ACPI_HEST_TYPE_NOT_USED3:
+		case ACPI_HEST_TYPE_NOT_USED4:
+		case ACPI_HEST_TYPE_NOT_USED5:
+			if (!printed_unused) {
+				printk(KERN_DEBUG PREFIX
+				       "HEST Error Source list contains an obsolete type (%d).\n", hdr->type);
+				printed_unused = 1;
+			}
+			break;
+		case ACPI_HEST_TYPE_AER_ROOT_PORT:
+		case ACPI_HEST_TYPE_AER_ENDPOINT:
+		case ACPI_HEST_TYPE_AER_BRIDGE:
+			p += parse_acpi_hest_aer(p, hdr->type, pci, &firmware_first);
+			break;
+		case ACPI_HEST_TYPE_GENERIC_ERROR:
+			p += parse_acpi_hest_generic(p);
+			break;
+		/* These should never appear either */
+		case ACPI_HEST_TYPE_RESERVED:
+		default:
+			if (!printed_reserved) {
+				printk(KERN_DEBUG PREFIX
+				       "HEST Error Source list contains a reserved type (%d).\n", hdr->type);
+				printed_reserved = 1;
+			}
+			break;
+		}
+	}
+	return firmware_first;
+}
+
+int acpi_hest_firmware_first_pci(struct pci_dev *pci)
+{
+	acpi_status status = AE_NOT_FOUND;
+	struct acpi_table_header *hest = NULL;
+	status = acpi_get_table(ACPI_SIG_HEST, 1, &hest);
+
+	if (ACPI_SUCCESS(status)) {
+		if (acpi_hest_firmware_first(hest, pci)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_hest_firmware_first_pci);
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 9f5ccbeb4fa5..f4512feac12b 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -35,6 +35,9 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
+	if (dev->aer_firmware_first)
+		return -EIO;
+
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 	if (!pos)
 		return -EIO;
@@ -60,6 +63,9 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
+	if (dev->aer_firmware_first)
+		return -EIO;
+
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos)
 		return -EIO;
@@ -874,8 +880,22 @@ void aer_delete_rootport(struct aer_rpc *rpc)
  */
 int aer_init(struct pcie_device *dev)
 {
-	if (aer_osc_setup(dev) && !forceload)
-		return -ENXIO;
+	if (dev->port->aer_firmware_first) {
+		dev_printk(KERN_DEBUG, &dev->device,
+			   "PCIe errors handled by platform firmware.\n");
+		goto out;
+	}
+
+	if (aer_osc_setup(dev))
+		goto out;
 
 	return 0;
+out:
+	if (forceload) {
+		dev_printk(KERN_DEBUG, &dev->device,
+			   "aerdrv forceload requested.\n");
+		dev->port->aer_firmware_first = 0;
+		return 0;
+	}
+	return -ENXIO;
 }
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 9cefc54a0125..118463befef0 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -11,6 +11,7 @@
 #include <linux/cpumask.h>
 #include <linux/pci-aspm.h>
 #include <linux/iommu.h>
+#include <acpi/acpi_hest.h>
 #include <xen/xen.h>
 #include "pci.h"
 
@@ -706,6 +707,12 @@ static void set_pcie_hotplug_bridge(struct pci_dev *pdev)
 		pdev->is_hotplug_bridge = 1;
 }
 
+static void set_pci_aer_firmware_first(struct pci_dev *pdev)
+{
+	if (acpi_hest_firmware_first_pci(pdev))
+		pdev->aer_firmware_first = 1;
+}
+
 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
 
 /**
@@ -734,6 +741,7 @@ int pci_setup_device(struct pci_dev *dev)
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
+	set_pci_aer_firmware_first(dev);
 
 	list_for_each_entry(slot, &dev->bus->slots, list)
 		if (PCI_SLOT(dev->devfn) == slot->number)
diff --git a/include/acpi/acpi_hest.h b/include/acpi/acpi_hest.h
new file mode 100644
index 000000000000..63194d03cb2d
--- /dev/null
+++ b/include/acpi/acpi_hest.h
@@ -0,0 +1,12 @@
+#ifndef __ACPI_HEST_H
+#define __ACPI_HEST_H
+
+#include <linux/pci.h>
+
+#ifdef CONFIG_ACPI
+extern int acpi_hest_firmware_first_pci(struct pci_dev *pci);
+#else
+static inline int acpi_hest_firmware_first_pci(struct pci_dev *pci) { return 0; }
+#endif
+
+#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index da4128f6e916..9d646e60cae0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -280,6 +280,7 @@ struct pci_dev {
 	unsigned int	is_virtfn:1;
 	unsigned int	reset_fn:1;
 	unsigned int    is_hotplug_bridge:1;
+	unsigned int    aer_firmware_first:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
-- 
cgit v1.2.3


From bc577d2bb98cc44371287fce3e892d26ad4050a8 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabe.black@ni.com>
Date: Tue, 6 Oct 2009 10:45:19 -0500
Subject: PCI: populate subsystem vendor and device IDs for PCI bridges

Change to populate the subsystem vendor and subsytem device IDs for
PCI-PCI bridges that implement the PCI Subsystem Vendor ID capability.
Previously bridges left subsystem vendor IDs unpopulated.

Signed-off-by: Gabe Black <gabe.black@ni.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c      | 6 ++++++
 include/linux/pci_regs.h | 5 +++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 118463befef0..4842b09b7f3c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -730,6 +730,7 @@ int pci_setup_device(struct pci_dev *dev)
 	u32 class;
 	u8 hdr_type;
 	struct pci_slot *slot;
+	int pos = 0;
 
 	if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
 		return -EIO;
@@ -822,6 +823,11 @@ int pci_setup_device(struct pci_dev *dev)
 		dev->transparent = ((dev->class & 0xff) == 1);
 		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
 		set_pcie_hotplug_bridge(dev);
+		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
+		if (pos) {
+			pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
+			pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
+		}
 		break;
 
 	case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index d798770f08cd..9f2ad0aa3c39 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -365,6 +365,11 @@
 #define  PCI_X_STATUS_266MHZ	0x40000000	/* 266 MHz capable */
 #define  PCI_X_STATUS_533MHZ	0x80000000	/* 533 MHz capable */
 
+/* PCI Bridge Subsystem ID registers */
+
+#define PCI_SSVID_VENDOR_ID     4	/* PCI-Bridge subsystem vendor id register */
+#define PCI_SSVID_DEVICE_ID     6	/* PCI-Bridge subsystem device id register */
+
 /* PCI Express capability registers */
 
 #define PCI_EXP_FLAGS		2	/* Capabilities register */
-- 
cgit v1.2.3


From 3c299dc22635e500214707aa28be119ff2b3901c Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 12 Oct 2009 13:14:00 -0600
Subject: PCI: add pci_get_domain_bus_and_slot function

Added the pci_get_domain_and_slot_function which is analogous to
pci_get_bus_and_slot. It returns a pci_dev given a domain (segment) number,
bus number, and devnr. Like pci_get_bus_and_slot,
pci_get_domain_bus_and_slot holds a reference to the returned pci_dev.

Converted pci_get_bus_and_slot to a wrapper that calls
pci_get_domain_bus_and_slot with the domain hard-coded to 0.

This routine was patterned off code suggested by Bjorn Helgaas.

Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/search.c | 34 +++++++++++++++++-----------------
 include/linux/pci.h  |  8 +++++++-
 2 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index ec415352d9ba..75826482c71a 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -149,32 +149,33 @@ struct pci_dev * pci_get_slot(struct pci_bus *bus, unsigned int devfn)
 }
 
 /**
- * pci_get_bus_and_slot - locate PCI device from a given PCI bus & slot
- * @bus: number of PCI bus on which desired PCI device resides
- * @devfn: encodes number of PCI slot in which the desired PCI
- * device resides and the logical device number within that slot
- * in case of multi-function devices.
- *
- * Note: the bus/slot search is limited to PCI domain (segment) 0.
+ * pci_get_domain_bus_and_slot - locate PCI device for a given PCI domain (segment), bus, and slot
+ * @domain: PCI domain/segment on which the PCI device resides.
+ * @bus: PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI device
+ * resides and the logical device number within that slot in case of
+ * multi-function devices.
  *
- * Given a PCI bus and slot/function number, the desired PCI device
- * is located in system global list of PCI devices.  If the device
- * is found, a pointer to its data structure is returned.  If no
- * device is found, %NULL is returned. The returned device has its
- * reference count bumped by one.
+ * Given a PCI domain, bus, and slot/function number, the desired PCI
+ * device is located in the list of PCI devices. If the device is
+ * found, its reference count is increased and this function returns a
+ * pointer to its data structure.  The caller must decrement the
+ * reference count by calling pci_dev_put().  If no device is found,
+ * %NULL is returned.
  */
-
-struct pci_dev * pci_get_bus_and_slot(unsigned int bus, unsigned int devfn)
+struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
+					    unsigned int devfn)
 {
 	struct pci_dev *dev = NULL;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		if (pci_domain_nr(dev->bus) == 0 &&
-		   (dev->bus->number == bus && dev->devfn == devfn))
+		if (pci_domain_nr(dev->bus) == domain &&
+		    (dev->bus->number == bus && dev->devfn == devfn))
 			return dev;
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(pci_get_domain_bus_and_slot);
 
 static int match_pci_dev_by_id(struct device *dev, void *data)
 {
@@ -354,5 +355,4 @@ EXPORT_SYMBOL(pci_find_next_bus);
 EXPORT_SYMBOL(pci_get_device);
 EXPORT_SYMBOL(pci_get_subsys);
 EXPORT_SYMBOL(pci_get_slot);
-EXPORT_SYMBOL(pci_get_bus_and_slot);
 EXPORT_SYMBOL(pci_get_class);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9d646e60cae0..86c31ac454d1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -636,7 +636,13 @@ struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device,
 				unsigned int ss_vendor, unsigned int ss_device,
 				struct pci_dev *from);
 struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn);
-struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn);
+struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
+					    unsigned int devfn);
+static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
+						   unsigned int devfn)
+{
+	return pci_get_domain_bus_and_slot(0, bus, devfn);
+}
 struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from);
 int pci_dev_present(const struct pci_device_id *ids);
 
-- 
cgit v1.2.3


From 42bbb70980f3720b0ae6da6af862af0e95a04351 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 4 Nov 2009 15:34:18 -0700
Subject: powerpc/5200: Add mpc5200-spi (non-PSC) device driver

Adds support for the dedicated SPI device on the Freescale MPC5200(b)
SoC.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig             |   8 +
 drivers/spi/Makefile            |   1 +
 drivers/spi/mpc52xx_spi.c       | 520 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/mpc52xx_spi.h |  10 +
 4 files changed, 539 insertions(+)
 create mode 100644 drivers/spi/mpc52xx_spi.c
 create mode 100644 include/linux/spi/mpc52xx_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 4b6f7cba3b3d..2a4ba1993083 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -133,6 +133,14 @@ config SPI_LM70_LLP
 	  which interfaces to an LM70 temperature sensor using
 	  a parallel port.
 
+config SPI_MPC52xx
+	tristate "Freescale MPC52xx SPI (non-PSC) controller support"
+	depends on PPC_MPC52xx && SPI
+	select SPI_MASTER_OF
+	help
+	  This drivers supports the MPC52xx SPI controller in master SPI
+	  mode.
+
 config SPI_MPC52xx_PSC
 	tristate "Freescale MPC52xx PSC SPI controller"
 	depends on PPC_MPC52xx && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 21a118269cac..e3f092a9afa5 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPI_OMAP24XX)		+= omap2_mcspi.o
 obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
 obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
+obj-$(CONFIG_SPI_MPC52xx)		+= mpc52xx_spi.o
 obj-$(CONFIG_SPI_MPC8xxx)		+= spi_mpc8xxx.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi_ppc4xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
diff --git a/drivers/spi/mpc52xx_spi.c b/drivers/spi/mpc52xx_spi.c
new file mode 100644
index 000000000000..ef8379b2c172
--- /dev/null
+++ b/drivers/spi/mpc52xx_spi.c
@@ -0,0 +1,520 @@
+/*
+ * MPC52xx SPI bus driver.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * This file is released under the GPLv2
+ *
+ * This is the driver for the MPC5200's dedicated SPI controller.
+ *
+ * Note: this driver does not support the MPC5200 PSC in SPI mode.  For
+ * that driver see drivers/spi/mpc52xx_psc_spi.c
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mpc52xx_spi.h>
+#include <linux/of_spi.h>
+#include <linux/io.h>
+#include <asm/time.h>
+#include <asm/mpc52xx.h>
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("MPC52xx SPI (non-PSC) Driver");
+MODULE_LICENSE("GPL");
+
+/* Register offsets */
+#define SPI_CTRL1	0x00
+#define SPI_CTRL1_SPIE		(1 << 7)
+#define SPI_CTRL1_SPE		(1 << 6)
+#define SPI_CTRL1_MSTR		(1 << 4)
+#define SPI_CTRL1_CPOL		(1 << 3)
+#define SPI_CTRL1_CPHA		(1 << 2)
+#define SPI_CTRL1_SSOE		(1 << 1)
+#define SPI_CTRL1_LSBFE		(1 << 0)
+
+#define SPI_CTRL2	0x01
+#define SPI_BRR		0x04
+
+#define SPI_STATUS	0x05
+#define SPI_STATUS_SPIF		(1 << 7)
+#define SPI_STATUS_WCOL		(1 << 6)
+#define SPI_STATUS_MODF		(1 << 4)
+
+#define SPI_DATA	0x09
+#define SPI_PORTDATA	0x0d
+#define SPI_DATADIR	0x10
+
+/* FSM state return values */
+#define FSM_STOP	0	/* Nothing more for the state machine to */
+				/* do.  If something interesting happens */
+				/* then and IRQ will be received */
+#define FSM_POLL	1	/* need to poll for completion, an IRQ is */
+				/* not expected */
+#define FSM_CONTINUE	2	/* Keep iterating the state machine */
+
+/* Driver internal data */
+struct mpc52xx_spi {
+	struct spi_master *master;
+	u32 sysclk;
+	void __iomem *regs;
+	int irq0;	/* MODF irq */
+	int irq1;	/* SPIF irq */
+	int ipb_freq;
+
+	/* Statistics */
+	int msg_count;
+	int wcol_count;
+	int wcol_ticks;
+	u32 wcol_tx_timestamp;
+	int modf_count;
+	int byte_count;
+
+	struct list_head queue;		/* queue of pending messages */
+	spinlock_t lock;
+	struct work_struct work;
+
+
+	/* Details of current transfer (length, and buffer pointers) */
+	struct spi_message *message;	/* current message */
+	struct spi_transfer *transfer;	/* current transfer */
+	int (*state)(int irq, struct mpc52xx_spi *ms, u8 status, u8 data);
+	int len;
+	int timestamp;
+	u8 *rx_buf;
+	const u8 *tx_buf;
+	int cs_change;
+};
+
+/*
+ * CS control function
+ */
+static void mpc52xx_spi_chipsel(struct mpc52xx_spi *ms, int value)
+{
+	out_8(ms->regs + SPI_PORTDATA, value ? 0 : 0x08);
+}
+
+/*
+ * Start a new transfer.  This is called both by the idle state
+ * for the first transfer in a message, and by the wait state when the
+ * previous transfer in a message is complete.
+ */
+static void mpc52xx_spi_start_transfer(struct mpc52xx_spi *ms)
+{
+	ms->rx_buf = ms->transfer->rx_buf;
+	ms->tx_buf = ms->transfer->tx_buf;
+	ms->len = ms->transfer->len;
+
+	/* Activate the chip select */
+	if (ms->cs_change)
+		mpc52xx_spi_chipsel(ms, 1);
+	ms->cs_change = ms->transfer->cs_change;
+
+	/* Write out the first byte */
+	ms->wcol_tx_timestamp = get_tbl();
+	if (ms->tx_buf)
+		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
+	else
+		out_8(ms->regs + SPI_DATA, 0);
+}
+
+/* Forward declaration of state handlers */
+static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
+					 u8 status, u8 data);
+static int mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms,
+				     u8 status, u8 data);
+
+/*
+ * IDLE state
+ *
+ * No transfers are in progress; if another transfer is pending then retrieve
+ * it and kick it off.  Otherwise, stop processing the state machine
+ */
+static int
+mpc52xx_spi_fsmstate_idle(int irq, struct mpc52xx_spi *ms, u8 status, u8 data)
+{
+	struct spi_device *spi;
+	int spr, sppr;
+	u8 ctrl1;
+
+	if (status && (irq != NO_IRQ))
+		dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
+			status);
+
+	/* Check if there is another transfer waiting. */
+	if (list_empty(&ms->queue))
+		return FSM_STOP;
+
+	/* get the head of the queue */
+	ms->message = list_first_entry(&ms->queue, struct spi_message, queue);
+	list_del_init(&ms->message->queue);
+
+	/* Setup the controller parameters */
+	ctrl1 = SPI_CTRL1_SPIE | SPI_CTRL1_SPE | SPI_CTRL1_MSTR;
+	spi = ms->message->spi;
+	if (spi->mode & SPI_CPHA)
+		ctrl1 |= SPI_CTRL1_CPHA;
+	if (spi->mode & SPI_CPOL)
+		ctrl1 |= SPI_CTRL1_CPOL;
+	if (spi->mode & SPI_LSB_FIRST)
+		ctrl1 |= SPI_CTRL1_LSBFE;
+	out_8(ms->regs + SPI_CTRL1, ctrl1);
+
+	/* Setup the controller speed */
+	/* minimum divider is '2'.  Also, add '1' to force rounding the
+	 * divider up. */
+	sppr = ((ms->ipb_freq / ms->message->spi->max_speed_hz) + 1) >> 1;
+	spr = 0;
+	if (sppr < 1)
+		sppr = 1;
+	while (((sppr - 1) & ~0x7) != 0) {
+		sppr = (sppr + 1) >> 1; /* add '1' to force rounding up */
+		spr++;
+	}
+	sppr--;		/* sppr quantity in register is offset by 1 */
+	if (spr > 7) {
+		/* Don't overrun limits of SPI baudrate register */
+		spr = 7;
+		sppr = 7;
+	}
+	out_8(ms->regs + SPI_BRR, sppr << 4 | spr); /* Set speed */
+
+	ms->cs_change = 1;
+	ms->transfer = container_of(ms->message->transfers.next,
+				    struct spi_transfer, transfer_list);
+
+	mpc52xx_spi_start_transfer(ms);
+	ms->state = mpc52xx_spi_fsmstate_transfer;
+
+	return FSM_CONTINUE;
+}
+
+/*
+ * TRANSFER state
+ *
+ * In the middle of a transfer.  If the SPI core has completed processing
+ * a byte, then read out the received data and write out the next byte
+ * (unless this transfer is finished; in which case go on to the wait
+ * state)
+ */
+static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
+					 u8 status, u8 data)
+{
+	if (!status)
+		return ms->irq0 ? FSM_STOP : FSM_POLL;
+
+	if (status & SPI_STATUS_WCOL) {
+		/* The SPI controller is stoopid.  At slower speeds, it may
+		 * raise the SPIF flag before the state machine is actually
+		 * finished, which causes a collision (internal to the state
+		 * machine only).  The manual recommends inserting a delay
+		 * between receiving the interrupt and sending the next byte,
+		 * but it can also be worked around simply by retrying the
+		 * transfer which is what we do here. */
+		ms->wcol_count++;
+		ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp;
+		ms->wcol_tx_timestamp = get_tbl();
+		data = 0;
+		if (ms->tx_buf)
+			data = *(ms->tx_buf-1);
+		out_8(ms->regs + SPI_DATA, data); /* try again */
+		return FSM_CONTINUE;
+	} else if (status & SPI_STATUS_MODF) {
+		ms->modf_count++;
+		dev_err(&ms->master->dev, "mode fault\n");
+		mpc52xx_spi_chipsel(ms, 0);
+		ms->message->status = -EIO;
+		ms->message->complete(ms->message->context);
+		ms->state = mpc52xx_spi_fsmstate_idle;
+		return FSM_CONTINUE;
+	}
+
+	/* Read data out of the spi device */
+	ms->byte_count++;
+	if (ms->rx_buf)
+		*ms->rx_buf++ = data;
+
+	/* Is the transfer complete? */
+	ms->len--;
+	if (ms->len == 0) {
+		ms->timestamp = get_tbl();
+		ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec;
+		ms->state = mpc52xx_spi_fsmstate_wait;
+		return FSM_CONTINUE;
+	}
+
+	/* Write out the next byte */
+	ms->wcol_tx_timestamp = get_tbl();
+	if (ms->tx_buf)
+		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
+	else
+		out_8(ms->regs + SPI_DATA, 0);
+
+	return FSM_CONTINUE;
+}
+
+/*
+ * WAIT state
+ *
+ * A transfer has completed; need to wait for the delay period to complete
+ * before starting the next transfer
+ */
+static int
+mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms, u8 status, u8 data)
+{
+	if (status && irq)
+		dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
+			status);
+
+	if (((int)get_tbl()) - ms->timestamp < 0)
+		return FSM_POLL;
+
+	ms->message->actual_length += ms->transfer->len;
+
+	/* Check if there is another transfer in this message.  If there
+	 * aren't then deactivate CS, notify sender, and drop back to idle
+	 * to start the next message. */
+	if (ms->transfer->transfer_list.next == &ms->message->transfers) {
+		ms->msg_count++;
+		mpc52xx_spi_chipsel(ms, 0);
+		ms->message->status = 0;
+		ms->message->complete(ms->message->context);
+		ms->state = mpc52xx_spi_fsmstate_idle;
+		return FSM_CONTINUE;
+	}
+
+	/* There is another transfer; kick it off */
+
+	if (ms->cs_change)
+		mpc52xx_spi_chipsel(ms, 0);
+
+	ms->transfer = container_of(ms->transfer->transfer_list.next,
+				    struct spi_transfer, transfer_list);
+	mpc52xx_spi_start_transfer(ms);
+	ms->state = mpc52xx_spi_fsmstate_transfer;
+	return FSM_CONTINUE;
+}
+
+/**
+ * mpc52xx_spi_fsm_process - Finite State Machine iteration function
+ * @irq: irq number that triggered the FSM or 0 for polling
+ * @ms: pointer to mpc52xx_spi driver data
+ */
+static void mpc52xx_spi_fsm_process(int irq, struct mpc52xx_spi *ms)
+{
+	int rc = FSM_CONTINUE;
+	u8 status, data;
+
+	while (rc == FSM_CONTINUE) {
+		/* Interrupt cleared by read of STATUS followed by
+		 * read of DATA registers */
+		status = in_8(ms->regs + SPI_STATUS);
+		data = in_8(ms->regs + SPI_DATA);
+		rc = ms->state(irq, ms, status, data);
+	}
+
+	if (rc == FSM_POLL)
+		schedule_work(&ms->work);
+}
+
+/**
+ * mpc52xx_spi_irq - IRQ handler
+ */
+static irqreturn_t mpc52xx_spi_irq(int irq, void *_ms)
+{
+	struct mpc52xx_spi *ms = _ms;
+	spin_lock(&ms->lock);
+	mpc52xx_spi_fsm_process(irq, ms);
+	spin_unlock(&ms->lock);
+	return IRQ_HANDLED;
+}
+
+/**
+ * mpc52xx_spi_wq - Workqueue function for polling the state machine
+ */
+static void mpc52xx_spi_wq(struct work_struct *work)
+{
+	struct mpc52xx_spi *ms = container_of(work, struct mpc52xx_spi, work);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ms->lock, flags);
+	mpc52xx_spi_fsm_process(0, ms);
+	spin_unlock_irqrestore(&ms->lock, flags);
+}
+
+/*
+ * spi_master ops
+ */
+
+static int mpc52xx_spi_setup(struct spi_device *spi)
+{
+	if (spi->bits_per_word % 8)
+		return -EINVAL;
+
+	if (spi->mode & ~(SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST))
+		return -EINVAL;
+
+	if (spi->chip_select >= spi->master->num_chipselect)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mpc52xx_spi_transfer(struct spi_device *spi, struct spi_message *m)
+{
+	struct mpc52xx_spi *ms = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	m->actual_length = 0;
+	m->status = -EINPROGRESS;
+
+	spin_lock_irqsave(&ms->lock, flags);
+	list_add_tail(&m->queue, &ms->queue);
+	spin_unlock_irqrestore(&ms->lock, flags);
+	schedule_work(&ms->work);
+
+	return 0;
+}
+
+/*
+ * OF Platform Bus Binding
+ */
+static int __devinit mpc52xx_spi_probe(struct of_device *op,
+				       const struct of_device_id *match)
+{
+	struct spi_master *master;
+	struct mpc52xx_spi *ms;
+	void __iomem *regs;
+	int rc;
+
+	/* MMIO registers */
+	dev_dbg(&op->dev, "probing mpc5200 SPI device\n");
+	regs = of_iomap(op->node, 0);
+	if (!regs)
+		return -ENODEV;
+
+	/* initialize the device */
+	out_8(regs+SPI_CTRL1, SPI_CTRL1_SPIE | SPI_CTRL1_SPE | SPI_CTRL1_MSTR);
+	out_8(regs + SPI_CTRL2, 0x0);
+	out_8(regs + SPI_DATADIR, 0xe);	/* Set output pins */
+	out_8(regs + SPI_PORTDATA, 0x8);	/* Deassert /SS signal */
+
+	/* Clear the status register and re-read it to check for a MODF
+	 * failure.  This driver cannot currently handle multiple masters
+	 * on the SPI bus.  This fault will also occur if the SPI signals
+	 * are not connected to any pins (port_config setting) */
+	in_8(regs + SPI_STATUS);
+	in_8(regs + SPI_DATA);
+	if (in_8(regs + SPI_STATUS) & SPI_STATUS_MODF) {
+		dev_err(&op->dev, "mode fault; is port_config correct?\n");
+		rc = -EIO;
+		goto err_init;
+	}
+
+	dev_dbg(&op->dev, "allocating spi_master struct\n");
+	master = spi_alloc_master(&op->dev, sizeof *ms);
+	if (!master) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+	master->bus_num = -1;
+	master->num_chipselect = 1;
+	master->setup = mpc52xx_spi_setup;
+	master->transfer = mpc52xx_spi_transfer;
+	dev_set_drvdata(&op->dev, master);
+
+	ms = spi_master_get_devdata(master);
+	ms->master = master;
+	ms->regs = regs;
+	ms->irq0 = irq_of_parse_and_map(op->node, 0);
+	ms->irq1 = irq_of_parse_and_map(op->node, 1);
+	ms->state = mpc52xx_spi_fsmstate_idle;
+	ms->ipb_freq = mpc5xxx_get_bus_frequency(op->node);
+	spin_lock_init(&ms->lock);
+	INIT_LIST_HEAD(&ms->queue);
+	INIT_WORK(&ms->work, mpc52xx_spi_wq);
+
+	/* Decide if interrupts can be used */
+	if (ms->irq0 && ms->irq1) {
+		rc = request_irq(ms->irq0, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
+				  "mpc5200-spi-modf", ms);
+		rc |= request_irq(ms->irq1, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
+				  "mpc5200-spi-spiF", ms);
+		if (rc) {
+			free_irq(ms->irq0, ms);
+			free_irq(ms->irq1, ms);
+			ms->irq0 = ms->irq1 = 0;
+		}
+	} else {
+		/* operate in polled mode */
+		ms->irq0 = ms->irq1 = 0;
+	}
+
+	if (!ms->irq0)
+		dev_info(&op->dev, "using polled mode\n");
+
+	dev_dbg(&op->dev, "registering spi_master struct\n");
+	rc = spi_register_master(master);
+	if (rc)
+		goto err_register;
+
+	of_register_spi_devices(master, op->node);
+	dev_info(&ms->master->dev, "registered MPC5200 SPI bus\n");
+
+	return rc;
+
+ err_register:
+	dev_err(&ms->master->dev, "initialization failed\n");
+	spi_master_put(master);
+ err_alloc:
+ err_init:
+	iounmap(regs);
+	return rc;
+}
+
+static int __devexit mpc52xx_spi_remove(struct of_device *op)
+{
+	struct spi_master *master = dev_get_drvdata(&op->dev);
+	struct mpc52xx_spi *ms = spi_master_get_devdata(master);
+
+	free_irq(ms->irq0, ms);
+	free_irq(ms->irq1, ms);
+
+	spi_unregister_master(master);
+	spi_master_put(master);
+	iounmap(ms->regs);
+
+	return 0;
+}
+
+static struct of_device_id mpc52xx_spi_match[] __devinitdata = {
+	{ .compatible = "fsl,mpc5200-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mpc52xx_spi_match);
+
+static struct of_platform_driver mpc52xx_spi_of_driver = {
+	.owner = THIS_MODULE,
+	.name = "mpc52xx-spi",
+	.match_table = mpc52xx_spi_match,
+	.probe = mpc52xx_spi_probe,
+	.remove = __exit_p(mpc52xx_spi_remove),
+};
+
+static int __init mpc52xx_spi_init(void)
+{
+	return of_register_platform_driver(&mpc52xx_spi_of_driver);
+}
+module_init(mpc52xx_spi_init);
+
+static void __exit mpc52xx_spi_exit(void)
+{
+	of_unregister_platform_driver(&mpc52xx_spi_of_driver);
+}
+module_exit(mpc52xx_spi_exit);
+
diff --git a/include/linux/spi/mpc52xx_spi.h b/include/linux/spi/mpc52xx_spi.h
new file mode 100644
index 000000000000..d1004cf09241
--- /dev/null
+++ b/include/linux/spi/mpc52xx_spi.h
@@ -0,0 +1,10 @@
+
+#ifndef INCLUDE_MPC5200_SPI_H
+#define INCLUDE_MPC5200_SPI_H
+
+extern void mpc52xx_spi_set_premessage_hook(struct spi_master *master,
+					    void (*hook)(struct spi_message *m,
+							 void *context),
+					    void *hook_context);
+
+#endif
-- 
cgit v1.2.3


From 3c5d92a0cfb5103c0d5ab74d4ae6373d3af38148 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 29 Sep 2009 14:25:16 +0200
Subject: nohz: Introduce arch_needs_cpu

Allow the architecture to request a normal jiffy tick when the system
goes idle and tick_nohz_stop_sched_tick is called . On s390 the hook is
used to prevent the system going fully idle if there has been an
interrupt other than a clock comparator interrupt since the last wakeup.

On s390 the HiperSockets response time for 1 connection ping-pong goes
down from 42 to 34 microseconds. The CPU cost decreases by 27%.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
LKML-Reference: <20090929122533.402715150@de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/s390/include/asm/cputime.h |  8 ++++++++
 arch/s390/kernel/s390_ext.c     |  2 ++
 arch/s390/kernel/vtime.c        |  2 ++
 drivers/s390/cio/cio.c          |  1 +
 include/linux/tick.h            |  3 +++
 kernel/time/tick-sched.c        | 13 ++++++++-----
 6 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 24b1244aadb9..95f3561517c7 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -183,6 +183,7 @@ struct s390_idle_data {
 	unsigned long long idle_count;
 	unsigned long long idle_enter;
 	unsigned long long idle_time;
+	int nohz_delay;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
@@ -198,4 +199,11 @@ static inline void s390_idle_check(void)
 		vtime_start_cpu();
 }
 
+static inline int s390_nohz_delay(int cpu)
+{
+	return per_cpu(s390_idle, cpu).nohz_delay != 0;
+}
+
+#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 0de305b598ce..59618bcd99b7 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -126,6 +126,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
 	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
+	if (code != 0x1004)
+		__get_cpu_var(s390_idle).nohz_delay = 1;
         index = ext_hash(code);
 	for (p = ext_int_hash[index]; p; p = p->next) {
 		if (likely(p->code == code))
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c41bb0d416e1..b59a812a010e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -167,6 +167,8 @@ void vtime_stop_cpu(void)
 	/* Wait for external, I/O or machine check interrupt. */
 	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
 
+	idle->nohz_delay = 0;
+
 	/* Check if the CPU timer needs to be reprogrammed. */
 	if (vq->do_spt) {
 		__u64 vmax = VTIMER_MAX_SLICE;
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 138124fcfcad..126f240715a4 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -618,6 +618,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 	s390_idle_check();
 	irq_enter();
+	__get_cpu_var(s390_idle).nohz_delay = 1;
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 0482229c07db..8dc082194b22 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -98,6 +98,9 @@ extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
 extern void tick_check_idle(int cpu);
 extern int tick_oneshot_mode_active(void);
+#  ifndef arch_needs_cpu
+#   define arch_needs_cpu(cpu) (0)
+#  endif
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7378e2c71ca6..3840f6dff7eb 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -264,12 +264,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 		last_jiffies = jiffies;
 	} while (read_seqretry(&xtime_lock, seq));
 
-	/* Get the next timer wheel timer */
-	next_jiffies = get_next_timer_interrupt(last_jiffies);
-	delta_jiffies = next_jiffies - last_jiffies;
-
-	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
+	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+	    arch_needs_cpu(cpu)) {
+		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
+	} else {
+		/* Get the next timer wheel timer */
+		next_jiffies = get_next_timer_interrupt(last_jiffies);
+		delta_jiffies = next_jiffies - last_jiffies;
+	}
 	/*
 	 * Do not stop the tick, if we are only one off
 	 * or if the cpu is required for rcu
-- 
cgit v1.2.3


From 5b915d9e6dc3d22fedde91dfef1cb1a8fa9a1870 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 5 Nov 2009 14:08:03 +0100
Subject: HID: fixup quirk for NCR devices

NCR devices are terminally broken by design -- they claim themselves to contain
proper input applications in their HID report descriptor, but behave very badly
if treated in standard way.

According to NCR developers, the devices get confused when queried for reports
in a standard way, rendering them unusable.

NCR is shipping application called "RPSL" that can be used to drive these
devices through hiddev, under the assumption that in-kernel driver doesn't
perform initial report query.
If it does, neither in-kernel nor hiddev-based driver can operate with these
devices any more.

Introduce a quirk that skips the report query for all NCR devices. The previous
NOGET quirk was wrong and had been introduced because I misunderstood the nature
of brokenness of these devices.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-core.c   | 3 ++-
 drivers/hid/usbhid/hid-quirks.c | 2 +-
 include/linux/hid.h             | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 3f56e9c02e65..0258289f3b3e 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -996,7 +996,8 @@ static int usbhid_start(struct hid_device *hid)
 	usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma;
 	usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP);
 
-	usbhid_init_reports(hid);
+	if (!(hid->quirks & HID_QUIRK_NO_INIT_REPORTS))
+		usbhid_init_reports(hid);
 
 	set_bit(HID_STARTED, &usbhid->iofl);
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 2d445b270215..c3b02f59792b 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -281,7 +281,7 @@ u32 usbhid_lookup_quirk(const u16 idVendor, const u16 idProduct)
 	if (idVendor == USB_VENDOR_ID_NCR &&
 			idProduct >= USB_DEVICE_ID_NCR_FIRST &&
 			idProduct <= USB_DEVICE_ID_NCR_LAST)
-			return HID_QUIRK_NOGET;
+			return HID_QUIRK_NO_INIT_REPORTS;
 
 	down_read(&dquirks_rwsem);
 	bl_entry = usbhid_exists_dquirk(idVendor, idProduct);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 10f628416740..87093652dda8 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -312,6 +312,7 @@ struct hid_item {
 #define HID_QUIRK_MULTI_INPUT			0x00000040
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
+#define HID_QUIRK_NO_INIT_REPORTS		0x20000000
 
 /*
  * This is the global environment of the parser. This information is
-- 
cgit v1.2.3


From 8c10cbdb4af642d9a2efb45ea89251aaab905360 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 19 Oct 2009 12:04:53 +0200
Subject: nfsd: use STATEID_FMT and STATEID_VAL for printing stateids

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c        | 46 ++++++++++++++++------------------------------
 include/linux/nfsd/state.h |  7 +++++++
 2 files changed, 23 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 42dab9587afe..c8b621a120cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2416,11 +2416,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 
 	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
 
-	dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
-	             dp->dl_stateid.si_boot,
-	             dp->dl_stateid.si_stateownerid,
-	             dp->dl_stateid.si_fileid,
-	             dp->dl_stateid.si_generation);
+	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
+		STATEID_VAL(&dp->dl_stateid));
 out:
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
 			&& flag == NFS4_OPEN_DELEGATE_NONE
@@ -2510,9 +2507,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 
 	status = nfs_ok;
 
-	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
-	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
-	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
+	dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
+		STATEID_VAL(&stp->st_stateid));
 out:
 	if (fp)
 		put_nfs4_file(fp);
@@ -2678,9 +2674,8 @@ STALE_STATEID(stateid_t *stateid)
 {
 	if (time_after((unsigned long)boot_time,
 			(unsigned long)stateid->si_boot)) {
-		dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
-			stateid->si_boot, stateid->si_stateownerid,
-			stateid->si_fileid, stateid->si_generation);
+		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+			STATEID_VAL(stateid));
 		return 1;
 	}
 	return 0;
@@ -2692,9 +2687,8 @@ EXPIRED_STATEID(stateid_t *stateid)
 	if (time_before((unsigned long)boot_time,
 			((unsigned long)stateid->si_boot)) &&
 	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
-		dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
-			stateid->si_boot, stateid->si_stateownerid,
-			stateid->si_fileid, stateid->si_generation);
+		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+			STATEID_VAL(stateid));
 		return 1;
 	}
 	return 0;
@@ -2708,9 +2702,8 @@ stateid_error_map(stateid_t *stateid)
 	if (EXPIRED_STATEID(stateid))
 		return nfserr_expired;
 
-	dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
-		stateid->si_boot, stateid->si_stateownerid,
-		stateid->si_fileid, stateid->si_generation);
+	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
+		STATEID_VAL(stateid));
 	return nfserr_bad_stateid;
 }
 
@@ -2896,10 +2889,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 	struct svc_fh *current_fh = &cstate->current_fh;
 	__be32 status;
 
-	dprintk("NFSD: preprocess_seqid_op: seqid=%d " 
-			"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
-		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
-		stateid->si_generation);
+	dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
+		seqid, STATEID_VAL(stateid));
 
 	*stpp = NULL;
 	*sopp = NULL;
@@ -3031,12 +3022,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	sop->so_confirmed = 1;
 	update_stateid(&stp->st_stateid);
 	memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
-	dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 
-		"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
-		         stp->st_stateid.si_boot,
-		         stp->st_stateid.si_stateownerid,
-		         stp->st_stateid.si_fileid,
-		         stp->st_stateid.si_generation);
+	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
 
 	nfsd4_create_clid_dir(sop->so_client);
 out:
@@ -3295,9 +3282,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
 	struct nfs4_file *fp;
 	struct nfs4_delegation *dl;
 
-	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
-                    stid->si_boot, stid->si_stateownerid,
-                    stid->si_fileid, stid->si_generation);
+	dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
+		STATEID_VAL(stid));
 
 	fp = find_file(ino);
 	if (!fp)
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index b38d11324189..5aadf8aa3a97 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -60,6 +60,13 @@ typedef struct {
 #define si_stateownerid   si_opaque.so_stateownerid
 #define si_fileid         si_opaque.so_fileid
 
+#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
+#define STATEID_VAL(s) \
+	(s)->si_boot, \
+	(s)->si_stateownerid, \
+	(s)->si_fileid, \
+	(s)->si_generation
+
 struct nfsd4_cb_sequence {
 	/* args/res */
 	u32			cbs_minorversion;
-- 
cgit v1.2.3


From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Nov 2009 23:06:50 +0100
Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to
 linux/hw_breakpoint.h

We plan to make the breakpoints parameters generic among architectures.
For that it's better to move the asm-generic header to a generic linux
header.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/include/asm/hw_breakpoint.h |   2 +-
 include/asm-generic/hw_breakpoint.h  | 139 -----------------------------------
 include/linux/hw_breakpoint.h        | 136 ++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 140 deletions(-)
 delete mode 100644 include/asm-generic/hw_breakpoint.h
 create mode 100644 include/linux/hw_breakpoint.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 1acb4d45de70..3cfca8e2b5f6 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -12,7 +12,7 @@ struct arch_hw_breakpoint {
 };
 
 #include <linux/kdebug.h>
-#include <asm-generic/hw_breakpoint.h>
+#include <linux/hw_breakpoint.h>
 
 /* Available HW breakpoint length encodings */
 #define HW_BREAKPOINT_LEN_1		0x40
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
deleted file mode 100644
index 9bf2d12eb74a..000000000000
--- a/include/asm-generic/hw_breakpoint.h
+++ /dev/null
@@ -1,139 +0,0 @@
-#ifndef	_ASM_GENERIC_HW_BREAKPOINT_H
-#define	_ASM_GENERIC_HW_BREAKPOINT_H
-
-#ifndef __ARCH_HW_BREAKPOINT_H
-#error "Please don't include this file directly"
-#endif
-
-#ifdef	__KERNEL__
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/kallsyms.h>
-
-/**
- * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
- * @triggered: callback invoked after target address access
- * @info: arch-specific breakpoint info (address, length, and type)
- *
- * %hw_breakpoint structures are the kernel's way of representing
- * hardware breakpoints.  These are data breakpoints
- * (also known as "watchpoints", triggered on data access), and the breakpoint's
- * target address can be located in either kernel space or user space.
- *
- * The breakpoint's address, length, and type are highly
- * architecture-specific.  The values are encoded in the @info field; you
- * specify them when registering the breakpoint.  To examine the encoded
- * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
- * below.
- *
- * The address is specified as a regular kernel pointer (for kernel-space
- * breakponts) or as an %__user pointer (for user-space breakpoints).
- * With register_user_hw_breakpoint(), the address must refer to a
- * location in user space.  The breakpoint will be active only while the
- * requested task is running.  Conversely with
- * register_kernel_hw_breakpoint(), the address must refer to a location
- * in kernel space, and the breakpoint will be active on all CPUs
- * regardless of the current task.
- *
- * The length is the breakpoint's extent in bytes, which is subject to
- * certain limitations.  include/asm/hw_breakpoint.h contains macros
- * defining the available lengths for a specific architecture.  Note that
- * the address's alignment must match the length.  The breakpoint will
- * catch accesses to any byte in the range from address to address +
- * (length - 1).
- *
- * The breakpoint's type indicates the sort of access that will cause it
- * to trigger.  Possible values may include:
- *
- * 	%HW_BREAKPOINT_RW (triggered on read or write access),
- * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
- * 	%HW_BREAKPOINT_READ (triggered on read access).
- *
- * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
- * possibilities are available on all architectures.  Execute breakpoints
- * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
- *
- * When a breakpoint gets hit, the @triggered callback is
- * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
- * processor registers.
- * Data breakpoints occur after the memory access has taken place.
- * Breakpoints are disabled during execution @triggered, to avoid
- * recursive traps and allow unhindered access to breakpointed memory.
- *
- * This sample code sets a breakpoint on pid_max and registers a callback
- * function for writes to that variable.  Note that it is not portable
- * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
- *
- * ----------------------------------------------------------------------
- *
- * #include <asm/hw_breakpoint.h>
- *
- * struct hw_breakpoint my_bp;
- *
- * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
- * {
- * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
- * 	dump_stack();
- *  	.......<more debugging output>........
- * }
- *
- * static struct hw_breakpoint my_bp;
- *
- * static int init_module(void)
- * {
- *	..........<do anything>............
- *	my_bp.info.type = HW_BREAKPOINT_WRITE;
- *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
- *
- *	my_bp.installed = (void *)my_bp_installed;
- *
- *	rc = register_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * static void cleanup_module(void)
- * {
- *	..........<do anything>............
- *	unregister_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * ----------------------------------------------------------------------
- */
-struct hw_breakpoint {
-	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
-	struct arch_hw_breakpoint info;
-};
-
-/*
- * len and type values are defined in include/asm/hw_breakpoint.h.
- * Available values vary according to the architecture.  On i386 the
- * possibilities are:
- *
- *	HW_BREAKPOINT_LEN_1
- *	HW_BREAKPOINT_LEN_2
- *	HW_BREAKPOINT_LEN_4
- *	HW_BREAKPOINT_RW
- *	HW_BREAKPOINT_READ
- *
- * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
- * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
- * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
- */
-
-extern int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern int modify_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp);
-/*
- * Kernel breakpoints are not associated with any particular thread.
- */
-extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-
-extern unsigned int hbp_kernel_pos;
-
-#endif	/* __KERNEL__ */
-#endif	/* _ASM_GENERIC_HW_BREAKPOINT_H */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 000000000000..61ccc8f17eac
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,136 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+
+#ifdef	__KERNEL__
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+/**
+ * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
+ * @triggered: callback invoked after target address access
+ * @info: arch-specific breakpoint info (address, length, and type)
+ *
+ * %hw_breakpoint structures are the kernel's way of representing
+ * hardware breakpoints.  These are data breakpoints
+ * (also known as "watchpoints", triggered on data access), and the breakpoint's
+ * target address can be located in either kernel space or user space.
+ *
+ * The breakpoint's address, length, and type are highly
+ * architecture-specific.  The values are encoded in the @info field; you
+ * specify them when registering the breakpoint.  To examine the encoded
+ * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
+ * below.
+ *
+ * The address is specified as a regular kernel pointer (for kernel-space
+ * breakponts) or as an %__user pointer (for user-space breakpoints).
+ * With register_user_hw_breakpoint(), the address must refer to a
+ * location in user space.  The breakpoint will be active only while the
+ * requested task is running.  Conversely with
+ * register_kernel_hw_breakpoint(), the address must refer to a location
+ * in kernel space, and the breakpoint will be active on all CPUs
+ * regardless of the current task.
+ *
+ * The length is the breakpoint's extent in bytes, which is subject to
+ * certain limitations.  include/asm/hw_breakpoint.h contains macros
+ * defining the available lengths for a specific architecture.  Note that
+ * the address's alignment must match the length.  The breakpoint will
+ * catch accesses to any byte in the range from address to address +
+ * (length - 1).
+ *
+ * The breakpoint's type indicates the sort of access that will cause it
+ * to trigger.  Possible values may include:
+ *
+ * 	%HW_BREAKPOINT_RW (triggered on read or write access),
+ * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
+ * 	%HW_BREAKPOINT_READ (triggered on read access).
+ *
+ * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
+ * possibilities are available on all architectures.  Execute breakpoints
+ * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ *
+ * When a breakpoint gets hit, the @triggered callback is
+ * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
+ * processor registers.
+ * Data breakpoints occur after the memory access has taken place.
+ * Breakpoints are disabled during execution @triggered, to avoid
+ * recursive traps and allow unhindered access to breakpointed memory.
+ *
+ * This sample code sets a breakpoint on pid_max and registers a callback
+ * function for writes to that variable.  Note that it is not portable
+ * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * #include <asm/hw_breakpoint.h>
+ *
+ * struct hw_breakpoint my_bp;
+ *
+ * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+ * {
+ * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
+ * 	dump_stack();
+ *  	.......<more debugging output>........
+ * }
+ *
+ * static struct hw_breakpoint my_bp;
+ *
+ * static int init_module(void)
+ * {
+ *	..........<do anything>............
+ *	my_bp.info.type = HW_BREAKPOINT_WRITE;
+ *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
+ *
+ *	my_bp.installed = (void *)my_bp_installed;
+ *
+ *	rc = register_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ *	..........<do anything>............
+ *	unregister_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * ----------------------------------------------------------------------
+ */
+struct hw_breakpoint {
+	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
+	struct arch_hw_breakpoint info;
+};
+
+/*
+ * len and type values are defined in include/asm/hw_breakpoint.h.
+ * Available values vary according to the architecture.  On i386 the
+ * possibilities are:
+ *
+ *	HW_BREAKPOINT_LEN_1
+ *	HW_BREAKPOINT_LEN_2
+ *	HW_BREAKPOINT_LEN_4
+ *	HW_BREAKPOINT_RW
+ *	HW_BREAKPOINT_READ
+ *
+ * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
+ * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
+ * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
+ */
+
+extern int register_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern int modify_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
+						struct hw_breakpoint *bp);
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+
+extern unsigned int hbp_kernel_pos;
+
+#endif	/* __KERNEL__ */
+#endif	/* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3


From 13f18aa05f5abe135f47b6417537ae2b2fedc18c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 5 Nov 2009 20:44:37 -0800
Subject: net: drop capability from protocol definitions

struct can_proto had a capability field which wasn't ever used.  It is
dropped entirely.

struct inet_protosw had a capability field which can be more clearly
expressed in the code by just checking if sock->type = SOCK_RAW.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h | 2 --
 include/net/protocol.h   | 4 ----
 net/can/af_can.c         | 5 -----
 net/can/bcm.c            | 1 -
 net/can/raw.c            | 1 -
 net/dccp/ipv4.c          | 1 -
 net/dccp/ipv6.c          | 1 -
 net/ipv4/af_inet.c       | 5 +----
 net/ipv4/udplite.c       | 1 -
 net/ipv6/af_inet6.c      | 2 +-
 net/ipv6/raw.c           | 1 -
 net/ipv6/tcp_ipv6.c      | 1 -
 net/ipv6/udp.c           | 1 -
 net/ipv6/udplite.c       | 1 -
 net/sctp/ipv6.c          | 2 --
 net/sctp/protocol.c      | 2 --
 16 files changed, 2 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 25085cbadcfc..6c507bea275f 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -32,14 +32,12 @@
  * struct can_proto - CAN protocol structure
  * @type:       type argument in socket() syscall, e.g. SOCK_DGRAM.
  * @protocol:   protocol number in socket() syscall.
- * @capability: capability needed to open the socket, or -1 for no restriction.
  * @ops:        pointer to struct proto_ops for sock->ops.
  * @prot:       pointer to struct proto structure.
  */
 struct can_proto {
 	int              type;
 	int              protocol;
-	int              capability;
 	struct proto_ops *ops;
 	struct proto     *prot;
 };
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 89932d45da59..f1effdd3c265 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -82,10 +82,6 @@ struct inet_protosw {
 	struct proto	 *prot;
 	const struct proto_ops *ops;
   
-	int              capability; /* Which (if any) capability do
-				      * we need to use this socket
-				      * interface?
-                                      */
 	char             no_check;   /* checksum on rcv/xmit/none? */
 	unsigned char	 flags;      /* See INET_PROTOSW_* below.  */
 };
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 3f2eb27e1ffb..9c0426dc3184 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -160,11 +160,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
 		goto errout;
 	}
 
-	if (cp->capability >= 0 && !capable(cp->capability)) {
-		err = -EPERM;
-		goto errout;
-	}
-
 	sock->ops = cp->ops;
 
 	sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 2f47039c79dd..67b5433db13b 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1576,7 +1576,6 @@ static struct proto bcm_proto __read_mostly = {
 static struct can_proto bcm_can_proto __read_mostly = {
 	.type       = SOCK_DGRAM,
 	.protocol   = CAN_BCM,
-	.capability = -1,
 	.ops        = &bcm_ops,
 	.prot       = &bcm_proto,
 };
diff --git a/net/can/raw.c b/net/can/raw.c
index 6e77db58b9e6..abca920440b5 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -742,7 +742,6 @@ static struct proto raw_proto __read_mostly = {
 static struct can_proto raw_can_proto __read_mostly = {
 	.type       = SOCK_RAW,
 	.protocol   = CAN_RAW,
-	.capability = -1,
 	.ops        = &raw_ops,
 	.prot       = &raw_proto,
 };
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 00028d4b09d9..2423a0866733 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -991,7 +991,6 @@ static struct inet_protosw dccp_v4_protosw = {
 	.protocol	= IPPROTO_DCCP,
 	.prot		= &dccp_v4_prot,
 	.ops		= &inet_dccp_ops,
-	.capability	= -1,
 	.no_check	= 0,
 	.flags		= INET_PROTOSW_ICSK,
 };
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6d89f9f7d5d8..50ea91a77705 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1185,7 +1185,6 @@ static struct inet_protosw dccp_v6_protosw = {
 	.protocol	= IPPROTO_DCCP,
 	.prot		= &dccp_v6_prot,
 	.ops		= &inet6_dccp_ops,
-	.capability	= -1,
 	.flags		= INET_PROTOSW_ICSK,
 };
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 538e84d0bcba..180ec4c94919 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -325,7 +325,7 @@ lookup_protocol:
 	}
 
 	err = -EPERM;
-	if (answer->capability > 0 && !capable(answer->capability))
+	if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	err = -EAFNOSUPPORT;
@@ -947,7 +947,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_TCP,
 		.prot =       &tcp_prot,
 		.ops =        &inet_stream_ops,
-		.capability = -1,
 		.no_check =   0,
 		.flags =      INET_PROTOSW_PERMANENT |
 			      INET_PROTOSW_ICSK,
@@ -958,7 +957,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_UDP,
 		.prot =       &udp_prot,
 		.ops =        &inet_dgram_ops,
-		.capability = -1,
 		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_PERMANENT,
        },
@@ -969,7 +967,6 @@ static struct inet_protosw inetsw_array[] =
 	       .protocol =   IPPROTO_IP,	/* wild card */
 	       .prot =       &raw_prot,
 	       .ops =        &inet_sockraw_ops,
-	       .capability = CAP_NET_RAW,
 	       .no_check =   UDP_CSUM_DEFAULT,
 	       .flags =      INET_PROTOSW_REUSE,
        }
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 470c504b9554..66f79513f4a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = {
 	.protocol	=  IPPROTO_UDPLITE,
 	.prot		=  &udplite_prot,
 	.ops		=  &inet_dgram_ops,
-	.capability	= -1,
 	.no_check	=  0,		/* must checksum (RFC 3828) */
 	.flags		=  INET_PROTOSW_PERMANENT,
 };
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9105b25defe5..1b3889356599 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -158,7 +158,7 @@ lookup_protocol:
 	}
 
 	err = -EPERM;
-	if (answer->capability > 0 && !capable(answer->capability))
+	if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cb834ab7f071..818ef21ba76d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1336,7 +1336,6 @@ static struct inet_protosw rawv6_protosw = {
 	.protocol	= IPPROTO_IP,	/* wild card */
 	.prot		= &rawv6_prot,
 	.ops		= &inet6_sockraw_ops,
-	.capability	= CAP_NET_RAW,
 	.no_check	= UDP_CSUM_DEFAULT,
 	.flags		= INET_PROTOSW_REUSE,
 };
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 34925f089e07..696a22f034e8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2112,7 +2112,6 @@ static struct inet_protosw tcpv6_protosw = {
 	.protocol	=	IPPROTO_TCP,
 	.prot		=	&tcpv6_prot,
 	.ops		=	&inet6_stream_ops,
-	.capability	=	-1,
 	.no_check	=	0,
 	.flags		=	INET_PROTOSW_PERMANENT |
 				INET_PROTOSW_ICSK,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d3b59d73f507..bbe2f3e445fc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1286,7 +1286,6 @@ static struct inet_protosw udpv6_protosw = {
 	.protocol =  IPPROTO_UDP,
 	.prot =      &udpv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.capability =-1,
 	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_PERMANENT,
 };
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d737a27ee010..6ea6938919e6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = {
 	.protocol	= IPPROTO_UDPLITE,
 	.prot		= &udplitev6_prot,
 	.ops		= &inet6_dgram_ops,
-	.capability	= -1,
 	.no_check	= 0,
 	.flags		= INET_PROTOSW_PERMANENT,
 };
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bb280e60e00a..bacd6a7318be 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -930,7 +930,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = {
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.capability    = -1,
 	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG
 };
@@ -939,7 +938,6 @@ static struct inet_protosw sctpv6_stream_protosw = {
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.capability    = -1,
 	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG,
 };
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fe44c57101de..08ef203d36ac 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -909,7 +909,6 @@ static struct inet_protosw sctp_seqpacket_protosw = {
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.capability = -1,
 	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
@@ -918,7 +917,6 @@ static struct inet_protosw sctp_stream_protosw = {
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.capability = -1,
 	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
-- 
cgit v1.2.3


From 3f378b684453f2a028eda463ce383370545d9cc9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 5 Nov 2009 22:18:14 -0800
Subject: net: pass kern to net_proto_family create function

The generic __sock_create function has a kern argument which allows the
security system to make decisions based on if a socket is being created by
the kernel or by userspace.  This patch passes that flag to the
net_proto_family specific create function, so it can do the same thing.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/socket.c    | 2 +-
 drivers/net/pppox.c            | 3 ++-
 include/linux/net.h            | 3 ++-
 net/appletalk/ddp.c            | 3 ++-
 net/atm/pvc.c                  | 3 ++-
 net/atm/svc.c                  | 7 ++++---
 net/ax25/af_ax25.c             | 3 ++-
 net/bluetooth/af_bluetooth.c   | 5 +++--
 net/bluetooth/bnep/sock.c      | 3 ++-
 net/bluetooth/cmtp/sock.c      | 3 ++-
 net/bluetooth/hci_sock.c       | 3 ++-
 net/bluetooth/hidp/sock.c      | 3 ++-
 net/bluetooth/l2cap.c          | 3 ++-
 net/bluetooth/rfcomm/sock.c    | 3 ++-
 net/bluetooth/sco.c            | 3 ++-
 net/can/af_can.c               | 3 ++-
 net/decnet/af_decnet.c         | 3 ++-
 net/econet/af_econet.c         | 3 ++-
 net/ieee802154/af_ieee802154.c | 2 +-
 net/ipv4/af_inet.c             | 3 ++-
 net/ipv6/af_inet6.c            | 3 ++-
 net/ipx/af_ipx.c               | 3 ++-
 net/irda/af_irda.c             | 7 ++++---
 net/iucv/af_iucv.c             | 3 ++-
 net/key/af_key.c               | 3 ++-
 net/llc/af_llc.c               | 5 ++++-
 net/netlink/af_netlink.c       | 3 ++-
 net/netrom/af_netrom.c         | 3 ++-
 net/packet/af_packet.c         | 3 ++-
 net/phonet/af_phonet.c         | 3 ++-
 net/rds/af_rds.c               | 3 ++-
 net/rose/af_rose.c             | 3 ++-
 net/rxrpc/af_rxrpc.c           | 3 ++-
 net/socket.c                   | 2 +-
 net/tipc/socket.c              | 6 ++++--
 net/unix/af_unix.c             | 3 ++-
 net/x25/af_x25.c               | 3 ++-
 37 files changed, 80 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 28182ed8dea1..fcfe17a19a61 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -779,7 +779,7 @@ base_sock_create(struct net *net, struct socket *sock, int protocol)
 }
 
 static int
-mISDN_sock_create(struct net *net, struct socket *sock, int proto)
+mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern)
 {
 	int err = -EPROTONOSUPPORT;
 
diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c
index c14ee24c05a8..ac806b27c658 100644
--- a/drivers/net/pppox.c
+++ b/drivers/net/pppox.c
@@ -104,7 +104,8 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 EXPORT_SYMBOL(pppox_ioctl);
 
-static int pppox_create(struct net *net, struct socket *sock, int protocol)
+static int pppox_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	int rc = -EPROTOTYPE;
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 4da9d571b053..70ee3c310f15 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -204,7 +204,8 @@ struct proto_ops {
 
 struct net_proto_family {
 	int		family;
-	int		(*create)(struct net *net, struct socket *sock, int protocol);
+	int		(*create)(struct net *net, struct socket *sock,
+				  int protocol, int kern);
 	struct module	*owner;
 };
 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index abe38014b7fd..4b0ce2e2b46e 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1021,7 +1021,8 @@ static struct proto ddp_proto = {
  * Create a socket. Initialise the socket, blank the addresses
  * set the state.
  */
-static int atalk_create(struct net *net, struct socket *sock, int protocol)
+static int atalk_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct sock *sk;
 	int rc = -ESOCKTNOSUPPORT;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index a6e1fdbae87f..8d74e62b0d79 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -127,7 +127,8 @@ static const struct proto_ops pvc_proto_ops = {
 };
 
 
-static int pvc_create(struct net *net, struct socket *sock,int protocol)
+static int pvc_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	if (net != &init_net)
 		return -EAFNOSUPPORT;
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 819354233318..c7395070ee78 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -25,7 +25,7 @@
 #include "signaling.h"
 #include "addr.h"
 
-static int svc_create(struct net *net, struct socket *sock,int protocol);
+static int svc_create(struct net *net, struct socket *sock, int protocol, int kern);
 
 /*
  * Note: since all this is still nicely synchronized with the signaling demon,
@@ -330,7 +330,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags)
 
 	lock_sock(sk);
 
-	error = svc_create(sock_net(sk), newsock,0);
+	error = svc_create(sock_net(sk), newsock, 0, 0);
 	if (error)
 		goto out;
 
@@ -650,7 +650,8 @@ static const struct proto_ops svc_proto_ops = {
 };
 
 
-static int svc_create(struct net *net, struct socket *sock,int protocol)
+static int svc_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	int error;
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f1e998b2796e..d6ddfa4c4471 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -799,7 +799,8 @@ static struct proto ax25_proto = {
 	.obj_size = sizeof(struct sock),
 };
 
-static int ax25_create(struct net *net, struct socket *sock, int protocol)
+static int ax25_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	struct sock *sk;
 	ax25_cb *ax25;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 399e59c9c6cb..087cc51f5927 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -126,7 +126,8 @@ int bt_sock_unregister(int proto)
 }
 EXPORT_SYMBOL(bt_sock_unregister);
 
-static int bt_sock_create(struct net *net, struct socket *sock, int proto)
+static int bt_sock_create(struct net *net, struct socket *sock, int proto,
+			  int kern)
 {
 	int err;
 
@@ -144,7 +145,7 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto)
 	read_lock(&bt_proto_lock);
 
 	if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
-		err = bt_proto[proto]->create(net, sock, proto);
+		err = bt_proto[proto]->create(net, sock, proto, kern);
 		bt_sock_reclassify_lock(sock, proto);
 		module_put(bt_proto[proto]->owner);
 	}
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 0a2c5460bb48..2ff6ac7b2ed4 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -195,7 +195,8 @@ static struct proto bnep_proto = {
 	.obj_size	= sizeof(struct bt_sock)
 };
 
-static int bnep_sock_create(struct net *net, struct socket *sock, int protocol)
+static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index de7c8040bc56..978cc3a718ad 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -190,7 +190,8 @@ static struct proto cmtp_proto = {
 	.obj_size	= sizeof(struct bt_sock)
 };
 
-static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol)
+static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index e7395f231989..1ca5c7ca9bd4 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -621,7 +621,8 @@ static struct proto hci_sk_proto = {
 	.obj_size	= sizeof(struct hci_pinfo)
 };
 
-static int hci_sock_create(struct net *net, struct socket *sock, int protocol)
+static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
+			   int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 4beb6a7a2953..9cfef68b9fec 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -241,7 +241,8 @@ static struct proto hidp_proto = {
 	.obj_size	= sizeof(struct bt_sock)
 };
 
-static int hidp_sock_create(struct net *net, struct socket *sock, int protocol)
+static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index d65101d92ee5..365ae161d702 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -819,7 +819,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
 	return sk;
 }
 
-static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
+static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
+			     int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d3bfc1b0afb1..4b5968dda673 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -323,7 +323,8 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
 	return sk;
 }
 
-static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol)
+static int rfcomm_sock_create(struct net *net, struct socket *sock,
+			      int protocol, int kern)
 {
 	struct sock *sk;
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 694a65541b73..dd8f6ec57dce 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -430,7 +430,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
 	return sk;
 }
 
-static int sco_sock_create(struct net *net, struct socket *sock, int protocol)
+static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
+			   int kern)
 {
 	struct sock *sk;
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 9c0426dc3184..833bd838edc6 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -114,7 +114,8 @@ static void can_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_receive_queue);
 }
 
-static int can_create(struct net *net, struct socket *sock, int protocol)
+static int can_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	struct sock *sk;
 	struct can_proto *cp;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2e355841ca99..9ade3a6de954 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -675,7 +675,8 @@ char *dn_addr2asc(__u16 addr, char *buf)
 
 
-static int dn_create(struct net *net, struct socket *sock, int protocol)
+static int dn_create(struct net *net, struct socket *sock, int protocol,
+		     int kern)
 {
 	struct sock *sk;
 
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 5e9426a11c3e..596679803de5 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -605,7 +605,8 @@ static struct proto econet_proto = {
  *	Create an Econet socket
  */
 
-static int econet_create(struct net *net, struct socket *sock, int protocol)
+static int econet_create(struct net *net, struct socket *sock, int protocol,
+			 int kern)
 {
 	struct sock *sk;
 	struct econet_sock *eo;
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 309348fba72b..de6e34d2a7f8 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -234,7 +234,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
  * set the state.
  */
 static int ieee802154_create(struct net *net, struct socket *sock,
-		int protocol)
+			     int protocol, int kern)
 {
 	struct sock *sk;
 	int rc;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 180ec4c94919..5c7e42c02afb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -262,7 +262,8 @@ static inline int inet_netns_ok(struct net *net, int protocol)
  *	Create an inet socket.
  */
 
-static int inet_create(struct net *net, struct socket *sock, int protocol)
+static int inet_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	struct sock *sk;
 	struct inet_protosw *answer;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1b3889356599..45ed5e05ab32 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
 }
 
-static int inet6_create(struct net *net, struct socket *sock, int protocol)
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct inet_sock *inet;
 	struct ipv6_pinfo *np;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 6481ee4bdf72..96d193a24415 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1352,7 +1352,8 @@ static struct proto ipx_proto = {
 	.obj_size = sizeof(struct ipx_sock),
 };
 
-static int ipx_create(struct net *net, struct socket *sock, int protocol)
+static int ipx_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	int rc = -ESOCKTNOSUPPORT;
 	struct sock *sk;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 9429e4002bca..e73a0016c0aa 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -61,7 +61,7 @@
 
 #include <net/irda/af_irda.h>
 
-static int irda_create(struct net *net, struct socket *sock, int protocol);
+static int irda_create(struct net *net, struct socket *sock, int protocol, int kern);
 
 static const struct proto_ops irda_stream_ops;
 static const struct proto_ops irda_seqpacket_ops;
@@ -839,7 +839,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	IRDA_DEBUG(2, "%s()\n", __func__);
 
-	err = irda_create(sock_net(sk), newsock, sk->sk_protocol);
+	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
 	if (err)
 		return err;
 
@@ -1062,7 +1062,8 @@ static struct proto irda_proto = {
  *    Create IrDA socket
  *
  */
-static int irda_create(struct net *net, struct socket *sock, int protocol)
+static int irda_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	struct sock *sk;
 	struct irda_sock *self;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 3aebabb158a8..1e428863574f 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -481,7 +481,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 }
 
 /* Create an IUCV socket */
-static int iucv_sock_create(struct net *net, struct socket *sock, int protocol)
+static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
 {
 	struct sock *sk;
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 472f6594184a..86b2c22d0918 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -177,7 +177,8 @@ static struct proto key_proto = {
 	.obj_size = sizeof(struct pfkey_sock),
 };
 
-static int pfkey_create(struct net *net, struct socket *sock, int protocol)
+static int pfkey_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 	struct sock *sk;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 4866b4fb0c27..5266c286b260 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -140,14 +140,17 @@ static struct proto llc_proto = {
 
 /**
  *	llc_ui_create - alloc and init a new llc_ui socket
+ *	@net: network namespace (must be default network)
  *	@sock: Socket to initialize and attach allocated sk to.
  *	@protocol: Unused.
+ *	@kern: on behalf of kernel or userspace
  *
  *	Allocate and initialize a new llc_ui socket, validate the user wants a
  *	socket type we have available.
  *	Returns 0 upon success, negative upon failure.
  */
-static int llc_ui_create(struct net *net, struct socket *sock, int protocol)
+static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
+			 int kern)
 {
 	struct sock *sk;
 	int rc = -ESOCKTNOSUPPORT;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0cd2d8829313..aea805c98da3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -428,7 +428,8 @@ static int __netlink_create(struct net *net, struct socket *sock,
 	return 0;
 }
 
-static int netlink_create(struct net *net, struct socket *sock, int protocol)
+static int netlink_create(struct net *net, struct socket *sock, int protocol,
+			  int kern)
 {
 	struct module *module = NULL;
 	struct mutex *cb_mutex;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 281fa597cae5..4bdd5697f63b 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -425,7 +425,8 @@ static struct proto nr_proto = {
 	.obj_size = sizeof(struct nr_sock),
 };
 
-static int nr_create(struct net *net, struct socket *sock, int protocol)
+static int nr_create(struct net *net, struct socket *sock, int protocol,
+		     int kern)
 {
 	struct sock *sk;
 	struct nr_sock *nr;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91d246d34780..3304caa65347 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1344,7 +1344,8 @@ static struct proto packet_proto = {
  *	Create a packet of type SOCK_PACKET.
  */
 
-static int packet_create(struct net *net, struct socket *sock, int protocol)
+static int packet_create(struct net *net, struct socket *sock, int protocol,
+			 int kern)
 {
 	struct sock *sk;
 	struct packet_sock *po;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 66737aa995ea..3bd1be6b26f0 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -60,7 +60,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp)
 
 /* protocol family functions */
 
-static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
+static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
 {
 	struct sock *sk;
 	struct pn_sock *pn;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2b978dc6e75d..e25d8d5ce8df 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -410,7 +410,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	return 0;
 }
 
-static int rds_create(struct net *net, struct socket *sock, int protocol)
+static int rds_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	struct sock *sk;
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index c17734c2ce89..4de4287fec37 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -512,7 +512,8 @@ static struct proto rose_proto = {
 	.obj_size = sizeof(struct rose_sock),
 };
 
-static int rose_create(struct net *net, struct socket *sock, int protocol)
+static int rose_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	struct sock *sk;
 	struct rose_sock *rose;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 6817c9781ef3..f978d02a248a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -608,7 +608,8 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
 /*
  * create an RxRPC socket
  */
-static int rxrpc_create(struct net *net, struct socket *sock, int protocol)
+static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct rxrpc_sock *rx;
 	struct sock *sk;
diff --git a/net/socket.c b/net/socket.c
index 9dff31c9b799..4f3e0f0c156b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1252,7 +1252,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol,
 	/* Now protected by module ref count */
 	rcu_read_unlock();
 
-	err = pf->create(net, sock, protocol);
+	err = pf->create(net, sock, protocol, kern);
 	if (err < 0)
 		goto out_module_put;
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e6d9abf7440e..d00c2119faf3 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -177,6 +177,7 @@ static void reject_rx_queue(struct sock *sk)
  * @net: network namespace (must be default network)
  * @sock: pre-allocated socket structure
  * @protocol: protocol indicator (must be 0)
+ * @kern: caused by kernel or by userspace?
  *
  * This routine creates additional data structures used by the TIPC socket,
  * initializes them, and links them together.
@@ -184,7 +185,8 @@ static void reject_rx_queue(struct sock *sk)
  * Returns 0 on success, errno otherwise
  */
 
-static int tipc_create(struct net *net, struct socket *sock, int protocol)
+static int tipc_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	const struct proto_ops *ops;
 	socket_state state;
@@ -1528,7 +1530,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 
 	buf = skb_peek(&sk->sk_receive_queue);
 
-	res = tipc_create(sock_net(sock->sk), new_sock, 0);
+	res = tipc_create(sock_net(sock->sk), new_sock, 0, 0);
 	if (!res) {
 		struct sock *new_sk = new_sock->sk;
 		struct tipc_sock *new_tsock = tipc_sk(new_sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3291902f0b88..178d3af2a605 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -621,7 +621,8 @@ out:
 	return sk;
 }
 
-static int unix_create(struct net *net, struct socket *sock, int protocol)
+static int unix_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	if (protocol && protocol != PF_UNIX)
 		return -EPROTONOSUPPORT;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e19d811788a5..38e235f61e27 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -501,7 +501,8 @@ out:
 	return sk;
 }
 
-static int x25_create(struct net *net, struct socket *sock, int protocol)
+static int x25_create(struct net *net, struct socket *sock, int protocol,
+		      int kern)
 {
 	struct sock *sk;
 	struct x25_sock *x25;
-- 
cgit v1.2.3


From 765af10de6d93820def9978c53ed828e4d3bd4f4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <ext-jani.1.nikula@nokia.com>
Date: Thu, 5 Nov 2009 22:59:46 -0800
Subject: Input: add new keycodes useful in mobile devices

Add new codes for camera focus key, and camera lens cover, keypad slide,
front proximity switches.

Signed-off-by: Jani Nikula <ext-jani.1.nikula@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 9ee67b4b2b48..9a04e26daab2 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -595,6 +595,8 @@ struct input_absinfo {
 #define KEY_NUMERIC_STAR	0x20a
 #define KEY_NUMERIC_POUND	0x20b
 
+#define KEY_CAMERA_FOCUS	0x210
+
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x2ff
@@ -677,6 +679,9 @@ struct input_absinfo {
 #define SW_LINEOUT_INSERT	0x06  /* set = inserted */
 #define SW_JACK_PHYSICAL_INSERT 0x07  /* set = mechanical switch set */
 #define SW_VIDEOOUT_INSERT	0x08  /* set = inserted */
+#define SW_CAMERA_LENS_COVER	0x09  /* set = lens covered */
+#define SW_KEYPAD_SLIDE		0x0a  /* set = keypad slide out */
+#define SW_FRONT_PROXIMITY	0x0b  /* set = front proximity sensor active */
 #define SW_MAX			0x0f
 #define SW_CNT			(SW_MAX+1)
 
-- 
cgit v1.2.3


From 1eaa9d03d3ee9156c8c405b006ce892ae28290ad Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Tue, 15 Sep 2009 17:04:44 +0400
Subject: ieee802154: add LIST_PHY command support

Add nl802154 command to get information about PHY's present in
the system.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 include/linux/nl802154.h   |   4 +
 net/ieee802154/Makefile    |   2 +-
 net/ieee802154/netlink.c   |   4 +
 net/ieee802154/nl-phy.c    | 188 +++++++++++++++++++++++++++++++++++++++++++++
 net/ieee802154/nl_policy.c |   2 +
 5 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 net/ieee802154/nl-phy.c

(limited to 'include/linux')

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index b7d9435d5a9f..275fd94f4727 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h
@@ -65,6 +65,9 @@ enum {
 	IEEE802154_ATTR_SEC,
 
 	IEEE802154_ATTR_PAGE,
+	IEEE802154_ATTR_CHANNEL_PAGE_LIST,
+
+	IEEE802154_ATTR_PHY_NAME,
 
 	__IEEE802154_ATTR_MAX,
 };
@@ -114,6 +117,7 @@ enum {
 	IEEE802154_RX_ENABLE_CONF, /* Not supported yet */
 
 	IEEE802154_LIST_IFACE,
+	IEEE802154_LIST_PHY,
 
 	__IEEE802154_CMD_MAX,
 };
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 69af9f6a404b..ce2d33582859 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_IEEE802154) +=	ieee802154.o af_802154.o
-ieee802154-y		:= netlink.o nl-mac.o nl_policy.o wpan-class.o
+ieee802154-y		:= netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
 af_802154-y		:= af_ieee802154.o raw.o dgram.o
 
 ccflags-y += -Wall -DDEBUG
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 5b738ec7d9f1..8a221737f75c 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -87,6 +87,10 @@ int __init ieee802154_nl_init(void)
 	if (rc)
 		goto fail;
 
+	rc = nl802154_phy_register();
+	if (rc)
+		goto fail;
+
 	return 0;
 
 fail:
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
new file mode 100644
index 000000000000..b7af722eb784
--- /dev/null
+++ b/net/ieee802154/nl-phy.c
@@ -0,0 +1,188 @@
+/*
+ * Netlink inteface for IEEE 802.15.4 stack
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ * Maxim Osipov <maxim.osipov@siemens.com>
+ */
+
+#include <linux/kernel.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/wpan-phy.h>
+#include <linux/nl802154.h>
+
+#include "ieee802154.h"
+
+static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid,
+	u32 seq, int flags, struct wpan_phy *phy)
+{
+	void *hdr;
+	int i, pages = 0;
+	uint32_t *buf = kzalloc(32 * sizeof(uint32_t), GFP_KERNEL);
+
+	pr_debug("%s\n", __func__);
+
+	if (!buf)
+		goto out;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags,
+		IEEE802154_LIST_PHY);
+	if (!hdr)
+		goto out;
+
+	mutex_lock(&phy->pib_lock);
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy));
+
+	NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, phy->current_page);
+	NLA_PUT_U8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel);
+	for (i = 0; i < 32; i++) {
+		if (phy->channels_supported[i])
+			buf[pages++] = phy->channels_supported[i] | (i << 27);
+	}
+	if (pages)
+		NLA_PUT(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST,
+				pages * sizeof(uint32_t), buf);
+
+	mutex_unlock(&phy->pib_lock);
+	return genlmsg_end(msg, hdr);
+
+nla_put_failure:
+	mutex_unlock(&phy->pib_lock);
+	genlmsg_cancel(msg, hdr);
+out:
+	kfree(buf);
+	return -EMSGSIZE;
+}
+
+static int ieee802154_list_phy(struct sk_buff *skb,
+	struct genl_info *info)
+{
+	/* Request for interface name, index, type, IEEE address,
+	   PAN Id, short address */
+	struct sk_buff *msg;
+	struct wpan_phy *phy;
+	const char *name;
+	int rc = -ENOBUFS;
+
+	pr_debug("%s\n", __func__);
+
+	if (!info->attrs[IEEE802154_ATTR_PHY_NAME])
+		return -EINVAL;
+
+	name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]);
+	if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0')
+		return -EINVAL; /* phy name should be null-terminated */
+
+
+	phy = wpan_phy_find(name);
+	if (!phy)
+		return -ENODEV;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		goto out_dev;
+
+	rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq,
+			0, phy);
+	if (rc < 0)
+		goto out_free;
+
+	wpan_phy_put(phy);
+
+	return genlmsg_reply(msg, info);
+out_free:
+	nlmsg_free(msg);
+out_dev:
+	wpan_phy_put(phy);
+	return rc;
+
+}
+
+struct dump_phy_data {
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx, s_idx;
+};
+
+static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data)
+{
+	int rc;
+	struct dump_phy_data *data = _data;
+
+	pr_debug("%s\n", __func__);
+
+	if (data->idx++ < data->s_idx)
+		return 0;
+
+	rc = ieee802154_nl_fill_phy(data->skb,
+			NETLINK_CB(data->cb->skb).pid,
+			data->cb->nlh->nlmsg_seq,
+			NLM_F_MULTI,
+			phy);
+
+	if (rc < 0) {
+		data->idx--;
+		return rc;
+	}
+
+	return 0;
+}
+
+static int ieee802154_dump_phy(struct sk_buff *skb,
+	struct netlink_callback *cb)
+{
+	struct dump_phy_data data = {
+		.cb = cb,
+		.skb = skb,
+		.s_idx = cb->args[0],
+		.idx = 0,
+	};
+
+	pr_debug("%s\n", __func__);
+
+	wpan_phy_for_each(ieee802154_dump_phy_iter, &data);
+
+	cb->args[0] = data.idx;
+
+	return skb->len;
+}
+
+static struct genl_ops ieee802154_phy_ops[] = {
+	IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
+							ieee802154_dump_phy),
+};
+
+/*
+ * No need to unregister as family unregistration will do it.
+ */
+int nl802154_phy_register(void)
+{
+	int i;
+	int rc;
+
+	for (i = 0; i < ARRAY_SIZE(ieee802154_phy_ops); i++) {
+		rc = genl_register_ops(&nl802154_family,
+				&ieee802154_phy_ops[i]);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 2363ebee02e7..6adda4d46f95 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -27,6 +27,7 @@
 const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
 	[IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, },
 	[IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, },
+	[IEEE802154_ATTR_PHY_NAME] = { .type = NLA_STRING, },
 
 	[IEEE802154_ATTR_STATUS] = { .type = NLA_U8, },
 	[IEEE802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, },
@@ -50,5 +51,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
 	[IEEE802154_ATTR_CHANNELS] = { .type = NLA_U32, },
 	[IEEE802154_ATTR_DURATION] = { .type = NLA_U8, },
 	[IEEE802154_ATTR_ED_LIST] = { .len = 27 },
+	[IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, },
 };
 
-- 
cgit v1.2.3


From bb1cafb8fc414d6dbe933f888df6540c2ef02101 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Thu, 5 Nov 2009 16:56:23 +0300
Subject: ieee802154: add support for creation/removal of logic interfaces

Add support for two more NL802154 commands: ADD_IFACE and DEL_IFACE,
thus allowing creation and removal of logic WPAN interfaces on the top
of wpan-phy.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 include/linux/nl802154.h |   2 +
 include/net/wpan-phy.h   |   4 ++
 net/ieee802154/nl-phy.c  | 156 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index 275fd94f4727..33d9f5175109 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h
@@ -118,6 +118,8 @@ enum {
 
 	IEEE802154_LIST_IFACE,
 	IEEE802154_LIST_PHY,
+	IEEE802154_ADD_IFACE,
+	IEEE802154_DEL_IFACE,
 
 	__IEEE802154_CMD_MAX,
 };
diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h
index a65e98509fbc..85926231c07a 100644
--- a/include/net/wpan-phy.h
+++ b/include/net/wpan-phy.h
@@ -41,6 +41,10 @@ struct wpan_phy {
 	struct device dev;
 	int idx;
 
+	struct net_device *(*add_iface)(struct wpan_phy *phy,
+			const char *name);
+	void (*del_iface)(struct wpan_phy *phy, struct net_device *dev);
+
 	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
 };
 
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index b7af722eb784..199a2d9d12f9 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -26,6 +26,9 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 #include <net/wpan-phy.h>
+#include <net/af_ieee802154.h>
+#include <net/ieee802154_netdev.h>
+#include <net/rtnetlink.h> /* for rtnl_{un,}lock */
 #include <linux/nl802154.h>
 
 #include "ieee802154.h"
@@ -164,9 +167,162 @@ static int ieee802154_dump_phy(struct sk_buff *skb,
 	return skb->len;
 }
 
+static int ieee802154_add_iface(struct sk_buff *skb,
+		struct genl_info *info)
+{
+	struct sk_buff *msg;
+	struct wpan_phy *phy;
+	const char *name;
+	const char *devname;
+	int rc = -ENOBUFS;
+	struct net_device *dev;
+
+	pr_debug("%s\n", __func__);
+
+	if (!info->attrs[IEEE802154_ATTR_PHY_NAME])
+		return -EINVAL;
+
+	name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]);
+	if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0')
+		return -EINVAL; /* phy name should be null-terminated */
+
+	if (info->attrs[IEEE802154_ATTR_DEV_NAME]) {
+		devname = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]);
+		if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1]
+				!= '\0')
+			return -EINVAL; /* phy name should be null-terminated */
+	} else  {
+		devname = "wpan%d";
+	}
+
+	if (strlen(devname) >= IFNAMSIZ)
+		return -ENAMETOOLONG;
+
+	phy = wpan_phy_find(name);
+	if (!phy)
+		return -ENODEV;
+
+	msg = ieee802154_nl_new_reply(info, 0, IEEE802154_ADD_IFACE);
+	if (!msg)
+		goto out_dev;
+
+	if (!phy->add_iface) {
+		rc = -EINVAL;
+		goto nla_put_failure;
+	}
+
+	dev = phy->add_iface(phy, devname);
+	if (IS_ERR(dev)) {
+		rc = PTR_ERR(dev);
+		goto nla_put_failure;
+	}
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy));
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+
+	dev_put(dev);
+
+	wpan_phy_put(phy);
+
+	return ieee802154_nl_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+out_dev:
+	wpan_phy_put(phy);
+	return rc;
+}
+
+static int ieee802154_del_iface(struct sk_buff *skb,
+		struct genl_info *info)
+{
+	struct sk_buff *msg;
+	struct wpan_phy *phy;
+	const char *name;
+	int rc;
+	struct net_device *dev;
+
+	pr_debug("%s\n", __func__);
+
+	if (!info->attrs[IEEE802154_ATTR_DEV_NAME])
+		return -EINVAL;
+
+	name = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]);
+	if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0')
+		return -EINVAL; /* name should be null-terminated */
+
+	dev = dev_get_by_name(genl_info_net(info), name);
+	if (!dev)
+		return -ENODEV;
+
+	phy = ieee802154_mlme_ops(dev)->get_phy(dev);
+	BUG_ON(!phy);
+
+	rc = -EINVAL;
+	/* phy name is optional, but should be checked if it's given */
+	if (info->attrs[IEEE802154_ATTR_PHY_NAME]) {
+		struct wpan_phy *phy2;
+
+		const char *pname =
+			nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]);
+		if (pname[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1]
+				!= '\0')
+			/* name should be null-terminated */
+			goto out_dev;
+
+		phy2 = wpan_phy_find(pname);
+		if (!phy2)
+			goto out_dev;
+
+		if (phy != phy2) {
+			wpan_phy_put(phy2);
+			goto out_dev;
+		}
+	}
+
+	rc = -ENOBUFS;
+
+	msg = ieee802154_nl_new_reply(info, 0, IEEE802154_DEL_IFACE);
+	if (!msg)
+		goto out_dev;
+
+	if (!phy->del_iface) {
+		rc = -EINVAL;
+		goto nla_put_failure;
+	}
+
+	rtnl_lock();
+	phy->del_iface(phy, dev);
+
+	/* We don't have device anymore */
+	dev_put(dev);
+	dev = NULL;
+
+	rtnl_unlock();
+
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy));
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, name);
+
+	wpan_phy_put(phy);
+
+	return ieee802154_nl_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+out_dev:
+	wpan_phy_put(phy);
+	if (dev)
+		dev_put(dev);
+
+	return rc;
+}
+
 static struct genl_ops ieee802154_phy_ops[] = {
 	IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
 							ieee802154_dump_phy),
+	IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface),
+	IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface),
 };
 
 /*
-- 
cgit v1.2.3


From 642c6d946b5cdc27d0146c41dc20b7c4d4c3ccd8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Apr 2009 01:08:48 -0700
Subject: sysctl: Make do_sysctl static

Now that all of the architectures use compat_sys_sysctl do_sysctl
can become static.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sysctl.h | 4 ----
 kernel/sysctl_binary.c | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1e4743ee6831..82c32b89932d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -996,10 +996,6 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int,
 extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
 				      void __user *, size_t *, loff_t *);
 
-extern int do_sysctl (int __user *name, int nlen,
-		      void __user *oldval, size_t __user *oldlenp,
-		      void __user *newval, size_t newlen);
-
 extern ctl_handler sysctl_data;
 extern ctl_handler sysctl_string;
 extern ctl_handler sysctl_intvec;
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 775cc49da622..642019894299 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -135,7 +135,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
 	return;
 }
 
-int do_sysctl(int __user *args_name, int nlen,
+static int do_sysctl(int __user *args_name, int nlen,
 	void __user *oldval, size_t __user *oldlenp,
 	void __user *newval, size_t newlen)
 {
-- 
cgit v1.2.3


From 0efea0006335a2425b1a12a2ad35efad626fe353 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Thu, 5 Nov 2009 12:05:11 +0900
Subject: PCI: cache PCIe capability offset

There are a lot of codes that searches PCI express capability offset
in the PCI configuration space using pci_find_capability(). Caching it
in the struct pci_dev will reduce unncecessary search. This patch adds
an additional 'pcie_cap' fields into struct pci_dev, which is
initialized at pci device scan time (in set_pcie_port_type()).

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 1 +
 include/linux/pci.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 623086f9ba84..54b9f1501487 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -692,6 +692,7 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 	if (!pos)
 		return;
 	pdev->is_pcie = 1;
+	pdev->pcie_cap = pos;
 	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
 	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 86c31ac454d1..233b3a092035 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -218,6 +218,7 @@ struct pci_dev {
 	unsigned int	class;		/* 3 bytes: (base,sub,prog-if) */
 	u8		revision;	/* PCI revision, low byte of class word */
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
+	u8		pcie_cap;	/* PCI-E capability offset */
 	u8		pcie_type;	/* PCI-E device/port type */
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;  		/* which interrupt pin this device uses */
-- 
cgit v1.2.3


From 2dceba14ef0e62738d58777a1bd4018130d47a74 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Nov 2009 08:09:03 +0000
Subject: compat: add struct compat_ifreq etc to compat.h

In order to move socket ioctl conversion code into multiple
places in the socket code, we need a common defintion of
the data structures it uses.

Also change the name from ifreq32 to compat_ifreq to
follow the naming convention for compat.h

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/compat.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index af931ee43dd8..8311d2e29632 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -10,6 +10,8 @@
 #include <linux/stat.h>
 #include <linux/param.h>	/* for HZ */
 #include <linux/sem.h>
+#include <linux/socket.h>
+#include <linux/if.h>
 
 #include <asm/compat.h>
 #include <asm/siginfo.h>
@@ -154,6 +156,43 @@ typedef struct compat_sigevent {
 	} _sigev_un;
 } compat_sigevent_t;
 
+struct compat_ifmap {
+	compat_ulong_t mem_start;
+	compat_ulong_t mem_end;
+	unsigned short base_addr;
+	unsigned char irq;
+	unsigned char dma;
+	unsigned char port;
+};
+
+struct compat_ifreq {
+#define IFHWADDRLEN     6
+#define IFNAMSIZ        16
+        union {
+                char    ifrn_name[IFNAMSIZ];            /* if name, e.g. "en0" */
+        } ifr_ifrn;
+        union {
+                struct  sockaddr ifru_addr;
+                struct  sockaddr ifru_dstaddr;
+                struct  sockaddr ifru_broadaddr;
+                struct  sockaddr ifru_netmask;
+                struct  sockaddr ifru_hwaddr;
+                short   ifru_flags;
+                compat_int_t     ifru_ivalue;
+                compat_int_t     ifru_mtu;
+                struct  compat_ifmap ifru_map;
+                char    ifru_slave[IFNAMSIZ];   /* Just fits the size */
+		char	ifru_newname[IFNAMSIZ];
+                compat_caddr_t ifru_data;
+	    /* XXXX? ifru_settings should be here */
+        } ifr_ifru;
+};
+
+struct compat_ifconf {
+        compat_int_t	ifc_len;                        /* size of buffer       */
+        compat_caddr_t  ifcbuf;
+};
+
 struct compat_robust_list {
 	compat_uptr_t			next;
 };
-- 
cgit v1.2.3


From b622d97a63ad4ce890b625c62acd1bb894592e63 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 6 Nov 2009 20:46:52 -0800
Subject: net: compat: No need to define IFHWADDRLEN and IFNAMSIZ twice.

It's defined colloqually in linux/if.h and linux/compat.h
includes that.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/compat.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8311d2e29632..224c7a896172 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -166,8 +166,6 @@ struct compat_ifmap {
 };
 
 struct compat_ifreq {
-#define IFHWADDRLEN     6
-#define IFNAMSIZ        16
         union {
                 char    ifrn_name[IFNAMSIZ];            /* if name, e.g. "en0" */
         } ifr_ifrn;
-- 
cgit v1.2.3


From b215c57dcc847b15693899d26aa0ee4669dacefb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 5 Nov 2009 04:37:30 +0000
Subject: net: kill proto_ops wrapper

All users of wrapped proto_ops are now gone, so we can safely remove
the wrappers as well.

Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 83 -----------------------------------------------------
 1 file changed, 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 70ee3c310f15..6ce87663551c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -268,89 +268,6 @@ extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 extern int kernel_sock_shutdown(struct socket *sock,
 				enum sock_shutdown_cmd how);
 
-#ifndef CONFIG_SMP
-#define SOCKOPS_WRAPPED(name) name
-#define SOCKOPS_WRAP(name, fam)
-#else
-
-#define SOCKOPS_WRAPPED(name) __unlocked_##name
-
-#define SOCKCALL_WRAP(name, call, parms, args)		\
-static int __lock_##name##_##call  parms		\
-{							\
-	int ret;					\
-	lock_kernel();					\
-	ret = __unlocked_##name##_ops.call  args ;\
-	unlock_kernel();				\
-	return ret;					\
-}
-
-#define SOCKCALL_UWRAP(name, call, parms, args)		\
-static unsigned int __lock_##name##_##call  parms	\
-{							\
-	int ret;					\
-	lock_kernel();					\
-	ret = __unlocked_##name##_ops.call  args ;\
-	unlock_kernel();				\
-	return ret;					\
-}
-
-
-#define SOCKOPS_WRAP(name, fam)					\
-SOCKCALL_WRAP(name, release, (struct socket *sock), (sock))	\
-SOCKCALL_WRAP(name, bind, (struct socket *sock, struct sockaddr *uaddr, int addr_len), \
-	      (sock, uaddr, addr_len))				\
-SOCKCALL_WRAP(name, connect, (struct socket *sock, struct sockaddr * uaddr, \
-			      int addr_len, int flags), 	\
-	      (sock, uaddr, addr_len, flags))			\
-SOCKCALL_WRAP(name, socketpair, (struct socket *sock1, struct socket *sock2), \
-	      (sock1, sock2))					\
-SOCKCALL_WRAP(name, accept, (struct socket *sock, struct socket *newsock, \
-			 int flags), (sock, newsock, flags)) \
-SOCKCALL_WRAP(name, getname, (struct socket *sock, struct sockaddr *uaddr, \
-			 int *addr_len, int peer), (sock, uaddr, addr_len, peer)) \
-SOCKCALL_UWRAP(name, poll, (struct file *file, struct socket *sock, struct poll_table_struct *wait), \
-	      (file, sock, wait)) \
-SOCKCALL_WRAP(name, ioctl, (struct socket *sock, unsigned int cmd, \
-			 unsigned long arg), (sock, cmd, arg)) \
-SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \
-			 unsigned long arg), (sock, cmd, arg)) \
-SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \
-SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \
-SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \
-			 char __user *optval, unsigned int optlen), (sock, level, optname, optval, optlen)) \
-SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \
-			 char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \
-SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \
-	      (iocb, sock, m, len)) \
-SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \
-	      (iocb, sock, m, len, flags)) \
-SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \
-	      (file, sock, vma)) \
-	      \
-static const struct proto_ops name##_ops = {			\
-	.family		= fam,				\
-	.owner		= THIS_MODULE,			\
-	.release	= __lock_##name##_release,	\
-	.bind		= __lock_##name##_bind,		\
-	.connect	= __lock_##name##_connect,	\
-	.socketpair	= __lock_##name##_socketpair,	\
-	.accept		= __lock_##name##_accept,	\
-	.getname	= __lock_##name##_getname,	\
-	.poll		= __lock_##name##_poll,		\
-	.ioctl		= __lock_##name##_ioctl,	\
-	.compat_ioctl	= __lock_##name##_compat_ioctl,	\
-	.listen		= __lock_##name##_listen,	\
-	.shutdown	= __lock_##name##_shutdown,	\
-	.setsockopt	= __lock_##name##_setsockopt,	\
-	.getsockopt	= __lock_##name##_getsockopt,	\
-	.sendmsg	= __lock_##name##_sendmsg,	\
-	.recvmsg	= __lock_##name##_recvmsg,	\
-	.mmap		= __lock_##name##_mmap,		\
-};
-
-#endif
-
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))
 
-- 
cgit v1.2.3


From 3806e94b0148350c72f9a3214274026b6ca03f49 Mon Sep 17 00:00:00 2001
From: Crane Cai <crane.cai@amd.com>
Date: Sat, 7 Nov 2009 13:10:46 +0100
Subject: i2c-piix4: Modify code name SB900 to Hudson-2

Change SB900 to its formal code name Hudson-2.

Signed-off-by: Crane Cai <crane.cai@amd.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/busses/i2c-piix4 | 2 +-
 drivers/i2c/busses/Kconfig         | 2 +-
 drivers/i2c/busses/i2c-piix4.c     | 8 ++++----
 include/linux/pci_ids.h            | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index c5b37c570554..ac540c71c7eb 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -8,7 +8,7 @@ Supported adapters:
     Datasheet: Only available via NDA from ServerWorks
   * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
     Datasheet: Not publicly available
-  * AMD SB900
+  * AMD Hudson-2
     Datasheet: Not publicly available
   * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
     Datasheet: Publicly available at the SMSC website http://www.smsc.com
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 737335ff2b21..e8fe7f169e25 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -128,7 +128,7 @@ config I2C_PIIX4
 	    ATI SB600
 	    ATI SB700
 	    ATI SB800
-	    AMD SB900
+	    AMD Hudson-2
 	    Serverworks OSB4
 	    Serverworks CSB5
 	    Serverworks CSB6
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index d26a972aacaa..1e245e9cad31 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -22,7 +22,7 @@
 	Intel PIIX4, 440MX
 	Serverworks OSB4, CSB5, CSB6, HT-1000, HT-1100
 	ATI IXP200, IXP300, IXP400, SB600, SB700, SB800
-	AMD SB900
+	AMD Hudson-2
 	SMSC Victory66
 
    Note: we assume there can only be one device, with one SMBus interface.
@@ -233,9 +233,9 @@ static int __devinit piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 	unsigned short smba_idx = 0xcd6;
 	u8 smba_en_lo, smba_en_hi, i2ccfg, i2ccfg_offset = 0x10, smb_en = 0x2c;
 
-	/* SB800 SMBus does not support forcing address */
+	/* SB800 and later SMBus does not support forcing address */
 	if (force || force_addr) {
-		dev_err(&PIIX4_dev->dev, "SB800 SMBus does not support "
+		dev_err(&PIIX4_dev->dev, "SMBus does not support "
 			"forcing address!\n");
 		return -EINVAL;
 	}
@@ -480,7 +480,7 @@ static struct pci_device_id piix4_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP300_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS,
 		     PCI_DEVICE_ID_SERVERWORKS_OSB4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b0f0f3851cd4..9ceb392cb984 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -543,7 +543,7 @@
 #define PCI_DEVICE_ID_AMD_8131_BRIDGE	0x7450
 #define PCI_DEVICE_ID_AMD_8131_APIC	0x7451
 #define PCI_DEVICE_ID_AMD_8132_BRIDGE	0x7458
-#define PCI_DEVICE_ID_AMD_SB900_SMBUS	0x780b
+#define PCI_DEVICE_ID_AMD_HUDSON2_SMBUS	0x780b
 #define PCI_DEVICE_ID_AMD_CS5535_IDE    0x208F
 #define PCI_DEVICE_ID_AMD_CS5536_ISA    0x2090
 #define PCI_DEVICE_ID_AMD_CS5536_FLASH  0x2091
-- 
cgit v1.2.3


From afa08974fe80c198b8650f73ed8ab59135ca10d0 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 7 Nov 2009 13:10:46 +0100
Subject: i2c: Add an interface to lock/unlock an I2C bus segment

Some drivers need to be able to prevent access to an I2C bus segment
for a specific period of time. Add an interface for them to do so
without twiddling with i2c-core internals.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/sfc/sfe4001.c |  4 ++--
 include/linux/i2c.h       | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sfc/sfe4001.c b/drivers/net/sfc/sfe4001.c
index cee00ad49b57..49eb91b5f50c 100644
--- a/drivers/net/sfc/sfe4001.c
+++ b/drivers/net/sfc/sfe4001.c
@@ -188,7 +188,7 @@ static int sfn4111t_reset(struct efx_nic *efx)
 	efx_oword_t reg;
 
 	/* GPIO 3 and the GPIO register are shared with I2C, so block that */
-	mutex_lock(&efx->i2c_adap.bus_lock);
+	i2c_lock_adapter(&efx->i2c_adap);
 
 	/* Pull RST_N (GPIO 2) low then let it up again, setting the
 	 * FLASH_CFG_1 strap (GPIO 3) appropriately.  Only change the
@@ -204,7 +204,7 @@ static int sfn4111t_reset(struct efx_nic *efx)
 	falcon_write(efx, &reg, GPIO_CTL_REG_KER);
 	msleep(1);
 
-	mutex_unlock(&efx->i2c_adap.bus_lock);
+	i2c_unlock_adapter(&efx->i2c_adap);
 
 	ssleep(1);
 	return 0;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 57d41b0abce2..7b40cda57a70 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -361,6 +361,24 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data)
 	dev_set_drvdata(&dev->dev, data);
 }
 
+/**
+ * i2c_lock_adapter - Prevent access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ */
+static inline void i2c_lock_adapter(struct i2c_adapter *adapter)
+{
+	mutex_lock(&adapter->bus_lock);
+}
+
+/**
+ * i2c_unlock_adapter - Reauthorize access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ */
+static inline void i2c_unlock_adapter(struct i2c_adapter *adapter)
+{
+	mutex_unlock(&adapter->bus_lock);
+}
+
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC	0x04		/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN	0x10		/* we have a ten bit chip address */
-- 
cgit v1.2.3


From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 6 Nov 2009 04:13:05 +0100
Subject: tracing, perf_events: Protect the buffer from recursion in perf

While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.

Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.

This is because of a recursion inside perf.

1) When a trace event is triggered, it will fill a per cpu
   buffer and submit it to perf.

2) Perf will commit this event but will also protect some data
   using rcu_read_lock

3) A recursion appears: rcu_read_lock triggers a lock_acquire
   event that will fill the per cpu event and then submit the
   buffer to perf.

4) Perf detects a recursion and ignores it

5) Perf continues its work on the previous event, but its buffer
   has been overwritten by the lock_acquire event, it has then
   been turned into a lock_acquire event of rcu read lock

Such scenario also happens with lock_release with
rcu_read_unlock().

We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.

This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.

-v2: Fixed lost whitespace, added reviewed-by tag

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  9 +++++--
 include/trace/ftrace.h             | 39 ++++++++++++++++++++++-------
 kernel/trace/trace_event_profile.c | 41 ++++++++++++++-----------------
 kernel/trace/trace_kprobe.c        | 50 ++++++++++++++++++++++++++++++++------
 kernel/trace/trace_syscalls.c      | 44 +++++++++++++++++++++++++++------
 5 files changed, 133 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f7b47c336703..43360c1d8f70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,8 +137,13 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char			*trace_profile_buf;
-extern char			*trace_profile_buf_nmi;
+struct perf_trace_buf {
+	char	buf[FTRACE_MAX_PROFILE_SIZE];
+	int	recursion;
+};
+
+extern struct perf_trace_buf	*perf_trace_buf;
+extern struct perf_trace_buf	*perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a7f946094128..4945d1c99864 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *	struct ftrace_event_call *event_call = &event_<call>;
  *	extern void perf_tp_event(int, u64, u64, void *, int);
  *	struct ftrace_raw_##call *entry;
+ *	struct perf_trace_buf *trace_buf;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
  *	struct trace_entry *ent;
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *	__cpu = smp_processor_id();
  *
  *	if (in_nmi())
- *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *		trace_buf = rcu_dereference(perf_trace_buf_nmi);
  *	else
- *		raw_data = rcu_dereference(trace_profile_buf);
+ *		trace_buf = rcu_dereference(perf_trace_buf);
  *
- *	if (!raw_data)
+ *	if (!trace_buf)
  *		goto end;
  *
- *	raw_data = per_cpu_ptr(raw_data, __cpu);
+ *	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+ *
+ * 	// Avoid recursion from perf that could mess up the buffer
+ * 	if (trace_buf->recursion++)
+ *		goto end_recursion;
+ *
+ * 	raw_data = trace_buf->buf;
+ *
+ *	// Make recursion update visible before entering perf_tp_event
+ *	// so that we protect from perf recursions.
+ *
+ *	barrier();
  *
  *	//zero dead bytes from alignment to avoid stack leak to userspace:
  *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	struct ftrace_event_call *event_call = &event_##call;		\
-	extern void perf_tp_event(int, u64, u64, void *, int);	\
+	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
+	struct perf_trace_buf *trace_buf;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
 	struct trace_entry *ent;					\
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto)				\
 	__cpu = smp_processor_id();					\
 									\
 	if (in_nmi())							\
-		raw_data = rcu_dereference(trace_profile_buf_nmi);		\
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);	\
 	else								\
-		raw_data = rcu_dereference(trace_profile_buf);		\
+		trace_buf = rcu_dereference(perf_trace_buf);		\
 									\
-	if (!raw_data)							\
+	if (!trace_buf)							\
 		goto end;						\
 									\
-	raw_data = per_cpu_ptr(raw_data, __cpu);			\
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);			\
+	if (trace_buf->recursion++)					\
+		goto end_recursion;					\
+									\
+	barrier();							\
+									\
+	raw_data = trace_buf->buf;					\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
+end_recursion:								\
+	trace_buf->recursion--;						\
 end:									\
 	local_irq_restore(irq_flags);					\
 									\
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index c9f687ab0d4f..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,41 +8,36 @@
 #include <linux/module.h>
 #include "trace.h"
 
-/*
- * We can't use a size but a type in alloc_percpu()
- * So let's create a dummy type that matches the desired size
- */
-typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
 
-char		*trace_profile_buf;
-EXPORT_SYMBOL_GPL(trace_profile_buf);
+struct perf_trace_buf *perf_trace_buf;
+EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-char		*trace_profile_buf_nmi;
-EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+struct perf_trace_buf *perf_trace_buf_nmi;
+EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
 /* Count the events in use (per event id, not per instance) */
 static int	total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-	char *buf;
+	struct perf_trace_buf *buf;
 	int ret = -ENOMEM;
 
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
 	if (!total_profile_count) {
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = alloc_percpu(struct perf_trace_buf);
 		if (!buf)
 			goto fail_buf;
 
-		rcu_assign_pointer(trace_profile_buf, buf);
+		rcu_assign_pointer(perf_trace_buf, buf);
 
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = alloc_percpu(struct perf_trace_buf);
 		if (!buf)
 			goto fail_buf_nmi;
 
-		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+		rcu_assign_pointer(perf_trace_buf_nmi, buf);
 	}
 
 	ret = event->profile_enable(event);
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 
 fail_buf_nmi:
 	if (!total_profile_count) {
-		free_percpu(trace_profile_buf_nmi);
-		free_percpu(trace_profile_buf);
-		trace_profile_buf_nmi = NULL;
-		trace_profile_buf = NULL;
+		free_percpu(perf_trace_buf_nmi);
+		free_percpu(perf_trace_buf);
+		perf_trace_buf_nmi = NULL;
+		perf_trace_buf = NULL;
 	}
 fail_buf:
 	atomic_dec(&event->profile_count);
@@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-	char *buf, *nmi_buf;
+	struct perf_trace_buf *buf, *nmi_buf;
 
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
@@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 	event->profile_disable(event);
 
 	if (!--total_profile_count) {
-		buf = trace_profile_buf;
-		rcu_assign_pointer(trace_profile_buf, NULL);
+		buf = perf_trace_buf;
+		rcu_assign_pointer(perf_trace_buf, NULL);
 
-		nmi_buf = trace_profile_buf_nmi;
-		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+		nmi_buf = perf_trace_buf_nmi;
+		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
 
 		/*
 		 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index cf17a6694f32..3696476f307d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
+	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
@@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, __cpu);
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
+
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 	entry = (struct kprobe_trace_entry *)raw_data;
@@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
+
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(irq_flags);
+
 	return 0;
 }
 
@@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
+	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
@@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, __cpu);
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
+
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 	entry = (struct kretprobe_trace_entry *)raw_data;
@@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(irq_flags);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_metadata *sys_data;
+	struct perf_trace_buf *trace_buf;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
 	char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 			       (unsigned long *)&rec->args);
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(flags);
 }
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
+	struct perf_trace_buf *trace_buf;
 	unsigned long flags;
 	int syscall_nr;
 	char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3


From e0000163e30eeb112b41486ea113fd54f64e1f17 Mon Sep 17 00:00:00 2001
From: Christian Pellegrin <chripell@fsfe.org>
Date: Mon, 2 Nov 2009 23:07:00 +0000
Subject: can: Driver for the Microchip MCP251x SPI CAN controllers

Signed-off-by: Christian Pellegrin <chripell@fsfe.org>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig              |    6 +
 drivers/net/can/Makefile             |    1 +
 drivers/net/can/mcp251x.c            | 1164 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/mcp251x.h |   36 ++
 4 files changed, 1207 insertions(+)
 create mode 100644 drivers/net/can/mcp251x.c
 create mode 100644 include/linux/can/platform/mcp251x.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 26d77cc0ded7..b819cc2a429e 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -102,6 +102,12 @@ config CAN_TI_HECC
 	  Driver for TI HECC (High End CAN Controller) module found on many
 	  TI devices. The device specifications are available from www.ti.com
 
+config CAN_MCP251X
+	tristate "Microchip MCP251x SPI CAN controllers"
+	depends on CAN_DEV && SPI
+	---help---
+	  Driver for the Microchip MCP251x SPI CAN controllers.
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 31f4ab5df28b..14891817ea5b 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -12,5 +12,6 @@ obj-y				+= usb/
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
 obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
+obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
new file mode 100644
index 000000000000..8f48f4b50b7c
--- /dev/null
+++ b/drivers/net/can/mcp251x.c
@@ -0,0 +1,1164 @@
+/*
+ * CAN bus driver for Microchip 251x CAN Controller with SPI Interface
+ *
+ * MCP2510 support and bug fixes by Christian Pellegrin
+ * <chripell@evolware.org>
+ *
+ * Copyright 2009 Christian Pellegrin EVOL S.r.l.
+ *
+ * Copyright 2007 Raymarine UK, Ltd. All Rights Reserved.
+ * Written under contract by:
+ *   Chris Elston, Katalix Systems, Ltd.
+ *
+ * Based on Microchip MCP251x CAN controller driver written by
+ * David Vrabel, Copyright 2006 Arcom Control Systems Ltd.
+ *
+ * Based on CAN bus driver for the CCAN controller written by
+ * - Sascha Hauer, Marc Kleine-Budde, Pengutronix
+ * - Simon Kallweit, intefo AG
+ * Copyright 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ *
+ * Your platform definition file should specify something like:
+ *
+ * static struct mcp251x_platform_data mcp251x_info = {
+ *         .oscillator_frequency = 8000000,
+ *         .board_specific_setup = &mcp251x_setup,
+ *         .model = CAN_MCP251X_MCP2510,
+ *         .power_enable = mcp251x_power_enable,
+ *         .transceiver_enable = NULL,
+ * };
+ *
+ * static struct spi_board_info spi_board_info[] = {
+ *         {
+ *                 .modalias = "mcp251x",
+ *                 .platform_data = &mcp251x_info,
+ *                 .irq = IRQ_EINT13,
+ *                 .max_speed_hz = 2*1000*1000,
+ *                 .chip_select = 2,
+ *         },
+ * };
+ *
+ * Please see mcp251x.h for a description of the fields in
+ * struct mcp251x_platform_data.
+ *
+ */
+
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/dev.h>
+#include <linux/can/platform/mcp251x.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/freezer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/uaccess.h>
+
+/* SPI interface instruction set */
+#define INSTRUCTION_WRITE	0x02
+#define INSTRUCTION_READ	0x03
+#define INSTRUCTION_BIT_MODIFY	0x05
+#define INSTRUCTION_LOAD_TXB(n)	(0x40 + 2 * (n))
+#define INSTRUCTION_READ_RXB(n)	(((n) == 0) ? 0x90 : 0x94)
+#define INSTRUCTION_RESET	0xC0
+
+/* MPC251x registers */
+#define CANSTAT	      0x0e
+#define CANCTRL	      0x0f
+#  define CANCTRL_REQOP_MASK	    0xe0
+#  define CANCTRL_REQOP_CONF	    0x80
+#  define CANCTRL_REQOP_LISTEN_ONLY 0x60
+#  define CANCTRL_REQOP_LOOPBACK    0x40
+#  define CANCTRL_REQOP_SLEEP	    0x20
+#  define CANCTRL_REQOP_NORMAL	    0x00
+#  define CANCTRL_OSM		    0x08
+#  define CANCTRL_ABAT		    0x10
+#define TEC	      0x1c
+#define REC	      0x1d
+#define CNF1	      0x2a
+#  define CNF1_SJW_SHIFT   6
+#define CNF2	      0x29
+#  define CNF2_BTLMODE	   0x80
+#  define CNF2_SAM         0x40
+#  define CNF2_PS1_SHIFT   3
+#define CNF3	      0x28
+#  define CNF3_SOF	   0x08
+#  define CNF3_WAKFIL	   0x04
+#  define CNF3_PHSEG2_MASK 0x07
+#define CANINTE	      0x2b
+#  define CANINTE_MERRE 0x80
+#  define CANINTE_WAKIE 0x40
+#  define CANINTE_ERRIE 0x20
+#  define CANINTE_TX2IE 0x10
+#  define CANINTE_TX1IE 0x08
+#  define CANINTE_TX0IE 0x04
+#  define CANINTE_RX1IE 0x02
+#  define CANINTE_RX0IE 0x01
+#define CANINTF	      0x2c
+#  define CANINTF_MERRF 0x80
+#  define CANINTF_WAKIF 0x40
+#  define CANINTF_ERRIF 0x20
+#  define CANINTF_TX2IF 0x10
+#  define CANINTF_TX1IF 0x08
+#  define CANINTF_TX0IF 0x04
+#  define CANINTF_RX1IF 0x02
+#  define CANINTF_RX0IF 0x01
+#define EFLG	      0x2d
+#  define EFLG_EWARN	0x01
+#  define EFLG_RXWAR	0x02
+#  define EFLG_TXWAR	0x04
+#  define EFLG_RXEP	0x08
+#  define EFLG_TXEP	0x10
+#  define EFLG_TXBO	0x20
+#  define EFLG_RX0OVR	0x40
+#  define EFLG_RX1OVR	0x80
+#define TXBCTRL(n)  (((n) * 0x10) + 0x30 + TXBCTRL_OFF)
+#  define TXBCTRL_ABTF	0x40
+#  define TXBCTRL_MLOA	0x20
+#  define TXBCTRL_TXERR 0x10
+#  define TXBCTRL_TXREQ 0x08
+#define TXBSIDH(n)  (((n) * 0x10) + 0x30 + TXBSIDH_OFF)
+#  define SIDH_SHIFT    3
+#define TXBSIDL(n)  (((n) * 0x10) + 0x30 + TXBSIDL_OFF)
+#  define SIDL_SID_MASK    7
+#  define SIDL_SID_SHIFT   5
+#  define SIDL_EXIDE_SHIFT 3
+#  define SIDL_EID_SHIFT   16
+#  define SIDL_EID_MASK    3
+#define TXBEID8(n)  (((n) * 0x10) + 0x30 + TXBEID8_OFF)
+#define TXBEID0(n)  (((n) * 0x10) + 0x30 + TXBEID0_OFF)
+#define TXBDLC(n)   (((n) * 0x10) + 0x30 + TXBDLC_OFF)
+#  define DLC_RTR_SHIFT    6
+#define TXBCTRL_OFF 0
+#define TXBSIDH_OFF 1
+#define TXBSIDL_OFF 2
+#define TXBEID8_OFF 3
+#define TXBEID0_OFF 4
+#define TXBDLC_OFF  5
+#define TXBDAT_OFF  6
+#define RXBCTRL(n)  (((n) * 0x10) + 0x60 + RXBCTRL_OFF)
+#  define RXBCTRL_BUKT	0x04
+#  define RXBCTRL_RXM0	0x20
+#  define RXBCTRL_RXM1	0x40
+#define RXBSIDH(n)  (((n) * 0x10) + 0x60 + RXBSIDH_OFF)
+#  define RXBSIDH_SHIFT 3
+#define RXBSIDL(n)  (((n) * 0x10) + 0x60 + RXBSIDL_OFF)
+#  define RXBSIDL_IDE   0x08
+#  define RXBSIDL_EID   3
+#  define RXBSIDL_SHIFT 5
+#define RXBEID8(n)  (((n) * 0x10) + 0x60 + RXBEID8_OFF)
+#define RXBEID0(n)  (((n) * 0x10) + 0x60 + RXBEID0_OFF)
+#define RXBDLC(n)   (((n) * 0x10) + 0x60 + RXBDLC_OFF)
+#  define RXBDLC_LEN_MASK  0x0f
+#  define RXBDLC_RTR       0x40
+#define RXBCTRL_OFF 0
+#define RXBSIDH_OFF 1
+#define RXBSIDL_OFF 2
+#define RXBEID8_OFF 3
+#define RXBEID0_OFF 4
+#define RXBDLC_OFF  5
+#define RXBDAT_OFF  6
+
+#define GET_BYTE(val, byte)			\
+	(((val) >> ((byte) * 8)) & 0xff)
+#define SET_BYTE(val, byte)			\
+	(((val) & 0xff) << ((byte) * 8))
+
+/*
+ * Buffer size required for the largest SPI transfer (i.e., reading a
+ * frame)
+ */
+#define CAN_FRAME_MAX_DATA_LEN	8
+#define SPI_TRANSFER_BUF_LEN	(6 + CAN_FRAME_MAX_DATA_LEN)
+#define CAN_FRAME_MAX_BITS	128
+
+#define TX_ECHO_SKB_MAX	1
+
+#define DEVICE_NAME "mcp251x"
+
+static int mcp251x_enable_dma; /* Enable SPI DMA. Default: 0 (Off) */
+module_param(mcp251x_enable_dma, int, S_IRUGO);
+MODULE_PARM_DESC(mcp251x_enable_dma, "Enable SPI DMA. Default: 0 (Off)");
+
+static struct can_bittiming_const mcp251x_bittiming_const = {
+	.name = DEVICE_NAME,
+	.tseg1_min = 3,
+	.tseg1_max = 16,
+	.tseg2_min = 2,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 64,
+	.brp_inc = 1,
+};
+
+struct mcp251x_priv {
+	struct can_priv	   can;
+	struct net_device *net;
+	struct spi_device *spi;
+
+	struct mutex spi_lock; /* SPI buffer lock */
+	u8 *spi_tx_buf;
+	u8 *spi_rx_buf;
+	dma_addr_t spi_tx_dma;
+	dma_addr_t spi_rx_dma;
+
+	struct sk_buff *tx_skb;
+	int tx_len;
+	struct workqueue_struct *wq;
+	struct work_struct tx_work;
+	struct work_struct irq_work;
+	struct completion awake;
+	int wake;
+	int force_quit;
+	int after_suspend;
+#define AFTER_SUSPEND_UP 1
+#define AFTER_SUSPEND_DOWN 2
+#define AFTER_SUSPEND_POWER 4
+#define AFTER_SUSPEND_RESTART 8
+	int restart_tx;
+};
+
+static void mcp251x_clean(struct net_device *net)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+
+	net->stats.tx_errors++;
+	if (priv->tx_skb)
+		dev_kfree_skb(priv->tx_skb);
+	if (priv->tx_len)
+		can_free_echo_skb(priv->net, 0);
+	priv->tx_skb = NULL;
+	priv->tx_len = 0;
+}
+
+/*
+ * Note about handling of error return of mcp251x_spi_trans: accessing
+ * registers via SPI is not really different conceptually than using
+ * normal I/O assembler instructions, although it's much more
+ * complicated from a practical POV. So it's not advisable to always
+ * check the return value of this function. Imagine that every
+ * read{b,l}, write{b,l} and friends would be bracketed in "if ( < 0)
+ * error();", it would be a great mess (well there are some situation
+ * when exception handling C++ like could be useful after all). So we
+ * just check that transfers are OK at the beginning of our
+ * conversation with the chip and to avoid doing really nasty things
+ * (like injecting bogus packets in the network stack).
+ */
+static int mcp251x_spi_trans(struct spi_device *spi, int len)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	struct spi_transfer t = {
+		.tx_buf = priv->spi_tx_buf,
+		.rx_buf = priv->spi_rx_buf,
+		.len = len,
+		.cs_change = 0,
+	};
+	struct spi_message m;
+	int ret;
+
+	spi_message_init(&m);
+
+	if (mcp251x_enable_dma) {
+		t.tx_dma = priv->spi_tx_dma;
+		t.rx_dma = priv->spi_rx_dma;
+		m.is_dma_mapped = 1;
+	}
+
+	spi_message_add_tail(&t, &m);
+
+	ret = spi_sync(spi, &m);
+	if (ret)
+		dev_err(&spi->dev, "spi transfer failed: ret = %d\n", ret);
+	return ret;
+}
+
+static u8 mcp251x_read_reg(struct spi_device *spi, uint8_t reg)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	u8 val = 0;
+
+	mutex_lock(&priv->spi_lock);
+
+	priv->spi_tx_buf[0] = INSTRUCTION_READ;
+	priv->spi_tx_buf[1] = reg;
+
+	mcp251x_spi_trans(spi, 3);
+	val = priv->spi_rx_buf[2];
+
+	mutex_unlock(&priv->spi_lock);
+
+	return val;
+}
+
+static void mcp251x_write_reg(struct spi_device *spi, u8 reg, uint8_t val)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+
+	mutex_lock(&priv->spi_lock);
+
+	priv->spi_tx_buf[0] = INSTRUCTION_WRITE;
+	priv->spi_tx_buf[1] = reg;
+	priv->spi_tx_buf[2] = val;
+
+	mcp251x_spi_trans(spi, 3);
+
+	mutex_unlock(&priv->spi_lock);
+}
+
+static void mcp251x_write_bits(struct spi_device *spi, u8 reg,
+			       u8 mask, uint8_t val)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+
+	mutex_lock(&priv->spi_lock);
+
+	priv->spi_tx_buf[0] = INSTRUCTION_BIT_MODIFY;
+	priv->spi_tx_buf[1] = reg;
+	priv->spi_tx_buf[2] = mask;
+	priv->spi_tx_buf[3] = val;
+
+	mcp251x_spi_trans(spi, 4);
+
+	mutex_unlock(&priv->spi_lock);
+}
+
+static void mcp251x_hw_tx_frame(struct spi_device *spi, u8 *buf,
+				int len, int tx_buf_idx)
+{
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+
+	if (pdata->model == CAN_MCP251X_MCP2510) {
+		int i;
+
+		for (i = 1; i < TXBDAT_OFF + len; i++)
+			mcp251x_write_reg(spi, TXBCTRL(tx_buf_idx) + i,
+					  buf[i]);
+	} else {
+		mutex_lock(&priv->spi_lock);
+		memcpy(priv->spi_tx_buf, buf, TXBDAT_OFF + len);
+		mcp251x_spi_trans(spi, TXBDAT_OFF + len);
+		mutex_unlock(&priv->spi_lock);
+	}
+}
+
+static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame,
+			  int tx_buf_idx)
+{
+	u32 sid, eid, exide, rtr;
+	u8 buf[SPI_TRANSFER_BUF_LEN];
+
+	exide = (frame->can_id & CAN_EFF_FLAG) ? 1 : 0; /* Extended ID Enable */
+	if (exide)
+		sid = (frame->can_id & CAN_EFF_MASK) >> 18;
+	else
+		sid = frame->can_id & CAN_SFF_MASK; /* Standard ID */
+	eid = frame->can_id & CAN_EFF_MASK; /* Extended ID */
+	rtr = (frame->can_id & CAN_RTR_FLAG) ? 1 : 0; /* Remote transmission */
+
+	buf[TXBCTRL_OFF] = INSTRUCTION_LOAD_TXB(tx_buf_idx);
+	buf[TXBSIDH_OFF] = sid >> SIDH_SHIFT;
+	buf[TXBSIDL_OFF] = ((sid & SIDL_SID_MASK) << SIDL_SID_SHIFT) |
+		(exide << SIDL_EXIDE_SHIFT) |
+		((eid >> SIDL_EID_SHIFT) & SIDL_EID_MASK);
+	buf[TXBEID8_OFF] = GET_BYTE(eid, 1);
+	buf[TXBEID0_OFF] = GET_BYTE(eid, 0);
+	buf[TXBDLC_OFF] = (rtr << DLC_RTR_SHIFT) | frame->can_dlc;
+	memcpy(buf + TXBDAT_OFF, frame->data, frame->can_dlc);
+	mcp251x_hw_tx_frame(spi, buf, frame->can_dlc, tx_buf_idx);
+	mcp251x_write_reg(spi, TXBCTRL(tx_buf_idx), TXBCTRL_TXREQ);
+}
+
+static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf,
+				int buf_idx)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+
+	if (pdata->model == CAN_MCP251X_MCP2510) {
+		int i, len;
+
+		for (i = 1; i < RXBDAT_OFF; i++)
+			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
+		len = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
+		if (len > 8)
+			len = 8;
+		for (; i < (RXBDAT_OFF + len); i++)
+			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
+	} else {
+		mutex_lock(&priv->spi_lock);
+
+		priv->spi_tx_buf[RXBCTRL_OFF] = INSTRUCTION_READ_RXB(buf_idx);
+		mcp251x_spi_trans(spi, SPI_TRANSFER_BUF_LEN);
+		memcpy(buf, priv->spi_rx_buf, SPI_TRANSFER_BUF_LEN);
+
+		mutex_unlock(&priv->spi_lock);
+	}
+}
+
+static void mcp251x_hw_rx(struct spi_device *spi, int buf_idx)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	struct sk_buff *skb;
+	struct can_frame *frame;
+	u8 buf[SPI_TRANSFER_BUF_LEN];
+
+	skb = alloc_can_skb(priv->net, &frame);
+	if (!skb) {
+		dev_err(&spi->dev, "cannot allocate RX skb\n");
+		priv->net->stats.rx_dropped++;
+		return;
+	}
+
+	mcp251x_hw_rx_frame(spi, buf, buf_idx);
+	if (buf[RXBSIDL_OFF] & RXBSIDL_IDE) {
+		/* Extended ID format */
+		frame->can_id = CAN_EFF_FLAG;
+		frame->can_id |=
+			/* Extended ID part */
+			SET_BYTE(buf[RXBSIDL_OFF] & RXBSIDL_EID, 2) |
+			SET_BYTE(buf[RXBEID8_OFF], 1) |
+			SET_BYTE(buf[RXBEID0_OFF], 0) |
+			/* Standard ID part */
+			(((buf[RXBSIDH_OFF] << RXBSIDH_SHIFT) |
+			  (buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT)) << 18);
+		/* Remote transmission request */
+		if (buf[RXBDLC_OFF] & RXBDLC_RTR)
+			frame->can_id |= CAN_RTR_FLAG;
+	} else {
+		/* Standard ID format */
+		frame->can_id =
+			(buf[RXBSIDH_OFF] << RXBSIDH_SHIFT) |
+			(buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT);
+	}
+	/* Data length */
+	frame->can_dlc = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
+	if (frame->can_dlc > 8) {
+		dev_warn(&spi->dev, "invalid frame recevied\n");
+		priv->net->stats.rx_errors++;
+		dev_kfree_skb(skb);
+		return;
+	}
+	memcpy(frame->data, buf + RXBDAT_OFF, frame->can_dlc);
+
+	priv->net->stats.rx_packets++;
+	priv->net->stats.rx_bytes += frame->can_dlc;
+	netif_rx(skb);
+}
+
+static void mcp251x_hw_sleep(struct spi_device *spi)
+{
+	mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_SLEEP);
+}
+
+static void mcp251x_hw_wakeup(struct spi_device *spi)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+
+	priv->wake = 1;
+
+	/* Can only wake up by generating a wake-up interrupt. */
+	mcp251x_write_bits(spi, CANINTE, CANINTE_WAKIE, CANINTE_WAKIE);
+	mcp251x_write_bits(spi, CANINTF, CANINTF_WAKIF, CANINTF_WAKIF);
+
+	/* Wait until the device is awake */
+	if (!wait_for_completion_timeout(&priv->awake, HZ))
+		dev_err(&spi->dev, "MCP251x didn't wake-up\n");
+}
+
+static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb,
+					   struct net_device *net)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+	struct spi_device *spi = priv->spi;
+
+	if (priv->tx_skb || priv->tx_len) {
+		dev_warn(&spi->dev, "hard_xmit called while tx busy\n");
+		netif_stop_queue(net);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (skb->len != sizeof(struct can_frame)) {
+		dev_err(&spi->dev, "dropping packet - bad length\n");
+		dev_kfree_skb(skb);
+		net->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	netif_stop_queue(net);
+	priv->tx_skb = skb;
+	net->trans_start = jiffies;
+	queue_work(priv->wq, &priv->tx_work);
+
+	return NETDEV_TX_OK;
+}
+
+static int mcp251x_do_set_mode(struct net_device *net, enum can_mode mode)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+
+	switch (mode) {
+	case CAN_MODE_START:
+		/* We have to delay work since SPI I/O may sleep */
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+		priv->restart_tx = 1;
+		if (priv->can.restart_ms == 0)
+			priv->after_suspend = AFTER_SUSPEND_RESTART;
+		queue_work(priv->wq, &priv->irq_work);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void mcp251x_set_normal_mode(struct spi_device *spi)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	unsigned long timeout;
+
+	/* Enable interrupts */
+	mcp251x_write_reg(spi, CANINTE,
+			  CANINTE_ERRIE | CANINTE_TX2IE | CANINTE_TX1IE |
+			  CANINTE_TX0IE | CANINTE_RX1IE | CANINTE_RX0IE |
+			  CANINTF_MERRF);
+
+	if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
+		/* Put device into loopback mode */
+		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LOOPBACK);
+	} else {
+		/* Put device into normal mode */
+		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL);
+
+		/* Wait for the device to enter normal mode */
+		timeout = jiffies + HZ;
+		while (mcp251x_read_reg(spi, CANSTAT) & CANCTRL_REQOP_MASK) {
+			schedule();
+			if (time_after(jiffies, timeout)) {
+				dev_err(&spi->dev, "MCP251x didn't"
+					" enter in normal mode\n");
+				return;
+			}
+		}
+	}
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+static int mcp251x_do_set_bittiming(struct net_device *net)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+	struct can_bittiming *bt = &priv->can.bittiming;
+	struct spi_device *spi = priv->spi;
+
+	mcp251x_write_reg(spi, CNF1, ((bt->sjw - 1) << CNF1_SJW_SHIFT) |
+			  (bt->brp - 1));
+	mcp251x_write_reg(spi, CNF2, CNF2_BTLMODE |
+			  (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES ?
+			   CNF2_SAM : 0) |
+			  ((bt->phase_seg1 - 1) << CNF2_PS1_SHIFT) |
+			  (bt->prop_seg - 1));
+	mcp251x_write_bits(spi, CNF3, CNF3_PHSEG2_MASK,
+			   (bt->phase_seg2 - 1));
+	dev_info(&spi->dev, "CNF: 0x%02x 0x%02x 0x%02x\n",
+		 mcp251x_read_reg(spi, CNF1),
+		 mcp251x_read_reg(spi, CNF2),
+		 mcp251x_read_reg(spi, CNF3));
+
+	return 0;
+}
+
+static int mcp251x_setup(struct net_device *net, struct mcp251x_priv *priv,
+			 struct spi_device *spi)
+{
+	int ret;
+
+	ret = open_candev(net);
+	if (ret) {
+		dev_err(&spi->dev, "unable to set initial baudrate!\n");
+		return ret;
+	}
+
+	/* Enable RX0->RX1 buffer roll over and disable filters */
+	mcp251x_write_bits(spi, RXBCTRL(0),
+			   RXBCTRL_BUKT | RXBCTRL_RXM0 | RXBCTRL_RXM1,
+			   RXBCTRL_BUKT | RXBCTRL_RXM0 | RXBCTRL_RXM1);
+	mcp251x_write_bits(spi, RXBCTRL(1),
+			   RXBCTRL_RXM0 | RXBCTRL_RXM1,
+			   RXBCTRL_RXM0 | RXBCTRL_RXM1);
+	return 0;
+}
+
+static void mcp251x_hw_reset(struct spi_device *spi)
+{
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	int ret;
+
+	mutex_lock(&priv->spi_lock);
+
+	priv->spi_tx_buf[0] = INSTRUCTION_RESET;
+
+	ret = spi_write(spi, priv->spi_tx_buf, 1);
+
+	mutex_unlock(&priv->spi_lock);
+
+	if (ret)
+		dev_err(&spi->dev, "reset failed: ret = %d\n", ret);
+	/* Wait for reset to finish */
+	mdelay(10);
+}
+
+static int mcp251x_hw_probe(struct spi_device *spi)
+{
+	int st1, st2;
+
+	mcp251x_hw_reset(spi);
+
+	/*
+	 * Please note that these are "magic values" based on after
+	 * reset defaults taken from data sheet which allows us to see
+	 * if we really have a chip on the bus (we avoid common all
+	 * zeroes or all ones situations)
+	 */
+	st1 = mcp251x_read_reg(spi, CANSTAT) & 0xEE;
+	st2 = mcp251x_read_reg(spi, CANCTRL) & 0x17;
+
+	dev_dbg(&spi->dev, "CANSTAT 0x%02x CANCTRL 0x%02x\n", st1, st2);
+
+	/* Check for power up default values */
+	return (st1 == 0x80 && st2 == 0x07) ? 1 : 0;
+}
+
+static irqreturn_t mcp251x_can_isr(int irq, void *dev_id)
+{
+	struct net_device *net = (struct net_device *)dev_id;
+	struct mcp251x_priv *priv = netdev_priv(net);
+
+	/* Schedule bottom half */
+	if (!work_pending(&priv->irq_work))
+		queue_work(priv->wq, &priv->irq_work);
+
+	return IRQ_HANDLED;
+}
+
+static int mcp251x_open(struct net_device *net)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+	struct spi_device *spi = priv->spi;
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	int ret;
+
+	if (pdata->transceiver_enable)
+		pdata->transceiver_enable(1);
+
+	priv->force_quit = 0;
+	priv->tx_skb = NULL;
+	priv->tx_len = 0;
+
+	ret = request_irq(spi->irq, mcp251x_can_isr,
+			  IRQF_TRIGGER_FALLING, DEVICE_NAME, net);
+	if (ret) {
+		dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq);
+		if (pdata->transceiver_enable)
+			pdata->transceiver_enable(0);
+		return ret;
+	}
+
+	mcp251x_hw_wakeup(spi);
+	mcp251x_hw_reset(spi);
+	ret = mcp251x_setup(net, priv, spi);
+	if (ret) {
+		free_irq(spi->irq, net);
+		if (pdata->transceiver_enable)
+			pdata->transceiver_enable(0);
+		return ret;
+	}
+	mcp251x_set_normal_mode(spi);
+	netif_wake_queue(net);
+
+	return 0;
+}
+
+static int mcp251x_stop(struct net_device *net)
+{
+	struct mcp251x_priv *priv = netdev_priv(net);
+	struct spi_device *spi = priv->spi;
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+
+	close_candev(net);
+
+	/* Disable and clear pending interrupts */
+	mcp251x_write_reg(spi, CANINTE, 0x00);
+	mcp251x_write_reg(spi, CANINTF, 0x00);
+
+	priv->force_quit = 1;
+	free_irq(spi->irq, net);
+	flush_workqueue(priv->wq);
+
+	mcp251x_write_reg(spi, TXBCTRL(0), 0);
+	if (priv->tx_skb || priv->tx_len)
+		mcp251x_clean(net);
+
+	mcp251x_hw_sleep(spi);
+
+	if (pdata->transceiver_enable)
+		pdata->transceiver_enable(0);
+
+	priv->can.state = CAN_STATE_STOPPED;
+
+	return 0;
+}
+
+static void mcp251x_tx_work_handler(struct work_struct *ws)
+{
+	struct mcp251x_priv *priv = container_of(ws, struct mcp251x_priv,
+						 tx_work);
+	struct spi_device *spi = priv->spi;
+	struct net_device *net = priv->net;
+	struct can_frame *frame;
+
+	if (priv->tx_skb) {
+		frame = (struct can_frame *)priv->tx_skb->data;
+
+		if (priv->can.state == CAN_STATE_BUS_OFF) {
+			mcp251x_clean(net);
+			netif_wake_queue(net);
+			return;
+		}
+		if (frame->can_dlc > CAN_FRAME_MAX_DATA_LEN)
+			frame->can_dlc = CAN_FRAME_MAX_DATA_LEN;
+		mcp251x_hw_tx(spi, frame, 0);
+		priv->tx_len = 1 + frame->can_dlc;
+		can_put_echo_skb(priv->tx_skb, net, 0);
+		priv->tx_skb = NULL;
+	}
+}
+
+static void mcp251x_irq_work_handler(struct work_struct *ws)
+{
+	struct mcp251x_priv *priv = container_of(ws, struct mcp251x_priv,
+						 irq_work);
+	struct spi_device *spi = priv->spi;
+	struct net_device *net = priv->net;
+	u8 txbnctrl;
+	u8 intf;
+	enum can_state new_state;
+
+	if (priv->after_suspend) {
+		mdelay(10);
+		mcp251x_hw_reset(spi);
+		mcp251x_setup(net, priv, spi);
+		if (priv->after_suspend & AFTER_SUSPEND_RESTART) {
+			mcp251x_set_normal_mode(spi);
+		} else if (priv->after_suspend & AFTER_SUSPEND_UP) {
+			netif_device_attach(net);
+			/* Clean since we lost tx buffer */
+			if (priv->tx_skb || priv->tx_len) {
+				mcp251x_clean(net);
+				netif_wake_queue(net);
+			}
+			mcp251x_set_normal_mode(spi);
+		} else {
+			mcp251x_hw_sleep(spi);
+		}
+		priv->after_suspend = 0;
+	}
+
+	if (priv->can.restart_ms == 0 && priv->can.state == CAN_STATE_BUS_OFF)
+		return;
+
+	while (!priv->force_quit && !freezing(current)) {
+		u8 eflag = mcp251x_read_reg(spi, EFLG);
+		int can_id = 0, data1 = 0;
+
+		mcp251x_write_reg(spi, EFLG, 0x00);
+
+		if (priv->restart_tx) {
+			priv->restart_tx = 0;
+			mcp251x_write_reg(spi, TXBCTRL(0), 0);
+			if (priv->tx_skb || priv->tx_len)
+				mcp251x_clean(net);
+			netif_wake_queue(net);
+			can_id |= CAN_ERR_RESTARTED;
+		}
+
+		if (priv->wake) {
+			/* Wait whilst the device wakes up */
+			mdelay(10);
+			priv->wake = 0;
+		}
+
+		intf = mcp251x_read_reg(spi, CANINTF);
+		mcp251x_write_bits(spi, CANINTF, intf, 0x00);
+
+		/* Update can state */
+		if (eflag & EFLG_TXBO) {
+			new_state = CAN_STATE_BUS_OFF;
+			can_id |= CAN_ERR_BUSOFF;
+		} else if (eflag & EFLG_TXEP) {
+			new_state = CAN_STATE_ERROR_PASSIVE;
+			can_id |= CAN_ERR_CRTL;
+			data1 |= CAN_ERR_CRTL_TX_PASSIVE;
+		} else if (eflag & EFLG_RXEP) {
+			new_state = CAN_STATE_ERROR_PASSIVE;
+			can_id |= CAN_ERR_CRTL;
+			data1 |= CAN_ERR_CRTL_RX_PASSIVE;
+		} else if (eflag & EFLG_TXWAR) {
+			new_state = CAN_STATE_ERROR_WARNING;
+			can_id |= CAN_ERR_CRTL;
+			data1 |= CAN_ERR_CRTL_TX_WARNING;
+		} else if (eflag & EFLG_RXWAR) {
+			new_state = CAN_STATE_ERROR_WARNING;
+			can_id |= CAN_ERR_CRTL;
+			data1 |= CAN_ERR_CRTL_RX_WARNING;
+		} else {
+			new_state = CAN_STATE_ERROR_ACTIVE;
+		}
+
+		/* Update can state statistics */
+		switch (priv->can.state) {
+		case CAN_STATE_ERROR_ACTIVE:
+			if (new_state >= CAN_STATE_ERROR_WARNING &&
+			    new_state <= CAN_STATE_BUS_OFF)
+				priv->can.can_stats.error_warning++;
+		case CAN_STATE_ERROR_WARNING:	/* fallthrough */
+			if (new_state >= CAN_STATE_ERROR_PASSIVE &&
+			    new_state <= CAN_STATE_BUS_OFF)
+				priv->can.can_stats.error_passive++;
+			break;
+		default:
+			break;
+		}
+		priv->can.state = new_state;
+
+		if ((intf & CANINTF_ERRIF) || (can_id & CAN_ERR_RESTARTED)) {
+			struct sk_buff *skb;
+			struct can_frame *frame;
+
+			/* Create error frame */
+			skb = alloc_can_err_skb(net, &frame);
+			if (skb) {
+				/* Set error frame flags based on bus state */
+				frame->can_id = can_id;
+				frame->data[1] = data1;
+
+				/* Update net stats for overflows */
+				if (eflag & (EFLG_RX0OVR | EFLG_RX1OVR)) {
+					if (eflag & EFLG_RX0OVR)
+						net->stats.rx_over_errors++;
+					if (eflag & EFLG_RX1OVR)
+						net->stats.rx_over_errors++;
+					frame->can_id |= CAN_ERR_CRTL;
+					frame->data[1] |=
+						CAN_ERR_CRTL_RX_OVERFLOW;
+				}
+
+				netif_rx(skb);
+			} else {
+				dev_info(&spi->dev,
+					 "cannot allocate error skb\n");
+			}
+		}
+
+		if (priv->can.state == CAN_STATE_BUS_OFF) {
+			if (priv->can.restart_ms == 0) {
+				can_bus_off(net);
+				mcp251x_hw_sleep(spi);
+				return;
+			}
+		}
+
+		if (intf == 0)
+			break;
+
+		if (intf & CANINTF_WAKIF)
+			complete(&priv->awake);
+
+		if (intf & CANINTF_MERRF) {
+			/* If there are pending Tx buffers, restart queue */
+			txbnctrl = mcp251x_read_reg(spi, TXBCTRL(0));
+			if (!(txbnctrl & TXBCTRL_TXREQ)) {
+				if (priv->tx_skb || priv->tx_len)
+					mcp251x_clean(net);
+				netif_wake_queue(net);
+			}
+		}
+
+		if (intf & (CANINTF_TX2IF | CANINTF_TX1IF | CANINTF_TX0IF)) {
+			net->stats.tx_packets++;
+			net->stats.tx_bytes += priv->tx_len - 1;
+			if (priv->tx_len) {
+				can_get_echo_skb(net, 0);
+				priv->tx_len = 0;
+			}
+			netif_wake_queue(net);
+		}
+
+		if (intf & CANINTF_RX0IF)
+			mcp251x_hw_rx(spi, 0);
+
+		if (intf & CANINTF_RX1IF)
+			mcp251x_hw_rx(spi, 1);
+	}
+}
+
+static const struct net_device_ops mcp251x_netdev_ops = {
+	.ndo_open = mcp251x_open,
+	.ndo_stop = mcp251x_stop,
+	.ndo_start_xmit = mcp251x_hard_start_xmit,
+};
+
+static int __devinit mcp251x_can_probe(struct spi_device *spi)
+{
+	struct net_device *net;
+	struct mcp251x_priv *priv;
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	int ret = -ENODEV;
+
+	if (!pdata)
+		/* Platform data is required for osc freq */
+		goto error_out;
+
+	/* Allocate can/net device */
+	net = alloc_candev(sizeof(struct mcp251x_priv), TX_ECHO_SKB_MAX);
+	if (!net) {
+		ret = -ENOMEM;
+		goto error_alloc;
+	}
+
+	net->netdev_ops = &mcp251x_netdev_ops;
+	net->flags |= IFF_ECHO;
+
+	priv = netdev_priv(net);
+	priv->can.bittiming_const = &mcp251x_bittiming_const;
+	priv->can.do_set_mode = mcp251x_do_set_mode;
+	priv->can.clock.freq = pdata->oscillator_frequency / 2;
+	priv->can.do_set_bittiming = mcp251x_do_set_bittiming;
+	priv->net = net;
+	dev_set_drvdata(&spi->dev, priv);
+
+	priv->spi = spi;
+	mutex_init(&priv->spi_lock);
+
+	/* If requested, allocate DMA buffers */
+	if (mcp251x_enable_dma) {
+		spi->dev.coherent_dma_mask = ~0;
+
+		/*
+		 * Minimum coherent DMA allocation is PAGE_SIZE, so allocate
+		 * that much and share it between Tx and Rx DMA buffers.
+		 */
+		priv->spi_tx_buf = dma_alloc_coherent(&spi->dev,
+						      PAGE_SIZE,
+						      &priv->spi_tx_dma,
+						      GFP_DMA);
+
+		if (priv->spi_tx_buf) {
+			priv->spi_rx_buf = (u8 *)(priv->spi_tx_buf +
+						  (PAGE_SIZE / 2));
+			priv->spi_rx_dma = (dma_addr_t)(priv->spi_tx_dma +
+							(PAGE_SIZE / 2));
+		} else {
+			/* Fall back to non-DMA */
+			mcp251x_enable_dma = 0;
+		}
+	}
+
+	/* Allocate non-DMA buffers */
+	if (!mcp251x_enable_dma) {
+		priv->spi_tx_buf = kmalloc(SPI_TRANSFER_BUF_LEN, GFP_KERNEL);
+		if (!priv->spi_tx_buf) {
+			ret = -ENOMEM;
+			goto error_tx_buf;
+		}
+		priv->spi_rx_buf = kmalloc(SPI_TRANSFER_BUF_LEN, GFP_KERNEL);
+		if (!priv->spi_tx_buf) {
+			ret = -ENOMEM;
+			goto error_rx_buf;
+		}
+	}
+
+	if (pdata->power_enable)
+		pdata->power_enable(1);
+
+	/* Call out to platform specific setup */
+	if (pdata->board_specific_setup)
+		pdata->board_specific_setup(spi);
+
+	SET_NETDEV_DEV(net, &spi->dev);
+
+	priv->wq = create_freezeable_workqueue("mcp251x_wq");
+
+	INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
+	INIT_WORK(&priv->irq_work, mcp251x_irq_work_handler);
+
+	init_completion(&priv->awake);
+
+	/* Configure the SPI bus */
+	spi->mode = SPI_MODE_0;
+	spi->bits_per_word = 8;
+	spi_setup(spi);
+
+	if (!mcp251x_hw_probe(spi)) {
+		dev_info(&spi->dev, "Probe failed\n");
+		goto error_probe;
+	}
+	mcp251x_hw_sleep(spi);
+
+	if (pdata->transceiver_enable)
+		pdata->transceiver_enable(0);
+
+	ret = register_candev(net);
+	if (!ret) {
+		dev_info(&spi->dev, "probed\n");
+		return ret;
+	}
+error_probe:
+	if (!mcp251x_enable_dma)
+		kfree(priv->spi_rx_buf);
+error_rx_buf:
+	if (!mcp251x_enable_dma)
+		kfree(priv->spi_tx_buf);
+error_tx_buf:
+	free_candev(net);
+	if (mcp251x_enable_dma)
+		dma_free_coherent(&spi->dev, PAGE_SIZE,
+				  priv->spi_tx_buf, priv->spi_tx_dma);
+error_alloc:
+	if (pdata->power_enable)
+		pdata->power_enable(0);
+	dev_err(&spi->dev, "probe failed\n");
+error_out:
+	return ret;
+}
+
+static int __devexit mcp251x_can_remove(struct spi_device *spi)
+{
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	struct net_device *net = priv->net;
+
+	unregister_candev(net);
+	free_candev(net);
+
+	priv->force_quit = 1;
+	flush_workqueue(priv->wq);
+	destroy_workqueue(priv->wq);
+
+	if (mcp251x_enable_dma) {
+		dma_free_coherent(&spi->dev, PAGE_SIZE,
+				  priv->spi_tx_buf, priv->spi_tx_dma);
+	} else {
+		kfree(priv->spi_tx_buf);
+		kfree(priv->spi_rx_buf);
+	}
+
+	if (pdata->power_enable)
+		pdata->power_enable(0);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int mcp251x_can_suspend(struct spi_device *spi, pm_message_t state)
+{
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+	struct net_device *net = priv->net;
+
+	if (netif_running(net)) {
+		netif_device_detach(net);
+
+		mcp251x_hw_sleep(spi);
+		if (pdata->transceiver_enable)
+			pdata->transceiver_enable(0);
+		priv->after_suspend = AFTER_SUSPEND_UP;
+	} else {
+		priv->after_suspend = AFTER_SUSPEND_DOWN;
+	}
+
+	if (pdata->power_enable) {
+		pdata->power_enable(0);
+		priv->after_suspend |= AFTER_SUSPEND_POWER;
+	}
+
+	return 0;
+}
+
+static int mcp251x_can_resume(struct spi_device *spi)
+{
+	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev);
+
+	if (priv->after_suspend & AFTER_SUSPEND_POWER) {
+		pdata->power_enable(1);
+		queue_work(priv->wq, &priv->irq_work);
+	} else {
+		if (priv->after_suspend & AFTER_SUSPEND_UP) {
+			if (pdata->transceiver_enable)
+				pdata->transceiver_enable(1);
+			queue_work(priv->wq, &priv->irq_work);
+		} else {
+			priv->after_suspend = 0;
+		}
+	}
+	return 0;
+}
+#else
+#define mcp251x_can_suspend NULL
+#define mcp251x_can_resume NULL
+#endif
+
+static struct spi_driver mcp251x_can_driver = {
+	.driver = {
+		.name = DEVICE_NAME,
+		.bus = &spi_bus_type,
+		.owner = THIS_MODULE,
+	},
+
+	.probe = mcp251x_can_probe,
+	.remove = __devexit_p(mcp251x_can_remove),
+	.suspend = mcp251x_can_suspend,
+	.resume = mcp251x_can_resume,
+};
+
+static int __init mcp251x_can_init(void)
+{
+	return spi_register_driver(&mcp251x_can_driver);
+}
+
+static void __exit mcp251x_can_exit(void)
+{
+	spi_unregister_driver(&mcp251x_can_driver);
+}
+
+module_init(mcp251x_can_init);
+module_exit(mcp251x_can_exit);
+
+MODULE_AUTHOR("Chris Elston <celston@katalix.com>, "
+	      "Christian Pellegrin <chripell@evolware.org>");
+MODULE_DESCRIPTION("Microchip 251x CAN driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
new file mode 100644
index 000000000000..1448177d86d5
--- /dev/null
+++ b/include/linux/can/platform/mcp251x.h
@@ -0,0 +1,36 @@
+#ifndef __CAN_PLATFORM_MCP251X_H__
+#define __CAN_PLATFORM_MCP251X_H__
+
+/*
+ *
+ * CAN bus driver for Microchip 251x CAN Controller with SPI Interface
+ *
+ */
+
+#include <linux/spi/spi.h>
+
+/**
+ * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data
+ * @oscillator_frequency:       - oscillator frequency in Hz
+ * @model:                      - actual type of chip
+ * @board_specific_setup:       - called before probing the chip (power,reset)
+ * @transceiver_enable:         - called to power on/off the transceiver
+ * @power_enable:               - called to power on/off the mcp *and* the
+ *                                transceiver
+ *
+ * Please note that you should define power_enable or transceiver_enable or
+ * none of them. Defining both of them is no use.
+ *
+ */
+
+struct mcp251x_platform_data {
+	unsigned long oscillator_frequency;
+	int model;
+#define CAN_MCP251X_MCP2510 0
+#define CAN_MCP251X_MCP2515 1
+	int (*board_specific_setup)(struct spi_device *spi);
+	int (*transceiver_enable)(int enable);
+	int (*power_enable) (int enable);
+};
+
+#endif /* __CAN_PLATFORM_MCP251X_H__ */
-- 
cgit v1.2.3


From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 9 Sep 2009 19:22:48 +0200
Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf
 events

This patch rebase the implementation of the breakpoints API on top of
perf events instances.

Each breakpoints are now perf events that handle the
register scheduling, thread/cpu attachment, etc..

The new layering is now made as follows:

       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /

              Breakpoints perf events

                     |
                     |

               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |

             Hardware debug registers

Reasons of this rewrite:

- Use the centralized/optimized pmu registers scheduling,
  implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
  events, exclusive/non-exclusive, tunable period, etc...)

Impact:

- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
  thread breakpoints references.

Todo (in the order):

- Support breakpoints perf counter events for perf tools (ie: implement
  perf_bpcounter_event())
- Support from perf tools

Changes in v2:

- Follow the perf "event " rename
- The ptrace regression have been fixed (ptrace breakpoint perf events
  weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
  perf_event_attr.
- Separate core and arch specific headers, drop
  asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch

Changes in v3:

- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
  changes to kvm when we exit the guest and restore the bp registers
  to the host.

Changes in v4:

- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
  module
- Restore the breakpoints unconditionally on kvm guest exit:
  TIF_DEBUG_THREAD doesn't anymore cover every cases of running
  breakpoints and vcpu->arch.switch_db_regs might not always be
  set when the guest used debug registers.
  (Waiting for a reliable optimization)

Changes in v5:

- Split-up the asm-generic/hw-breakpoint.h moving to
  linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
  to host. We only want to restore the state if we have active
  breakpoints to the host, otherwise we don't care about messed-up
  address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c

Changes in v6:

- Fix wrong header inclusion in trace.h (triggered a build
  error with CONFIG_FTRACE_SELFTEST

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 arch/Kconfig                         |   3 +
 arch/x86/include/asm/Kbuild          |   1 +
 arch/x86/include/asm/debugreg.h      |  11 +-
 arch/x86/include/asm/hw_breakpoint.h |  58 +++--
 arch/x86/include/asm/processor.h     |  12 +-
 arch/x86/kernel/hw_breakpoint.c      | 391 +++++++++++++++++++++-----------
 arch/x86/kernel/process.c            |   7 +-
 arch/x86/kernel/process_32.c         |  26 +--
 arch/x86/kernel/process_64.c         |  26 +--
 arch/x86/kernel/ptrace.c             | 182 ++++++++++-----
 arch/x86/kernel/smpboot.c            |   3 -
 arch/x86/kvm/x86.c                   |  18 +-
 arch/x86/power/cpu.c                 |   6 -
 include/linux/hw_breakpoint.h        | 243 ++++++++++----------
 include/linux/perf_event.h           |  26 ++-
 kernel/exit.c                        |   5 +
 kernel/hw_breakpoint.c               | 424 ++++++++++++++---------------------
 kernel/perf_event.c                  |  53 ++++-
 kernel/trace/trace.h                 |   5 +-
 kernel/trace/trace_entries.h         |   6 +-
 kernel/trace/trace_ksym.c            | 126 +++++------
 kernel/trace/trace_selftest.c        |   3 +-
 22 files changed, 885 insertions(+), 750 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index acb664397945..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES
 
 config HAVE_HW_BREAKPOINT
 	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
 
 
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 23439fbb1d0e..9a3333c91f9a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -75,13 +75,8 @@
  */
 #ifdef __KERNEL__
 
-/* For process management */
-extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
-		struct task_struct *child, unsigned long clone_flags);
+DECLARE_PER_CPU(unsigned long, dr7);
 
-/* For CPU management */
-extern void load_debug_registers(void);
 static inline void hw_breakpoint_disable(void)
 {
 	/* Zero the control register for HW Breakpoint */
@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
 	set_debugreg(0UL, 3);
 }
 
+#ifdef CONFIG_KVM
+extern void hw_breakpoint_restore(void);
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 3cfca8e2b5f6..0675a7c4c20e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -4,6 +4,11 @@
 #ifdef	__KERNEL__
 #define	__ARCH_HW_BREAKPOINT_H
 
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
 struct arch_hw_breakpoint {
 	char		*name; /* Contains name of the symbol to set bkpt */
 	unsigned long	address;
@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
 };
 
 #include <linux/kdebug.h>
-#include <linux/hw_breakpoint.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
-#define HW_BREAKPOINT_LEN_1		0x40
-#define HW_BREAKPOINT_LEN_2		0x44
-#define HW_BREAKPOINT_LEN_4		0x4c
-#define HW_BREAKPOINT_LEN_EXECUTE	0x40
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
 
 #ifdef CONFIG_X86_64
-#define HW_BREAKPOINT_LEN_8		0x48
+#define X86_BREAKPOINT_LEN_8		0x48
 #endif
 
 /* Available HW breakpoint type encodings */
 
 /* trigger on instruction execute */
-#define HW_BREAKPOINT_EXECUTE	0x80
+#define X86_BREAKPOINT_EXECUTE	0x80
 /* trigger on memory write */
-#define HW_BREAKPOINT_WRITE	0x81
+#define X86_BREAKPOINT_WRITE	0x81
 /* trigger on memory read or write */
-#define HW_BREAKPOINT_RW	0x83
+#define X86_BREAKPOINT_RW	0x83
 
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
-DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
-extern unsigned int hbp_user_refcount[HBP_NUM];
+struct perf_event;
+struct pmu;
 
-extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_uninstall_thread_hw_breakpoint(void);
 extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
-						struct task_struct *tsk);
-extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
-extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_update_kernel_hw_breakpoint(void *);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
-				     unsigned long val, void *data);
+					   unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
 #endif	/* __KERNEL__ */
 #endif	/* _I386_HW_BREAKPOINT_H */
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 61aafb71c7ef..820f3000f736 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -444,12 +446,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg[HBP_NUM];
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
-	/* Hardware breakpoint info */
-	struct hw_breakpoint	*hbp[HBP_NUM];
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long           debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 9316a9de4de3..e622620790bd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
  *
  * Copyright (C) 2007 Alan Stern
  * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 /*
@@ -22,6 +23,8 @@
  * using the CPU's debug registers.
  */
 
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
@@ -38,26 +41,24 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 
-/* Unmasked kernel DR7 value */
-static unsigned long kdr7;
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
 
 /*
- * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
- * Used to clear and verify the status of bits corresponding to DR0 - DR3
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
  */
-static const unsigned long	dr7_masks[HBP_NUM] = {
-	0x000f0003,	/* LEN0, R/W0, G0, L0 */
-	0x00f0000c,	/* LEN1, R/W1, G1, L1 */
-	0x0f000030,	/* LEN2, R/W2, G2, L2 */
-	0xf00000c0	/* LEN3, R/W3, G3, L3 */
-};
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
 
 
 /*
  * Encode the length, type, Exact, and Enable bits for a particular breakpoint
  * as stored in debug register 7.
  */
-static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 {
 	unsigned long bp_info;
 
@@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 	return bp_info;
 }
 
-void arch_update_kernel_hw_breakpoint(void *unused)
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
 {
-	struct hw_breakpoint *bp;
-	int i, cpu = get_cpu();
-	unsigned long temp_kdr7 = 0;
-
-	/* Don't allow debug exceptions while we update the registers */
-	set_debugreg(0UL, 7);
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
 
-	for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
-		per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
-		if (bp) {
-			temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
-			set_debugreg(bp->info.address, i);
-		}
-	}
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
 
-	/* No need to set DR6. Update the debug registers with kernel-space
-	 * breakpoint values from kdr7 and user-space requests from the
-	 * current process
-	 */
-	kdr7 = temp_kdr7;
-	set_debugreg(kdr7 | current->thread.debugreg7, 7);
-	put_cpu();
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
 }
 
 /*
- * Install the thread breakpoints in their debug registers.
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+int arch_install_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	switch (hbp_kernel_pos) {
-	case 4:
-		set_debugreg(thread->debugreg[3], 3);
-	case 3:
-		set_debugreg(thread->debugreg[2], 2);
-	case 2:
-		set_debugreg(thread->debugreg[1], 1);
-	case 1:
-		set_debugreg(thread->debugreg[0], 0);
-	default:
-		break;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
 	}
 
-	/* No need to set DR6 */
-	set_debugreg((kdr7 | thread->debugreg7), 7);
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(info->address, i);
+	__get_cpu_var(cpu_debugreg[i]) = info->address;
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 |= encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+
+	return 0;
 }
 
 /*
- * Install the debug register values for just the kernel, no thread.
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_uninstall_thread_hw_breakpoint(void)
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
-	/* Clear the user-space portion of debugreg7 by setting only kdr7 */
-	set_debugreg(kdr7, 7);
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
 
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 &= ~encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
 }
 
 static int get_hbp_len(u8 hbp_len)
@@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len)
 	unsigned int len_in_bytes = 0;
 
 	switch (hbp_len) {
-	case HW_BREAKPOINT_LEN_1:
+	case X86_BREAKPOINT_LEN_1:
 		len_in_bytes = 1;
 		break;
-	case HW_BREAKPOINT_LEN_2:
+	case X86_BREAKPOINT_LEN_2:
 		len_in_bytes = 2;
 		break;
-	case HW_BREAKPOINT_LEN_4:
+	case X86_BREAKPOINT_LEN_4:
 		len_in_bytes = 4;
 		break;
 #ifdef CONFIG_X86_64
-	case HW_BREAKPOINT_LEN_8:
+	case X86_BREAKPOINT_LEN_8:
 		len_in_bytes = 8;
 		break;
 #endif
@@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 /*
  * Store a breakpoint's encoded address, length, and type.
  */
-static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
+static int arch_store_info(struct perf_event *bp)
 {
-	/*
-	 * User-space requests will always have the address field populated
-	 * Symbol names from user-space are rejected
-	 */
-	if (tsk && bp->info.name)
-		return -EINVAL;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	/*
 	 * For kernel-addresses, either the address or symbol name can be
 	 * specified.
 	 */
-	if (bp->info.name)
-		bp->info.address = (unsigned long)
-					kallsyms_lookup_name(bp->info.name);
-	if (bp->info.address)
+	if (info->name)
+		info->address = (unsigned long)
+				kallsyms_lookup_name(info->name);
+	if (info->address)
 		return 0;
+
 	return -EINVAL;
 }
 
-/*
- * Validate the arch-specific HW Breakpoint register settings
- */
-int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
-						struct task_struct *tsk)
+int arch_bp_generic_fields(int x86_len, int x86_type,
+			   int *gen_len, int *gen_type)
 {
-	unsigned int align;
-	int ret = -EINVAL;
+	/* Len */
+	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
 
-	switch (bp->info.type) {
-	/*
-	 * Ptrace-refactoring code
-	 * For now, we'll allow instruction breakpoint only for user-space
-	 * addresses
-	 */
-	case HW_BREAKPOINT_EXECUTE:
-		if ((!arch_check_va_in_userspace(bp->info.address,
-							bp->info.len)) &&
-			bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
-			return ret;
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
 		break;
-	case HW_BREAKPOINT_WRITE:
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
 		break;
-	case HW_BREAKPOINT_RW:
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 		break;
 	default:
-		return ret;
+		return -EINVAL;
 	}
 
-	switch (bp->info.len) {
+	return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	info->address = bp->attr.bp_addr;
+
+	/* Len */
+	switch (bp->attr.bp_len) {
 	case HW_BREAKPOINT_LEN_1:
-		align = 0;
+		info->len = X86_BREAKPOINT_LEN_1;
 		break;
 	case HW_BREAKPOINT_LEN_2:
-		align = 1;
+		info->len = X86_BREAKPOINT_LEN_2;
 		break;
 	case HW_BREAKPOINT_LEN_4:
-		align = 3;
+		info->len = X86_BREAKPOINT_LEN_4;
 		break;
 #ifdef CONFIG_X86_64
 	case HW_BREAKPOINT_LEN_8:
+		info->len = X86_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned int align;
+	int ret;
+
+
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoint only for user-space
+		 * addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
+	switch (info->len) {
+	case X86_BREAKPOINT_LEN_1:
+		align = 0;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		align = 1;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
 		align = 7;
 		break;
 #endif
@@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 		return ret;
 	}
 
-	if (bp->triggered)
-		ret = arch_store_info(bp, tsk);
+	if (bp->callback)
+		ret = arch_store_info(bp);
 
 	if (ret < 0)
 		return ret;
@@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 	 * Check that the low-order bits of the address are appropriate
 	 * for the alignment implied by len.
 	 */
-	if (bp->info.address & align)
+	if (info->address & align)
 		return -EINVAL;
 
 	/* Check that the virtual address is in the proper range */
 	if (tsk) {
-		if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
+		if (!arch_check_va_in_userspace(info->address, info->len))
 			return -EFAULT;
 	} else {
-		if (!arch_check_va_in_kernelspace(bp->info.address,
-								bp->info.len))
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
 			return -EFAULT;
 	}
+
 	return 0;
 }
 
-void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	struct hw_breakpoint *bp = thread->hbp[pos];
-
-	thread->debugreg7 &= ~dr7_masks[pos];
-	if (bp) {
-		thread->debugreg[pos] = bp->info.address;
-		thread->debugreg7 |= encode_dr7(pos, bp->info.len,
-							bp->info.type);
-	} else
-		thread->debugreg[pos] = 0;
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		unregister_hw_breakpoint(t->ptrace_bps[i]);
+		t->ptrace_bps[i] = NULL;
+	}
 }
 
-void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
+#ifdef CONFIG_KVM
+void hw_breakpoint_restore(void)
 {
-	int i;
-	struct thread_struct *thread = &(tsk->thread);
-
-	thread->debugreg7 = 0;
-	for (i = 0; i < HBP_NUM; i++)
-		thread->debugreg[i] = 0;
+	set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+	set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+	set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+	set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+	set_debugreg(current->thread.debugreg6, 6);
+	set_debugreg(__get_cpu_var(dr7), 7);
 }
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+#endif
 
 /*
  * Handle debug exception notifications.
@@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
 static int __kprobes hw_breakpoint_handler(struct die_args *args)
 {
 	int i, cpu, rc = NOTIFY_STOP;
-	struct hw_breakpoint *bp;
+	struct perf_event *bp;
 	unsigned long dr7, dr6;
 	unsigned long *dr6_p;
 
@@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;
 
-	/* Lazy debug register switching */
-	if (!test_tsk_thread_flag(current, TIF_DEBUG))
-		arch_uninstall_thread_hw_breakpoint();
-
 	get_debugreg(dr7, 7);
 	/* Disable breakpoints during exception handling */
 	set_debugreg(0UL, 7);
@@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	for (i = 0; i < HBP_NUM; ++i) {
 		if (likely(!(dr6 & (DR_TRAP0 << i))))
 			continue;
+
 		/*
-		 * Find the corresponding hw_breakpoint structure and
-		 * invoke its triggered callback.
+		 * The counter may be concurrently released but that can only
+		 * occur from a call_rcu() path. We can then safely fetch
+		 * the breakpoint, use its callback, touch its counter
+		 * while we are in an rcu_read_lock() path.
 		 */
-		if (i >= hbp_kernel_pos)
-			bp = per_cpu(this_hbp_kernel[i], cpu);
-		else {
-			bp = current->thread.hbp[i];
-			if (bp)
-				rc = NOTIFY_DONE;
-		}
+		rcu_read_lock();
+
+		bp = per_cpu(bp_per_reg[i], cpu);
+		if (bp)
+			rc = NOTIFY_DONE;
 		/*
 		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
 		 * exception handling
@@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		(*dr6_p) &= ~(DR_TRAP0 << i);
 		/*
 		 * bp can be NULL due to lazy debug register switching
-		 * or due to the delay between updates of hbp_kernel_pos
-		 * and this_hbp_kernel.
+		 * or due to concurrent perf counter removing.
 		 */
-		if (!bp)
-			continue;
+		if (!bp) {
+			rcu_read_unlock();
+			break;
+		}
+
+		(bp->callback)(bp, args->regs);
 
-		(bp->triggered)(bp, args->regs);
+		rcu_read_unlock();
 	}
 	if (dr6 & (~DR_TRAP_BITS))
 		rc = NOTIFY_DONE;
 
 	set_debugreg(dr7, 7);
 	put_cpu();
+
 	return rc;
 }
 
@@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify(
 
 	return hw_breakpoint_handler(data);
 }
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cf8ee0016307..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -18,7 +19,6 @@
 #include <asm/i387.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
 
 	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
@@ -107,8 +105,7 @@ void flush_thread(void)
 	}
 #endif
 
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 209e74801763..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
-			goto out;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		err = do_set_thread_area(p, -1,
 			(struct user_desc __user *)childregs->si, 0);
 
-out:
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
 	p->thread.ds_ctx = NULL;
@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated.  Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 72edac026a78..5bafdec34441 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,7 +53,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
 			BUG();
 		}
 	}
-	if (unlikely(dead_task->thread.debugreg7))
-		flush_thread_hw_breakpoint(dead_task);
 }
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	savesegment(ds, p->thread.ds);
 
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(me, p, clone_flags))
-			goto out;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -351,8 +346,6 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	return err;
 }
@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated.  Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 267cb85b479c..e79610d95971 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-/*
- * Decode the length and type bits for a particular breakpoint as
- * stored in debug register 7.  Return the "enabled" status.
- */
-static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
-		unsigned *type)
-{
-	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
-
-	*len = (bp_info & 0xc) | 0x40;
-	*type = (bp_info & 0x3) | 0x80;
-	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
-}
-
-static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+static void ptrace_triggered(struct perf_event *bp, void *data)
 {
-	struct thread_struct *thread = &(current->thread);
 	int i;
+	struct thread_struct *thread = &(current->thread);
 
 	/*
 	 * Store in the virtual DR6 register the fact that the breakpoint
 	 * was hit so the thread's debugger will see it.
 	 */
-	for (i = 0; i < hbp_kernel_pos; i++)
-		/*
-		 * We will check bp->info.address against the address stored in
-		 * thread's hbp structure and not debugreg[i]. This is to ensure
-		 * that the corresponding bit for 'i' in DR7 register is enabled
-		 */
-		if (bp->info.address == thread->hbp[i]->info.address)
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
 			break;
+	}
 
 	thread->debugreg6 |= (DR_TRAP0 << i);
 }
 
+/*
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
+ */
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
+{
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
+	}
+
+	return dr7;
+}
+
 /*
  * Handle ptrace writes to debug register 7.
  */
 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
 	struct thread_struct *thread = &(tsk->thread);
-	unsigned long old_dr7 = thread->debugreg7;
+	unsigned long old_dr7;
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
-	struct hw_breakpoint *bp;
+	int gen_len, gen_type;
+	struct perf_event *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
 	/*
 	 * Loop through all the hardware breakpoints, making the
@@ -496,11 +503,12 @@ restore:
 	 */
 	for (i = 0; i < HBP_NUM; i++) {
 		enabled = decode_dr7(data, i, &len, &type);
-		bp = thread->hbp[i];
+		bp = thread->ptrace_bps[i];
 
 		if (!enabled) {
 			if (bp) {
-				/* Don't unregister the breakpoints right-away,
+				/*
+				 * Don't unregister the breakpoints right-away,
 				 * unless all register_user_hw_breakpoint()
 				 * requests have succeeded. This prevents
 				 * any window of opportunity for debug
@@ -508,27 +516,45 @@ restore:
 				 */
 				if (!second_pass)
 					continue;
-				unregister_user_hw_breakpoint(tsk, bp);
-				kfree(bp);
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
 			}
 			continue;
 		}
+
+		/*
+		 * We shoud have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
 		if (!bp) {
-			rc = -ENOMEM;
-			bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-			if (bp) {
-				bp->info.address = thread->debugreg[i];
-				bp->triggered = ptrace_triggered;
-				bp->info.len = len;
-				bp->info.type = type;
-				rc = register_user_hw_breakpoint(tsk, bp);
-				if (rc)
-					kfree(bp);
-			}
-		} else
-			rc = modify_user_hw_breakpoint(tsk, bp);
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
 		if (rc)
 			break;
+
+		/*
+		 * This is a temporary thing as bp is unregistered/registered
+		 * to simulate modification
+		 */
+		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+					       gen_type, bp->callback,
+					       tsk, true);
+		thread->ptrace_bps[i] = NULL;
+
+		if (!bp) { /* incorrect bp, or we have a bug in bp API */
+			rc = -EINVAL;
+			break;
+		}
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			bp = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
 	}
 	/*
 	 * Make a second pass to free the remaining unused breakpoints
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 	struct thread_struct *thread = &(tsk->thread);
 	unsigned long val = 0;
 
-	if (n < HBP_NUM)
-		val = thread->debugreg[n];
-	else if (n == 6)
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
 		val = thread->debugreg6;
-	else if (n == 7)
-		val = thread->debugreg7;
+	 } else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
 	return val;
 }
 
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+
+	if (!t->ptrace_bps[nr]) {
+		/*
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
+		 */
+		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+						 HW_BREAKPOINT_W,
+						 ptrace_triggered, tsk,
+						 false);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+					       bp->attr.bp_type,
+					       bp->callback,
+					       tsk,
+					       bp->attr.disabled);
+	}
+
+	if (!bp)
+		return -EIO;
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
+	return 0;
+}
+
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
  */
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
 		return -EIO;
 
 	if (n == 6) {
-		tsk->thread.debugreg6 = val;
+		thread->debugreg6 = val;
 		goto ret_path;
 	}
 	if (n < HBP_NUM) {
-		if (thread->hbp[n]) {
-			if (arch_check_va_in_userspace(val,
-					thread->hbp[n]->info.len) == 0) {
-				rc = -EIO;
-				goto ret_path;
-			}
-			thread->hbp[n]->info.address = val;
-		}
-		thread->debugreg[n] = val;
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
 	}
 	/* All that's left is DR7 */
 	if (n == 7)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 213a7a3e4562..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,6 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
-#include <asm/debugreg.h>
 #include <linux/mc146818rtc.h>
 
 #include <asm/smpboot_hooks.h>
@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
-	load_debug_registers();
 	cpu_idle();
 }
 
@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
 	fixup_irqs();
-	hw_breakpoint_disable();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2974adf9b6..22dee7aa7813 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg[0], 0);
-		set_debugreg(current->thread.debugreg[1], 1);
-		set_debugreg(current->thread.debugreg[2], 2);
-		set_debugreg(current->thread.debugreg[3], 3);
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
+	/*
+	 * If the guest has used debug registers, at least dr7
+	 * will be disabled while returning to the host.
+	 * If we don't have active breakpoints in the host, we don't
+	 * care about the messed up debug address registers. But if
+	 * we have some of them active, restore the old state.
+	 */
+	if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
+		hw_breakpoint_restore();
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e09a44fc4664..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
 #endif
-	hw_breakpoint_disable();
 }
 
 /* Needed by apm.c */
@@ -144,11 +143,6 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	load_debug_registers();
 }
 
 /**
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 61ccc8f17eac..7eba9b92e5f3 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,136 +1,131 @@
 #ifndef _LINUX_HW_BREAKPOINT_H
 #define _LINUX_HW_BREAKPOINT_H
 
+#include <linux/perf_event.h>
 
-#ifdef	__KERNEL__
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/kallsyms.h>
-
-/**
- * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
- * @triggered: callback invoked after target address access
- * @info: arch-specific breakpoint info (address, length, and type)
- *
- * %hw_breakpoint structures are the kernel's way of representing
- * hardware breakpoints.  These are data breakpoints
- * (also known as "watchpoints", triggered on data access), and the breakpoint's
- * target address can be located in either kernel space or user space.
- *
- * The breakpoint's address, length, and type are highly
- * architecture-specific.  The values are encoded in the @info field; you
- * specify them when registering the breakpoint.  To examine the encoded
- * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
- * below.
- *
- * The address is specified as a regular kernel pointer (for kernel-space
- * breakponts) or as an %__user pointer (for user-space breakpoints).
- * With register_user_hw_breakpoint(), the address must refer to a
- * location in user space.  The breakpoint will be active only while the
- * requested task is running.  Conversely with
- * register_kernel_hw_breakpoint(), the address must refer to a location
- * in kernel space, and the breakpoint will be active on all CPUs
- * regardless of the current task.
- *
- * The length is the breakpoint's extent in bytes, which is subject to
- * certain limitations.  include/asm/hw_breakpoint.h contains macros
- * defining the available lengths for a specific architecture.  Note that
- * the address's alignment must match the length.  The breakpoint will
- * catch accesses to any byte in the range from address to address +
- * (length - 1).
- *
- * The breakpoint's type indicates the sort of access that will cause it
- * to trigger.  Possible values may include:
- *
- * 	%HW_BREAKPOINT_RW (triggered on read or write access),
- * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
- * 	%HW_BREAKPOINT_READ (triggered on read access).
- *
- * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
- * possibilities are available on all architectures.  Execute breakpoints
- * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
- *
- * When a breakpoint gets hit, the @triggered callback is
- * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
- * processor registers.
- * Data breakpoints occur after the memory access has taken place.
- * Breakpoints are disabled during execution @triggered, to avoid
- * recursive traps and allow unhindered access to breakpointed memory.
- *
- * This sample code sets a breakpoint on pid_max and registers a callback
- * function for writes to that variable.  Note that it is not portable
- * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
- *
- * ----------------------------------------------------------------------
- *
- * #include <asm/hw_breakpoint.h>
- *
- * struct hw_breakpoint my_bp;
- *
- * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
- * {
- * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
- * 	dump_stack();
- *  	.......<more debugging output>........
- * }
- *
- * static struct hw_breakpoint my_bp;
- *
- * static int init_module(void)
- * {
- *	..........<do anything>............
- *	my_bp.info.type = HW_BREAKPOINT_WRITE;
- *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
- *
- *	my_bp.installed = (void *)my_bp_installed;
- *
- *	rc = register_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * static void cleanup_module(void)
- * {
- *	..........<do anything>............
- *	unregister_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * ----------------------------------------------------------------------
- */
-struct hw_breakpoint {
-	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
-	struct arch_hw_breakpoint info;
+enum {
+	HW_BREAKPOINT_LEN_1 = 1,
+	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_8 = 8,
 };
 
-/*
- * len and type values are defined in include/asm/hw_breakpoint.h.
- * Available values vary according to the architecture.  On i386 the
- * possibilities are:
- *
- *	HW_BREAKPOINT_LEN_1
- *	HW_BREAKPOINT_LEN_2
- *	HW_BREAKPOINT_LEN_4
- *	HW_BREAKPOINT_RW
- *	HW_BREAKPOINT_READ
- *
- * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
- * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
- * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
- */
+enum {
+	HW_BREAKPOINT_R = 1,
+	HW_BREAKPOINT_W = 2,
+	HW_BREAKPOINT_X = 4,
+};
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+	return bp->attr.bp_addr;
+}
+
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+	return bp->attr.bp_type;
+}
+
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+	return bp->attr.bp_len;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active);
 
-extern int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern int modify_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp);
 /*
  * Kernel breakpoints are not associated with any particular thread.
  */
-extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				int cpu,
+				bool active);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active)		{ return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active)			{ return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				int cpu,
+				bool active)		{ return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active)		{ return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp) 	{ return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+static inline int
+reserve_bp_slot(struct perf_event *bp)			{return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp) 		{ }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
 
-extern unsigned int hbp_kernel_pos;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
-#endif	/* __KERNEL__ */
-#endif	/* _LINUX_HW_BREAKPOINT_H */
+#endif /* _LINUX_HW_BREAKPOINT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8d54e6d25eeb..cead64ea6c15 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 /*
  * User-space ABI bits:
  */
@@ -31,6 +35,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_BREAKPOINT			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -207,6 +212,15 @@ struct perf_event_attr {
 		__u32		wakeup_events;	  /* wakeup every n events */
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
+
+	union {
+		struct { /* Hardware breakpoint info */
+			__u64		bp_addr;
+			__u32		bp_type;
+			__u32		bp_len;
+		};
+	};
+
 	__u32			__reserved_2;
 
 	__u64			__reserved_3;
@@ -476,6 +490,11 @@ struct hw_perf_event {
 			atomic64_t	count;
 			struct hrtimer	hrtimer;
 		};
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		union { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+		};
+#endif
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
@@ -588,7 +607,7 @@ struct perf_event {
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_event_attr	attr;
+	struct perf_event_attr		attr;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
@@ -643,6 +662,8 @@ struct perf_event {
 
 	perf_callback_t			callback;
 
+	perf_callback_t			event_callback;
+
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate;
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count,
 				 void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 #define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
+static inline void
+perf_bp_event(struct perf_event *event, void *data)		{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
diff --git a/kernel/exit.c b/kernel/exit.c
index e61891f80123..266f8920628a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code)
 
 	proc_exit_connector(tsk);
 
+	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
 	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c1f64e65a9f3..08f6d0163201 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
  *
  * Copyright (C) 2007 Alan Stern
  * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 /*
@@ -35,334 +36,242 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 
-#include <asm/hw_breakpoint.h>
+#include <linux/hw_breakpoint.h>
+
 #include <asm/processor.h>
 
 #ifdef CONFIG_X86
 #include <asm/debugreg.h>
 #endif
-/*
- * Spinlock that protects all (un)register operations over kernel/user-space
- * breakpoint requests
- */
-static DEFINE_SPINLOCK(hw_breakpoint_lock);
-
-/* Array of kernel-space breakpoint structures */
-struct hw_breakpoint *hbp_kernel[HBP_NUM];
-
-/*
- * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
- * modified but we need the older copy to handle any hbp exceptions. It will
- * sync with hbp_kernel[] value after updation is done through IPIs.
- */
-DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
-
-/*
- * Kernel breakpoints grow downwards, starting from HBP_NUM
- * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
- * kernel-space request. We will initialise it here and not in an __init
- * routine because load_debug_registers(), which uses this variable can be
- * called very early during CPU initialisation.
- */
-unsigned int hbp_kernel_pos = HBP_NUM;
 
-/*
- * An array containing refcount of threads using a given bkpt register
- * Accesses are synchronised by acquiring hw_breakpoint_lock
- */
-unsigned int hbp_user_refcount[HBP_NUM];
+static atomic_t bp_slot;
 
-/*
- * Load the debug registers during startup of a CPU.
- */
-void load_debug_registers(void)
+int reserve_bp_slot(struct perf_event *bp)
 {
-	unsigned long flags;
-	struct task_struct *tsk = current;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* Prevent IPIs for new kernel breakpoint updates */
-	local_irq_save(flags);
-	arch_update_kernel_hw_breakpoint(NULL);
-	local_irq_restore(flags);
-
-	if (test_tsk_thread_flag(tsk, TIF_DEBUG))
-		arch_install_thread_hw_breakpoint(tsk);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
-}
+	if (atomic_inc_return(&bp_slot) == HBP_NUM) {
+		atomic_dec(&bp_slot);
 
-/*
- * Erase all the hardware breakpoint info associated with a thread.
- *
- * If tsk != current then tsk must not be usable (for example, a
- * child being cleaned up from a failed fork).
- */
-void flush_thread_hw_breakpoint(struct task_struct *tsk)
-{
-	int i;
-	struct thread_struct *thread = &(tsk->thread);
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* The thread no longer has any breakpoints associated with it */
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-	for (i = 0; i < HBP_NUM; i++) {
-		if (thread->hbp[i]) {
-			hbp_user_refcount[i]--;
-			kfree(thread->hbp[i]);
-			thread->hbp[i] = NULL;
-		}
+		return -ENOSPC;
 	}
 
-	arch_flush_thread_hw_breakpoint(tsk);
-
-	/* Actually uninstall the breakpoints if necessary */
-	if (tsk == current)
-		arch_uninstall_thread_hw_breakpoint();
-	spin_unlock_bh(&hw_breakpoint_lock);
+	return 0;
 }
 
-/*
- * Copy the hardware breakpoint info from a thread to its cloned child.
- */
-int copy_thread_hw_breakpoint(struct task_struct *tsk,
-		struct task_struct *child, unsigned long clone_flags)
+void release_bp_slot(struct perf_event *bp)
 {
-	/*
-	 * We will assume that breakpoint settings are not inherited
-	 * and the child starts out with no debug registers set.
-	 * But what about CLONE_PTRACE?
-	 */
-	clear_tsk_thread_flag(child, TIF_DEBUG);
-
-	/* We will call flush routine since the debugregs are not inherited */
-	arch_flush_thread_hw_breakpoint(child);
-
-	return 0;
+	atomic_dec(&bp_slot);
 }
 
-static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+int __register_perf_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int rc;
+	int ret;
 
-	/* Do not overcommit. Fail if kernel has used the hbp registers */
-	if (pos >= hbp_kernel_pos)
-		return -ENOSPC;
+	ret = reserve_bp_slot(bp);
+	if (ret)
+		return ret;
 
-	rc = arch_validate_hwbkpt_settings(bp, tsk);
-	if (rc)
-		return rc;
+	if (!bp->attr.disabled)
+		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
-	thread->hbp[pos] = bp;
-	hbp_user_refcount[pos]++;
+	return ret;
+}
 
-	arch_update_user_hw_breakpoint(pos, tsk);
-	/*
-	 * Does it need to be installed right now?
-	 * Otherwise it will get installed the next time tsk runs
-	 */
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	bp->callback = perf_bp_event;
 
-	return rc;
+	return __register_perf_hw_breakpoint(bp);
 }
 
 /*
- * Modify the address of a hbp register already in use by the task
- * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint()
+ * Register a breakpoint bound to a task and a given cpu.
+ * If cpu is -1, the breakpoint is active for the task in every cpu
+ * If the task is -1, the breakpoint is active for every tasks in the given
+ * cpu.
  */
-static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+static struct perf_event *
+register_user_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				pid_t pid,
+				int cpu,
+				bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
-		return -EINVAL;
-
-	if (thread->hbp[pos] == NULL)
-		return -EINVAL;
-
-	thread->hbp[pos] = bp;
+	struct perf_event_attr *attr;
+	struct perf_event *bp;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return ERR_PTR(-ENOMEM);
+
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->bp_addr = addr;
+	attr->bp_len = len;
+	attr->bp_type = type;
 	/*
-	 * 'pos' must be that of a hbp register already used by 'tsk'
-	 * Otherwise arch_modify_user_hw_breakpoint() will fail
+	 * Such breakpoints are used by debuggers to trigger signals when
+	 * we hit the excepted memory op. We can't miss such events, they
+	 * must be pinned.
 	 */
-	arch_update_user_hw_breakpoint(pos, tsk);
+	attr->pinned = 1;
 
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+	if (!active)
+		attr->disabled = 1;
 
-	return 0;
-}
-
-static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
-{
-	hbp_user_refcount[pos]--;
-	tsk->thread.hbp[pos] = NULL;
+	bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
+	kfree(attr);
 
-	arch_update_user_hw_breakpoint(pos, tsk);
-
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+	return bp;
 }
 
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @bp: the breakpoint structure to register
- *
- * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
- * @bp->triggered must be set properly before invocation
+ * @active: should we activate it while registering it
  *
  */
-int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, rc = -ENOSPC;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (!thread->hbp[i]) {
-			rc = __register_user_hw_breakpoint(i, tsk, bp);
-			break;
-		}
-	}
-	if (!rc)
-		set_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return rc;
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       tsk->pid, -1, active);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
 /**
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @bp: the breakpoint structure to unregister
- *
+ * @active: should we activate it while registering it
  */
-int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp)
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, ret = -ENOENT;
+	/*
+	 * FIXME: do it without unregistering
+	 * - We don't want to lose our slot
+	 * - If the new bp is incorrect, don't lose the older one
+	 */
+	unregister_hw_breakpoint(bp);
 
-	spin_lock_bh(&hw_breakpoint_lock);
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (bp == thread->hbp[i]) {
-			ret = __modify_user_hw_breakpoint(i, tsk, bp);
-			break;
-		}
-	}
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return ret;
+	return register_user_hw_breakpoint(addr, len, type, triggered,
+					   tsk, active);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
 /**
- * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
- * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
  * @bp: the breakpoint structure to unregister
- *
  */
-void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp)
+void unregister_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, pos = -1, hbp_counter = 0;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (thread->hbp[i])
-			hbp_counter++;
-		if (bp == thread->hbp[i])
-			pos = i;
-	}
-	if (pos >= 0) {
-		__unregister_user_hw_breakpoint(pos, tsk);
-		hbp_counter--;
-	}
-	if (!hbp_counter)
-		clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
+	if (!bp)
+		return;
+	perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+static struct perf_event *
+register_kernel_hw_breakpoint_cpu(unsigned long addr,
+				  int len,
+				  int type,
+				  perf_callback_t triggered,
+				  int cpu,
+				  bool active)
+{
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       -1, cpu, active);
 }
-EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
 
 /**
- * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
- * @bp: the breakpoint structure to register
- *
- * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
- * @bp->triggered must be set properly before invocation
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @active: should we activate it while registering it
  *
+ * @return a set of per_cpu pointers to perf events
  */
-int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active)
 {
-	int rc;
+	struct perf_event **cpu_events, **pevent, *bp;
+	long err;
+	int cpu;
+
+	cpu_events = alloc_percpu(typeof(*cpu_events));
+	if (!cpu_events)
+		return ERR_PTR(-ENOMEM);
 
-	rc = arch_validate_hwbkpt_settings(bp, NULL);
-	if (rc)
-		return rc;
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
+					triggered, cpu, active);
 
-	spin_lock_bh(&hw_breakpoint_lock);
+		*pevent = bp;
 
-	rc = -ENOSPC;
-	/* Check if we are over-committing */
-	if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
-		hbp_kernel_pos--;
-		hbp_kernel[hbp_kernel_pos] = bp;
-		on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
-		rc = 0;
+		if (IS_ERR(bp) || !bp) {
+			err = PTR_ERR(bp);
+			goto fail;
+		}
 	}
 
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return rc;
+	return cpu_events;
+
+fail:
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		if (IS_ERR(*pevent) || !*pevent)
+			break;
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+	/* return the error if any */
+	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
 
 /**
- * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
- * @bp: the breakpoint structure to unregister
- *
- * Uninstalls and unregisters @bp.
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
  */
-void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
 {
-	int i, j;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* Find the 'bp' in our list of breakpoints for kernel */
-	for (i = hbp_kernel_pos; i < HBP_NUM; i++)
-		if (bp == hbp_kernel[i])
-			break;
+	int cpu;
+	struct perf_event **pevent;
 
-	/* Check if we did not find a match for 'bp'. If so return early */
-	if (i == HBP_NUM) {
-		spin_unlock_bh(&hw_breakpoint_lock);
-		return;
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		unregister_hw_breakpoint(*pevent);
 	}
-
-	/*
-	 * We'll shift the breakpoints one-level above to compact if
-	 * unregistration creates a hole
-	 */
-	for (j = i; j > hbp_kernel_pos; j--)
-		hbp_kernel[j] = hbp_kernel[j-1];
-
-	hbp_kernel[hbp_kernel_pos] = NULL;
-	on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
-	hbp_kernel_pos++;
-
-	spin_unlock_bh(&hw_breakpoint_lock);
+	free_percpu(cpu_events);
 }
-EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
+
 
 static struct notifier_block hw_breakpoint_exceptions_nb = {
 	.notifier_call = hw_breakpoint_exceptions_notify,
@@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void)
 {
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 }
-
 core_initcall(init_hw_breakpoint);
+
+
+struct pmu perf_ops_bp = {
+	.enable		= arch_install_hw_breakpoint,
+	.disable	= arch_uninstall_hw_breakpoint,
+	.read		= hw_breakpoint_pmu_read,
+	.unthrottle	= hw_breakpoint_pmu_unthrottle
+};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5087125e2a00..98dc56b2ebe4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #endif /* CONFIG_EVENT_PROFILE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	int err;
+	/*
+	 * The breakpoint is already filled if we haven't created the counter
+	 * through perf syscall
+	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
+	 */
+	if (!bp->callback)
+		err = register_perf_hw_breakpoint(bp);
+	else
+		err = __register_perf_hw_breakpoint(bp);
+	if (err)
+		return ERR_PTR(err);
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+	/* TODO */
+}
+#else
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 		pmu = tp_perf_event_init(event);
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		pmu = bp_perf_event_init(event);
+		break;
+
+
 	default:
 		break;
 	}
@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
-		return NULL ;
+		return NULL;
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
 				     NULL, callback, GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 91c3d0e9a5a1..d72f06ff263f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,14 +11,11 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
 
-#ifdef CONFIG_KSYM_TRACER
-#include <asm/hw_breakpoint.h>
-#endif
-
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e19747d4f860..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
 	F_STRUCT(
 		__field(	unsigned long,	ip			  )
 		__field(	unsigned char,	type			  )
-		__array(	char	     ,	ksym_name, KSYM_NAME_LEN  )
 		__array(	char	     ,	cmd,	   TASK_COMM_LEN  )
+		__field(	unsigned long,  addr			  )
 	),
 
-	F_printk("ip: %pF type: %d ksym_name: %s cmd: %s",
+	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
 		(void *)__entry->ip, (unsigned int)__entry->type,
-		__entry->ksym_name, __entry->cmd)
+		(void *)__entry->addr,  __entry->cmd)
 );
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 6d5609c67378..fea83eeeef09 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -29,7 +29,11 @@
 #include "trace_stat.h"
 #include "trace.h"
 
-/* For now, let us restrict the no. of symbols traced simultaneously to number
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
  * of available hardware breakpoint registers.
  */
 #define KSYM_TRACER_MAX HBP_NUM
@@ -37,8 +41,10 @@
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
-	struct hw_breakpoint	*ksym_hbp;
+	struct perf_event	**ksym_hbp;
 	unsigned long		ksym_addr;
+	int			type;
+	int			len;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
 	unsigned long		counter;
 #endif
@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 }
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
 {
 	struct ring_buffer_event *event;
 	struct ksym_trace_entry *entry;
+	struct pt_regs *regs = data;
 	struct ring_buffer *buffer;
 	int pc;
 
@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
 
 	entry		= ring_buffer_event_data(event);
 	entry->ip	= instruction_pointer(regs);
-	entry->type	= hbp->info.type;
-	strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+	entry->type	= hw_breakpoint_type(hbp);
+	entry->addr	= hw_breakpoint_addr(hbp);
 	strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
 
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-	ksym_collect_stats(hbp->info.address);
+	ksym_collect_stats(hw_breakpoint_addr(hbp));
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
 	trace_buffer_unlock_commit(buffer, event, 0, pc);
@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str)
 	int access = 0;
 
 	if (str[0] == 'r')
-		access += 4;
-	else if (str[0] != '-')
-		return -EINVAL;
+		access |= HW_BREAKPOINT_R;
 
 	if (str[1] == 'w')
-		access += 2;
-	else if (str[1] != '-')
-		return -EINVAL;
+		access |= HW_BREAKPOINT_W;
 
-	if (str[2] != '-')
-		return -EINVAL;
+	if (str[2] == 'x')
+		access |= HW_BREAKPOINT_X;
 
 	switch (access) {
-	case 6:
-		access = HW_BREAKPOINT_RW;
-		break;
-	case 4:
-		access = -EINVAL;
-		break;
-	case 2:
-		access = HW_BREAKPOINT_WRITE;
-		break;
+	case HW_BREAKPOINT_W:
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		return access;
+	default:
+		return -EINVAL;
 	}
-
-	return access;
 }
 
 /*
@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-	if (!entry->ksym_hbp)
-		goto err;
-
-	entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL);
-	if (!entry->ksym_hbp->info.name)
-		goto err;
-
-	entry->ksym_hbp->info.type = op;
-	entry->ksym_addr = entry->ksym_hbp->info.address = addr;
-#ifdef CONFIG_X86
-	entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
-#endif
-	entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+	entry->type = op;
+	entry->ksym_addr = addr;
+	entry->len = HW_BREAKPOINT_LEN_4;
+
+	ret = -EAGAIN;
+	entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+	if (IS_ERR(entry->ksym_hbp)) {
+		entry->ksym_hbp = NULL;
+		ret = PTR_ERR(entry->ksym_hbp);
+	}
 
-	ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
-	if (ret < 0) {
+	if (!entry->ksym_hbp) {
 		printk(KERN_INFO "ksym_tracer request failed. Try again"
 					" later!!\n");
-		ret = -EAGAIN;
 		goto err;
 	}
+
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
 	ksym_filter_entry_count++;
+
 	return 0;
+
 err:
-	if (entry->ksym_hbp)
-		kfree(entry->ksym_hbp->info.name);
-	kfree(entry->ksym_hbp);
 	kfree(entry);
+
 	return ret;
 }
 
@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
 	mutex_lock(&ksym_tracer_mutex);
 
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name);
-		if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
+		if (entry->type == HW_BREAKPOINT_W)
 			ret = trace_seq_puts(s, "-w-\n");
-		else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+		else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
 			ret = trace_seq_puts(s, "rw-\n");
 		WARN_ON_ONCE(!ret);
 	}
@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void)
 	mutex_lock(&ksym_tracer_mutex);
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
-		unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
 		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
-		kfree(entry->ksym_hbp->info.name);
-		kfree(entry->ksym_hbp);
 		kfree(entry);
 	}
 	mutex_unlock(&ksym_tracer_mutex);
@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
 		if (entry->ksym_addr == ksym_addr) {
 			/* Check for malformed request: (6) */
-			if (entry->ksym_hbp->info.type != op)
+			if (entry->type != op)
 				changed = 1;
 			else
 				goto out;
@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 		}
 	}
 	if (changed) {
-		unregister_kernel_hw_breakpoint(entry->ksym_hbp);
-		entry->ksym_hbp->info.type = op;
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		entry->type = op;
 		if (op > 0) {
-			ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
-			if (ret == 0)
+			entry->ksym_hbp =
+				register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+			if (IS_ERR(entry->ksym_hbp))
+				entry->ksym_hbp = NULL;
+			if (!entry->ksym_hbp)
 				goto out;
 		}
 		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
-		kfree(entry->ksym_hbp->info.name);
-		kfree(entry->ksym_hbp);
 		kfree(entry);
 		ret = 0;
 		goto out;
@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
 
 	trace_assign_type(field, entry);
 
-	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd,
-				entry->pid, iter->cpu, field->ksym_name);
+	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+				entry->pid, iter->cpu, (char *)field->addr);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (field->type) {
-	case HW_BREAKPOINT_WRITE:
+	case HW_BREAKPOINT_W:
 		ret = trace_seq_printf(s, " W  ");
 		break;
-	case HW_BREAKPOINT_RW:
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
 		ret = trace_seq_printf(s, " RW ");
 		break;
 	default:
@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 
 	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
 
-	if (entry->ksym_hbp)
-		access_type = entry->ksym_hbp->info.type;
+	access_type = entry->type;
 
 	switch (access_type) {
-	case HW_BREAKPOINT_WRITE:
+	case HW_BREAKPOINT_W:
 		seq_puts(m, "  W           ");
 		break;
-	case HW_BREAKPOINT_RW:
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
 		seq_puts(m, "  RW          ");
 		break;
 	default:
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 7179c12e4f0f..27c5072c2e6b 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
 
 	ksym_selftest_dummy = 0;
 	/* Register the read-write tracing request */
-	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY,
+				     HW_BREAKPOINT_R | HW_BREAKPOINT_W,
 					(unsigned long)(&ksym_selftest_dummy));
 
 	if (ret < 0) {
-- 
cgit v1.2.3


From d4cada4ae1c012815f95fa507eb86a0ae9d607d7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:17:30 +0000
Subject: udp: split sk_hash into two u16 hashes

Union sk_hash with two u16 hashes for udp (no extra memory taken)

One 16 bits hash on (local port) value (the previous udp 'hash')

One 16 bits hash on (local address, local port) values, initialized
but not yet used. This second hash is using jenkin hash for better
distribution.

Because the 'port' is xored later, a partial hash is performed
on local address + net_hash_mix(net)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  2 ++
 include/net/sock.h  |  6 +++++-
 net/ipv4/udp.c      | 25 +++++++++++++++++++------
 net/ipv6/udp.c      | 27 +++++++++++++++++++++++++--
 4 files changed, 51 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 832361e3e596..5b4b5274e683 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -55,6 +55,8 @@ static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
 struct udp_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock inet;
+#define udp_port_hash		inet.sk.__sk_common.skc_u16hashes[0]
+#define udp_portaddr_hash	inet.sk.__sk_common.skc_u16hashes[1]
 	int		 pending;	/* Any pending frames ? */
 	unsigned int	 corkflag;	/* Cork is required */
   	__u16		 encap_type;	/* Is this an Encapsulation socket? */
diff --git a/include/net/sock.h b/include/net/sock.h
index 55de3bd719a5..827366b62680 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -109,6 +109,7 @@ struct net;
  *	@skc_refcnt: reference count
  *	@skc_tx_queue_mapping: tx queue number for this connection
  *	@skc_hash: hash value used with various protocol lookup tables
+ *	@skc_u16hashes: two u16 hash values used by UDP lookup tables
  *	@skc_family: network address family
  *	@skc_state: Connection state
  *	@skc_reuse: %SO_REUSEADDR setting
@@ -131,7 +132,10 @@ struct sock_common {
 	atomic_t		skc_refcnt;
 	int			skc_tx_queue_mapping;
 
-	unsigned int		skc_hash;
+	union  {
+		unsigned int	skc_hash;
+		__u16		skc_u16hashes[2];
+	};
 	unsigned short		skc_family;
 	volatile unsigned char	skc_state;
 	unsigned char		skc_reuse;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ffc837643a04..af72de1c8690 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -138,13 +138,14 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 	sk_nulls_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net)			&&
 		    sk2 != sk					&&
-		    (bitmap || sk2->sk_hash == num)		&&
+		    (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
 		    (!sk2->sk_reuse || !sk->sk_reuse)		&&
 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 		    (*saddr_comp)(sk, sk2)) {
 			if (bitmap)
-				__set_bit(sk2->sk_hash >> log, bitmap);
+				__set_bit(udp_sk(sk2)->udp_port_hash >> log,
+					  bitmap);
 			else
 				return 1;
 		}
@@ -215,7 +216,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	}
 found:
 	inet_sk(sk)->inet_num = snum;
-	sk->sk_hash = snum;
+	udp_sk(sk)->udp_port_hash = snum;
+	udp_sk(sk)->udp_portaddr_hash ^= snum;
 	if (sk_unhashed(sk)) {
 		sk_nulls_add_node_rcu(sk, &hslot->head);
 		hslot->count++;
@@ -238,8 +240,19 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 		   inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
 }
 
+static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
+				       unsigned int port)
+{
+	return jhash_1word(saddr, net_hash_mix(net)) ^ port;
+}
+
 int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
+	/* precompute partial secondary hash */
+	udp_sk(sk)->udp_portaddr_hash =
+		udp4_portaddr_hash(sock_net(sk),
+				   inet_sk(sk)->inet_rcv_saddr,
+				   0);
 	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
@@ -249,7 +262,7 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
 {
 	int score = -1;
 
-	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			!ipv6_only_sock(sk)) {
 		struct inet_sock *inet = inet_sk(sk);
 
@@ -360,7 +373,7 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
 		struct inet_sock *inet = inet_sk(s);
 
 		if (!net_eq(sock_net(s), net)				||
-		    s->sk_hash != hnum					||
+		    udp_sk(s)->udp_port_hash != hnum			||
 		    (inet->inet_daddr && inet->inet_daddr != rmt_addr)	||
 		    (inet->inet_dport != rmt_port && inet->inet_dport)	||
 		    (inet->inet_rcv_saddr	&&
@@ -1050,7 +1063,7 @@ void udp_lib_unhash(struct sock *sk)
 	if (sk_hashed(sk)) {
 		struct udp_table *udptable = sk->sk_prot->h.udp_table;
 		struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
-						     sk->sk_hash);
+						       udp_sk(sk)->udp_port_hash);
 
 		spin_lock_bh(&hslot->lock);
 		if (sk_nulls_del_node_init_rcu(sk)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5bc7cdbf030a..1e5fadd997b7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -81,8 +81,30 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	return 0;
 }
 
+static unsigned int udp6_portaddr_hash(struct net *net,
+				       const struct in6_addr *addr6,
+				       unsigned int port)
+{
+	unsigned int hash, mix = net_hash_mix(net);
+
+	if (ipv6_addr_any(addr6))
+		hash = jhash_1word(0, mix);
+	else if (ipv6_addr_type(addr6) == IPV6_ADDR_MAPPED)
+		hash = jhash_1word(addr6->s6_addr32[3], mix);
+	else
+		hash = jhash2(addr6->s6_addr32, 4, mix);
+
+	return hash ^ port;
+}
+
+
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
+	/* precompute partial secondary hash */
+	udp_sk(sk)->udp_portaddr_hash =
+		udp6_portaddr_hash(sock_net(sk),
+				   &inet6_sk(sk)->rcv_saddr,
+				   0);
 	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
@@ -94,7 +116,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 {
 	int score = -1;
 
-	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
@@ -415,7 +437,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 		if (!net_eq(sock_net(s), net))
 			continue;
 
-		if (s->sk_hash == num && s->sk_family == PF_INET6) {
+		if (udp_sk(s)->udp_port_hash == num &&
+		    s->sk_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(s);
 			if (inet->inet_dport) {
 				if (inet->inet_dport != rmt_port)
-- 
cgit v1.2.3


From 512615b6b843ff3ff5ad583f34c39b3f302f5f26 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 8 Nov 2009 10:17:58 +0000
Subject: udp: secondary hash on (local port, local address)

Extends udp_table to contain a secondary hash table.

socket anchor for this second hash is free, because UDP
doesnt use skc_bind_node : We define an union to hold
both skc_bind_node & a new hlist_nulls_node udp_portaddr_node

udp_lib_get_port() inserts sockets into second hash chain
(additional cost of one atomic op)

udp_lib_unhash() deletes socket from second hash chain
(additional cost of one atomic op)

Note : No spinlock lockdep annotation is needed, because
lock for the secondary hash chain is always get after
lock for primary hash chain.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  1 +
 include/net/sock.h  |  8 ++++++--
 include/net/udp.h   | 22 ++++++++++++++++++++--
 net/ipv4/udp.c      | 31 ++++++++++++++++++++++++++-----
 4 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 5b4b5274e683..59f0ddf2d284 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -57,6 +57,7 @@ struct udp_sock {
 	struct inet_sock inet;
 #define udp_port_hash		inet.sk.__sk_common.skc_u16hashes[0]
 #define udp_portaddr_hash	inet.sk.__sk_common.skc_u16hashes[1]
+#define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
 	int		 pending;	/* Any pending frames ? */
 	unsigned int	 corkflag;	/* Cork is required */
   	__u16		 encap_type;	/* Is this an Encapsulation socket? */
diff --git a/include/net/sock.h b/include/net/sock.h
index 827366b62680..3f1a4804bb3f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -105,7 +105,7 @@ struct net;
 /**
  *	struct sock_common - minimal network layer representation of sockets
  *	@skc_node: main hash linkage for various protocol lookup tables
- *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
+ *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_refcnt: reference count
  *	@skc_tx_queue_mapping: tx queue number for this connection
  *	@skc_hash: hash value used with various protocol lookup tables
@@ -115,6 +115,7 @@ struct net;
  *	@skc_reuse: %SO_REUSEADDR setting
  *	@skc_bound_dev_if: bound device index if != 0
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
+ *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
  *	@skc_prot: protocol handlers inside a network family
  *	@skc_net: reference to the network namespace of this socket
  *
@@ -140,7 +141,10 @@ struct sock_common {
 	volatile unsigned char	skc_state;
 	unsigned char		skc_reuse;
 	int			skc_bound_dev_if;
-	struct hlist_node	skc_bind_node;
+	union {
+		struct hlist_node	skc_bind_node;
+		struct hlist_nulls_node skc_portaddr_node;
+	};
 	struct proto		*skc_prot;
 #ifdef CONFIG_NET_NS
 	struct net	 	*skc_net;
diff --git a/include/net/udp.h b/include/net/udp.h
index 9167281e47dc..af41850f742a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -63,10 +63,19 @@ struct udp_hslot {
 	spinlock_t		lock;
 } __attribute__((aligned(2 * sizeof(long))));
 
+/**
+ *	struct udp_table - UDP table
+ *
+ *	@hash:	hash table, sockets are hashed on (local port)
+ *	@hash2:	hash table, sockets are hashed on (local port, local address)
+ *	@mask:	number of slots in hash tables, minus 1
+ *	@log:	log2(number of slots in hash table)
+ */
 struct udp_table {
 	struct udp_hslot	*hash;
-	unsigned int mask;
-	unsigned int log;
+	struct udp_hslot	*hash2;
+	unsigned int		mask;
+	unsigned int		log;
 };
 extern struct udp_table udp_table;
 extern void udp_table_init(struct udp_table *, const char *);
@@ -75,6 +84,15 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
 {
 	return &table->hash[udp_hashfn(net, num, table->mask)];
 }
+/*
+ * For secondary hash, net_hash_mix() is performed before calling
+ * udp_hashslot2(), this explains difference with udp_hashslot()
+ */
+static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
+					      unsigned int hash)
+{
+	return &table->hash2[hash & table->mask];
+}
 
 /* Note: this must match 'valbool' in sock_setsockopt */
 #define UDP_CSUM_NOXMIT		1
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index af72de1c8690..5f04216f35ce 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -163,7 +163,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		       int (*saddr_comp)(const struct sock *sk1,
 					 const struct sock *sk2))
 {
-	struct udp_hslot *hslot;
+	struct udp_hslot *hslot, *hslot2;
 	struct udp_table *udptable = sk->sk_prot->h.udp_table;
 	int    error = 1;
 	struct net *net = sock_net(sk);
@@ -222,6 +222,13 @@ found:
 		sk_nulls_add_node_rcu(sk, &hslot->head);
 		hslot->count++;
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+
+		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+		spin_lock(&hslot2->lock);
+		hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+					 &hslot2->head);
+		hslot2->count++;
+		spin_unlock(&hslot2->lock);
 	}
 	error = 0;
 fail_unlock:
@@ -1062,14 +1069,22 @@ void udp_lib_unhash(struct sock *sk)
 {
 	if (sk_hashed(sk)) {
 		struct udp_table *udptable = sk->sk_prot->h.udp_table;
-		struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
-						       udp_sk(sk)->udp_port_hash);
+		struct udp_hslot *hslot, *hslot2;
+
+		hslot  = udp_hashslot(udptable, sock_net(sk),
+				      udp_sk(sk)->udp_port_hash);
+		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
 
 		spin_lock_bh(&hslot->lock);
 		if (sk_nulls_del_node_init_rcu(sk)) {
 			hslot->count--;
 			inet_sk(sk)->inet_num = 0;
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+
+			spin_lock(&hslot2->lock);
+			hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+			hslot2->count--;
+			spin_unlock(&hslot2->lock);
 		}
 		spin_unlock_bh(&hslot->lock);
 	}
@@ -1857,7 +1872,7 @@ void __init udp_table_init(struct udp_table *table, const char *name)
 
 	if (!CONFIG_BASE_SMALL)
 		table->hash = alloc_large_system_hash(name,
-			sizeof(struct udp_hslot),
+			2 * sizeof(struct udp_hslot),
 			uhash_entries,
 			21, /* one slot per 2 MB */
 			0,
@@ -1869,17 +1884,23 @@ void __init udp_table_init(struct udp_table *table, const char *name)
 	 */
 	if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
 		table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
-				      sizeof(struct udp_hslot), GFP_KERNEL);
+				      2 * sizeof(struct udp_hslot), GFP_KERNEL);
 		if (!table->hash)
 			panic(name);
 		table->log = ilog2(UDP_HTABLE_SIZE_MIN);
 		table->mask = UDP_HTABLE_SIZE_MIN - 1;
 	}
+	table->hash2 = table->hash + (table->mask + 1);
 	for (i = 0; i <= table->mask; i++) {
 		INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
 		table->hash[i].count = 0;
 		spin_lock_init(&table->hash[i].lock);
 	}
+	for (i = 0; i <= table->mask; i++) {
+		INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
+		table->hash2[i].count = 0;
+		spin_lock_init(&table->hash2[i].lock);
+	}
 }
 
 void __init udp_init(void)
-- 
cgit v1.2.3


From 7a50a240c495478179f01c9df4bd75e39cff79c7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 8 Nov 2009 20:57:03 -0800
Subject: net/compat_ioctl: support SIOCWANDEV

This adds compat_ioctl support for SIOCWANDEV, which has
always been missing.

The definition of struct compat_ifreq was missing an
ifru_settings fields that is needed to support SIOCWANDEV,
so add that and clean up the whitespace damage in the
struct definition.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/compat.h | 41 ++++++++++++++++++++++++-----------------
 net/socket.c           | 23 +++++++++++++++++++++++
 2 files changed, 47 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 224c7a896172..ef68119a4fd2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -165,25 +165,32 @@ struct compat_ifmap {
 	unsigned char port;
 };
 
+struct compat_if_settings
+{
+	unsigned int type;	/* Type of physical device or protocol */
+	unsigned int size;	/* Size of the data allocated by the caller */
+	compat_uptr_t ifs_ifsu;	/* union of pointers */
+};
+
 struct compat_ifreq {
-        union {
-                char    ifrn_name[IFNAMSIZ];            /* if name, e.g. "en0" */
-        } ifr_ifrn;
-        union {
-                struct  sockaddr ifru_addr;
-                struct  sockaddr ifru_dstaddr;
-                struct  sockaddr ifru_broadaddr;
-                struct  sockaddr ifru_netmask;
-                struct  sockaddr ifru_hwaddr;
-                short   ifru_flags;
-                compat_int_t     ifru_ivalue;
-                compat_int_t     ifru_mtu;
-                struct  compat_ifmap ifru_map;
-                char    ifru_slave[IFNAMSIZ];   /* Just fits the size */
+	union {
+		char	ifrn_name[IFNAMSIZ];    /* if name, e.g. "en0" */
+	} ifr_ifrn;
+	union {
+		struct	sockaddr ifru_addr;
+		struct	sockaddr ifru_dstaddr;
+		struct	sockaddr ifru_broadaddr;
+		struct	sockaddr ifru_netmask;
+		struct	sockaddr ifru_hwaddr;
+		short	ifru_flags;
+		compat_int_t	ifru_ivalue;
+		compat_int_t	ifru_mtu;
+		struct	compat_ifmap ifru_map;
+		char	ifru_slave[IFNAMSIZ];   /* Just fits the size */
 		char	ifru_newname[IFNAMSIZ];
-                compat_caddr_t ifru_data;
-	    /* XXXX? ifru_settings should be here */
-        } ifr_ifru;
+		compat_caddr_t	ifru_data;
+		struct	compat_if_settings ifru_settings;
+	} ifr_ifru;
 };
 
 struct compat_ifconf {
diff --git a/net/socket.c b/net/socket.c
index 224e7f73fdf0..befd9f5b1620 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2627,6 +2627,27 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 	return dev_ioctl(net, SIOCETHTOOL, ifr);
 }
 
+static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
+{
+	void __user *uptr;
+	compat_uptr_t uptr32;
+	struct ifreq __user *uifr;
+
+	uifr = compat_alloc_user_space(sizeof (*uifr));
+	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
+		return -EFAULT;
+
+	if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
+		return -EFAULT;
+
+	uptr = compat_ptr(uptr32);
+
+	if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
+		return -EFAULT;
+
+	return dev_ioctl(net, SIOCWANDEV, uifr);
+}
+
 static int bond_ioctl(struct net *net, unsigned int cmd,
 			 struct compat_ifreq __user *ifr32)
 {
@@ -3058,6 +3079,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 		return dev_ifconf(net, argp);
 	case SIOCETHTOOL:
 		return ethtool_ioctl(net, argp);
+	case SIOCWANDEV:
+		return compat_siocwandev(net, argp);
 	case SIOCBONDENSLAVE:
 	case SIOCBONDRELEASE:
 	case SIOCBONDSETHWADDR:
-- 
cgit v1.2.3


From b71a8eb0fa64ec6d00175f479e3ef851703568af Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 6 Oct 2009 12:42:51 +0200
Subject: tree-wide: fix typos "selct" + "slect" -> "select"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch was generated by

	git grep -E -i -l 's(le|el)ct' | xargs -r perl -p -i -e 's/([Ss])(le|el)ct/$1elect/

with only skipping net/netfilter/xt_SECMARK.c and
include/linux/netfilter/xt_SECMARK.h which have a struct member called
selctx.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/media/video/ov772x.c       | 2 +-
 drivers/scsi/aic7xxx/aic79xx.seq   | 2 +-
 drivers/scsi/aic7xxx/aic79xx_osm.c | 2 +-
 drivers/scsi/aic7xxx/aic7xxx.seq   | 2 +-
 drivers/scsi/aic7xxx/aic7xxx_osm.c | 2 +-
 drivers/scsi/dc395x.c              | 2 +-
 include/linux/spi/spi_bitbang.h    | 2 +-
 kernel/time/clocksource.c          | 2 +-
 sound/pci/hda/patch_cirrus.c       | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index eccb40ab7fec..205229333466 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -247,7 +247,7 @@
 
 /* COM5 */
 #define AFR_ON_OFF      0x80	/* Auto frame rate control ON/OFF selection */
-#define AFR_SPPED       0x40	/* Auto frame rate control speed slection */
+#define AFR_SPPED       0x40	/* Auto frame rate control speed selection */
 				/* Auto frame rate max rate control */
 #define AFR_NO_RATE     0x00	/*     No  reduction of frame rate */
 #define AFR_1p2         0x10	/*     Max reduction to 1/2 frame rate */
diff --git a/drivers/scsi/aic7xxx/aic79xx.seq b/drivers/scsi/aic7xxx/aic79xx.seq
index 58bc17591b54..3b66b5ae3d9f 100644
--- a/drivers/scsi/aic7xxx/aic79xx.seq
+++ b/drivers/scsi/aic7xxx/aic79xx.seq
@@ -1281,7 +1281,7 @@ END_CRITICAL;
  * Is it a disconnect message?  Set a flag in the SCB to remind us
  * and await the bus going free.  If this is an untagged transaction
  * store the SCB id for it in our untagged target table for lookup on
- * a reselction.
+ * a reselection.
  */
 mesgin_disconnect:
 	/*
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 75b23317bd26..1222a7ac698a 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -2335,7 +2335,7 @@ ahd_linux_queue_abort_cmd(struct scsi_cmnd *cmd)
 			/*
 			 * The sequencer will never re-reference the
 			 * in-core SCB.  To make sure we are notified
-			 * during reslection, set the MK_MESSAGE flag in
+			 * during reselection, set the MK_MESSAGE flag in
 			 * the card's copy of the SCB.
 			 */
 			ahd_outb(ahd, SCB_CONTROL,
diff --git a/drivers/scsi/aic7xxx/aic7xxx.seq b/drivers/scsi/aic7xxx/aic7xxx.seq
index 15196390e28d..5a4cfc954a9f 100644
--- a/drivers/scsi/aic7xxx/aic7xxx.seq
+++ b/drivers/scsi/aic7xxx/aic7xxx.seq
@@ -1693,7 +1693,7 @@ if ((ahc->flags & AHC_INITIATORROLE) != 0) {
  * Is it a disconnect message?  Set a flag in the SCB to remind us
  * and await the bus going free.  If this is an untagged transaction
  * store the SCB id for it in our untagged target table for lookup on
- * a reselction.
+ * a reselection.
  */
 mesgin_disconnect:
 	/*
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index fd2b9785ff4f..8cb05dc8e6a1 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -2290,7 +2290,7 @@ ahc_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag)
 		 * In the non-paging case, the sequencer will
 		 * never re-reference the in-core SCB.
 		 * To make sure we are notified during
-		 * reslection, set the MK_MESSAGE flag in
+		 * reselection, set the MK_MESSAGE flag in
 		 * the card's copy of the SCB.
 		 */
 		if ((ahc->flags & AHC_PAGESCBS) == 0) {
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 075e2397273c..6c59c02c1ed9 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -1509,7 +1509,7 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 		 * Try anyway?
 		 *
 		 * We could, BUT: Sometimes the TRM_S1040 misses to produce a Selection
-		 * Timeout, a Disconnect or a Reselction IRQ, so we would be screwed!
+		 * Timeout, a Disconnect or a Reselection IRQ, so we would be screwed!
 		 * (This is likely to be a bug in the hardware. Obviously, most people
 		 *  only have one initiator per SCSI bus.)
 		 * Instead let this fail and have the timer make sure the command is 
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index eed4254bd503..3274c507b8a9 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -15,7 +15,7 @@
  * Some hardware works well with requests at spi_transfer scope:
  *
  *   -	Drivers leveraging smarter hardware, with fifos or DMA; or for half
- *	duplex (MicroWire) controllers.  Provide chipslect() and txrx_bufs(),
+ *	duplex (MicroWire) controllers.  Provide chipselect() and txrx_bufs(),
  *	and custom setup()/cleanup() methods.
  */
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5e18c6ab2c6a..c403567f78c0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -580,7 +580,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
  * @count:	length of buffer
  *
  * Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
+ * clocksource selection.
  */
 static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 					  struct sysdev_attribute *attr,
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 8ba306856d38..7b0446fa6009 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -947,7 +947,7 @@ static void init_input(struct hda_codec *codec)
 		coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */
 	if (is_active_pin(codec, CS_DMIC1_PIN_NID))
 		coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0 
-				 * No effect if SPDIF_OUT2 is slected in 
+				 * No effect if SPDIF_OUT2 is selected in 
 				 * IDX_SPDIF_CTL.
 				  */
 	cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
-- 
cgit v1.2.3


From 21ae2956ce289f61f11863cc67080f9a28101ae0 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Wed, 7 Oct 2009 15:21:09 +0200
Subject: tree-wide: fix typos "aquire" -> "acquire", "cumsumed" -> "consumed"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch was generated by

	git grep -E -i -l '[Aa]quire' | xargs -r perl -p -i -e 's/([Aa])quire/$1cquire/'

and the cumsumed was found by checking the diff for aquire.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/cpuidle/governor.c             | 4 ++--
 drivers/infiniband/hw/cxgb3/cxio_hal.c | 2 +-
 drivers/media/dvb/frontends/drx397xD.c | 2 +-
 drivers/net/ps3_gelic_wireless.h       | 2 +-
 drivers/net/qla3xxx.c                  | 2 +-
 drivers/net/wireless/b43/main.c        | 2 +-
 drivers/net/wireless/b43legacy/main.c  | 2 +-
 include/linux/dm-log-userspace.h       | 2 +-
 mm/kmemleak.c                          | 4 ++--
 mm/memcontrol.c                        | 2 +-
 10 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 70b59642a708..724c164d31c9 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -21,7 +21,7 @@ struct cpuidle_governor *cpuidle_curr_governor;
  * __cpuidle_find_governor - finds a governor of the specified name
  * @str: the name
  *
- * Must be called with cpuidle_lock aquired.
+ * Must be called with cpuidle_lock acquired.
  */
 static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
 {
@@ -39,7 +39,7 @@ static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
  * @gov: the new target governor
  *
  * NOTE: "gov" can be NULL to specify disabled
- * Must be called with cpuidle_lock aquired.
+ * Must be called with cpuidle_lock acquired.
  */
 int cpuidle_switch_governor(struct cpuidle_governor *gov)
 {
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 72ed3396b721..0677fc7dfd51 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -589,7 +589,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
 
 /* write len bytes of data into addr (32B aligned address)
  * If data is NULL, clear len byte of memory to zero.
- * caller aquires the ctrl_qp lock before the call
+ * caller acquires the ctrl_qp lock before the call
  */
 static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
 				      u32 len, void *data)
diff --git a/drivers/media/dvb/frontends/drx397xD.c b/drivers/media/dvb/frontends/drx397xD.c
index 010075535221..868b78bfae75 100644
--- a/drivers/media/dvb/frontends/drx397xD.c
+++ b/drivers/media/dvb/frontends/drx397xD.c
@@ -81,7 +81,7 @@ static struct {
 #include "drx397xD_fw.h"
 };
 
-/* use only with writer lock aquired */
+/* use only with writer lock acquired */
 static void _drx_release_fw(struct drx397xD_state *s, enum fw_ix ix)
 {
 	memset(&fw[ix].data[0], 0, sizeof(fw[0].data));
diff --git a/drivers/net/ps3_gelic_wireless.h b/drivers/net/ps3_gelic_wireless.h
index 5b631c6c9775..0a88b535197a 100644
--- a/drivers/net/ps3_gelic_wireless.h
+++ b/drivers/net/ps3_gelic_wireless.h
@@ -199,7 +199,7 @@ struct gelic_eurus_rssi_info {
 /* for 'stat' member of gelic_wl_info */
 enum gelic_wl_info_status_bit {
 	GELIC_WL_STAT_CONFIGURED,
-	GELIC_WL_STAT_CH_INFO,   /* ch info aquired */
+	GELIC_WL_STAT_CH_INFO,   /* ch info acquired */
 	GELIC_WL_STAT_ESSID_SET, /* ESSID specified by userspace */
 	GELIC_WL_STAT_BSSID_SET, /* BSSID specified by userspace */
 	GELIC_WL_STAT_WPA_PSK_SET, /* PMK specified by userspace */
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 4c610511eb40..f72643313bab 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -3651,7 +3651,7 @@ static int ql_adapter_up(struct ql3_adapter *qdev)
 		ql_sem_unlock(qdev, QL_DRVR_SEM_MASK);
 	} else {
 		printk(KERN_ERR PFX
-		       "%s: Could not aquire driver lock.\n",
+		       "%s: Could not acquire driver lock.\n",
 		       ndev->name);
 		goto err_lock;
 	}
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 86f35827f008..f66efea61667 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -2955,7 +2955,7 @@ static void do_periodic_work(struct b43_wldev *dev)
 /* Periodic work locking policy:
  * 	The whole periodic work handler is protected by
  * 	wl->mutex. If another lock is needed somewhere in the
- * 	pwork callchain, it's aquired in-place, where it's needed.
+ * 	pwork callchain, it's acquired in-place, where it's needed.
  */
 static void b43_periodic_work_handler(struct work_struct *work)
 {
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 4b60148a5e61..881784b18b0b 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2277,7 +2277,7 @@ static void do_periodic_work(struct b43legacy_wldev *dev)
 /* Periodic work locking policy:
  * 	The whole periodic work handler is protected by
  * 	wl->mutex. If another lock is needed somewhere in the
- * 	pwork callchain, it's aquired in-place, where it's needed.
+ * 	pwork callchain, it's acquired in-place, where it's needed.
  */
 static void b43legacy_periodic_work_handler(struct work_struct *work)
 {
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
index 8a1f972c0fe9..0c3c3a2110c4 100644
--- a/include/linux/dm-log-userspace.h
+++ b/include/linux/dm-log-userspace.h
@@ -363,7 +363,7 @@
  * various request types above.  The remaining 24-bits are currently
  * set to zero and are reserved for future use and compatibility concerns.
  *
- * User-space should always use DM_ULOG_REQUEST_TYPE to aquire the
+ * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the
  * request type from the 'request_type' field to maintain forward compatibility.
  */
 #define DM_ULOG_REQUEST_MASK 0xFF
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..13f33b3081ec 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object)
 	unsigned long flags;
 
 	/*
-	 * Once the object->lock is aquired, the corresponding memory block
-	 * cannot be freed (the same lock is aquired in delete_object).
+	 * Once the object->lock is acquired, the corresponding memory block
+	 * cannot be freed (the same lock is acquired in delete_object).
 	 */
 	spin_lock_irqsave(&object->lock, flags);
 	if (object->flags & OBJECT_NO_SCAN)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f99f5991d6bb..7226e60e52af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 /*
  * While swap-in, try_charge -> commit or cancel, the page is locked.
  * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is aquired. This refcnt will be cumsumed by
+ * struct page_cgroup is acquired. This refcnt will be consumed by
  * "commit()" or removed by "cancel()"
  */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-- 
cgit v1.2.3


From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Nov 2009 16:35:32 +1100
Subject: security: report the module name to security_module_request

For SELinux to do better filtering in userspace we send the name of the
module along with the AVC denial when a program is denied module_request.

Example output:

type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null)
type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc:  denied  { module_request } for  pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h | 18 ++++++++++--------
 include/linux/security.h  |  7 ++++---
 kernel/kmod.c             |  8 ++++----
 security/capability.c     |  2 +-
 security/lsm_audit.c      |  4 ++++
 security/security.c       |  4 ++--
 security/selinux/hooks.c  | 13 +++++++++++--
 7 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 190c37854870..f78f83d7663f 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -26,14 +26,15 @@
 
 /* Auxiliary data to use in generating the audit record. */
 struct common_audit_data {
-	char    type;
-#define LSM_AUDIT_DATA_FS      1
-#define LSM_AUDIT_DATA_NET     2
-#define LSM_AUDIT_DATA_CAP     3
-#define LSM_AUDIT_DATA_IPC     4
-#define LSM_AUDIT_DATA_TASK    5
-#define LSM_AUDIT_DATA_KEY     6
-#define LSM_AUDIT_NO_AUDIT     7
+	char type;
+#define LSM_AUDIT_DATA_FS	1
+#define LSM_AUDIT_DATA_NET	2
+#define LSM_AUDIT_DATA_CAP	3
+#define LSM_AUDIT_DATA_IPC	4
+#define LSM_AUDIT_DATA_TASK	5
+#define LSM_AUDIT_DATA_KEY	6
+#define LSM_AUDIT_NO_AUDIT	7
+#define LSM_AUDIT_DATA_KMOD	8
 	struct task_struct *tsk;
 	union 	{
 		struct {
@@ -66,6 +67,7 @@ struct common_audit_data {
 			char *key_desc;
 		} key_struct;
 #endif
+		char *kmod_name;
 	} u;
 	/* this union contains LSM specific data */
 	union {
diff --git a/include/linux/security.h b/include/linux/security.h
index ed0faea60b82..466cbadbd1ef 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @kernel_module_request:
  *	Ability to trigger the kernel to automatically upcall to userspace for
  *	userspace to load a kernel module with the given name.
+ *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
@@ -1577,7 +1578,7 @@ struct security_operations {
 	void (*cred_transfer)(struct cred *new, const struct cred *old);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
-	int (*kernel_module_request)(void);
+	int (*kernel_module_request)(char *kmod_name);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
@@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old);
 void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
-int security_kernel_module_request(void);
+int security_kernel_module_request(char *kmod_name);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
@@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred,
 	return 0;
 }
 
-static inline int security_kernel_module_request(void)
+static inline int security_kernel_module_request(char *kmod_name)
 {
 	return 0;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9fcb53a11f87..25b103190364 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
-	ret = security_kernel_module_request();
-	if (ret)
-		return ret;
-
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
 	if (ret >= MODULE_NAME_LEN)
 		return -ENAMETOOLONG;
 
+	ret = security_kernel_module_request(module_name);
+	if (ret)
+		return ret;
+
 	/* If modprobe needs a service that is in a module, we get a recursive
 	 * loop.  Limit the number of running kmod threads to max_threads/2 or
 	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
diff --git a/security/capability.c b/security/capability.c
index 4f3ab476937f..5c700e1a4fd3 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -421,7 +421,7 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return 0;
 }
 
-static int cap_kernel_module_request(void)
+static int cap_kernel_module_request(char *kmod_name)
 {
 	return 0;
 }
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 3bb90b6f1dd3..51bd0fd9c9f0 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -354,6 +354,10 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 		}
 		break;
 #endif
+	case LSM_AUDIT_DATA_KMOD:
+		audit_log_format(ab, " kmod=");
+		audit_log_untrustedstring(ab, a->u.kmod_name);
+		break;
 	} /* switch (a->type) */
 }
 
diff --git a/security/security.c b/security/security.c
index aad71b2ca195..24e060be9fa5 100644
--- a/security/security.c
+++ b/security/security.c
@@ -764,9 +764,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return security_ops->kernel_create_files_as(new, inode);
 }
 
-int security_kernel_module_request(void)
+int security_kernel_module_request(char *kmod_name)
 {
-	return security_ops->kernel_module_request();
+	return security_ops->kernel_module_request(kmod_name);
 }
 
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a29d6612a328..c96d63ec4753 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3337,9 +3337,18 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return 0;
 }
 
-static int selinux_kernel_module_request(void)
+static int selinux_kernel_module_request(char *kmod_name)
 {
-	return task_has_system(current, SYSTEM__MODULE_REQUEST);
+	u32 sid;
+	struct common_audit_data ad;
+
+	sid = task_sid(current);
+
+	COMMON_AUDIT_DATA_INIT(&ad, KMOD);
+	ad.u.kmod_name = kmod_name;
+
+	return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM,
+			    SYSTEM__MODULE_REQUEST, &ad);
 }
 
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
-- 
cgit v1.2.3


From 86b37281411cf1e9bc0a6b5406c45edb7bd9ea5d Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 10 Nov 2009 11:50:21 +0100
Subject: block: Expose discard granularity

While SSDs track block usage on a per-sector basis, RAID arrays often
have allocation blocks that are bigger.  Allow the discard granularity
and alignment to be set and teach the topology stacking logic how to
handle them.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 46 ++++++++++++++++++++++++++++++++++++----------
 block/blk-sysfs.c      | 22 ++++++++++++++++++++++
 block/genhd.c          | 12 ++++++++++++
 fs/partitions/check.c  | 12 ++++++++++++
 include/linux/blkdev.h | 18 ++++++++++++++++++
 include/linux/genhd.h  |  1 +
 6 files changed, 101 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 66d4aa8799b7..7f986cafacd5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,7 +96,10 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_segment_size = MAX_SEGMENT_SIZE;
 	lim->max_sectors = BLK_DEF_MAX_SECTORS;
 	lim->max_hw_sectors = INT_MAX;
-	lim->max_discard_sectors = SAFE_MAX_SECTORS;
+	lim->max_discard_sectors = 0;
+	lim->discard_granularity = 0;
+	lim->discard_alignment = 0;
+	lim->discard_misaligned = 0;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -488,6 +491,16 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+static unsigned int lcm(unsigned int a, unsigned int b)
+{
+	if (a && b)
+		return (a * b) / gcd(a, b);
+	else if (b)
+		return b;
+
+	return a;
+}
+
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top)
@@ -502,6 +515,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		     sector_t offset)
 {
+	int ret;
+
+	ret = 0;
+
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
 	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
@@ -531,7 +548,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	if (offset &&
 	    (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
 		t->misaligned = 1;
-		return -1;
+		ret = -1;
+	}
+
+	if (offset &&
+	    (offset & (b->discard_granularity - 1)) != b->discard_alignment) {
+		t->discard_misaligned = 1;
+		ret = -1;
 	}
 
 	/* If top has no alignment offset, inherit from bottom */
@@ -539,23 +562,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->alignment_offset =
 			b->alignment_offset & (b->physical_block_size - 1);
 
+	if (!t->discard_alignment)
+		t->discard_alignment =
+			b->discard_alignment & (b->discard_granularity - 1);
+
 	/* Top device aligned on logical block boundary? */
 	if (t->alignment_offset & (t->logical_block_size - 1)) {
 		t->misaligned = 1;
-		return -1;
+		ret = -1;
 	}
 
-	/* Find lcm() of optimal I/O size */
-	if (t->io_opt && b->io_opt)
-		t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
-	else if (b->io_opt)
-		t->io_opt = b->io_opt;
+	/* Find lcm() of optimal I/O size and granularity */
+	t->io_opt = lcm(t->io_opt, b->io_opt);
+	t->discard_granularity = lcm(t->discard_granularity,
+				     b->discard_granularity);
 
 	/* Verify that optimal I/O size is a multiple of io_min */
 	if (t->io_min && t->io_opt % t->io_min)
-		return -1;
+		ret = -1;
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8a6d81afb284..3147145edc15 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -126,6 +126,16 @@ static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
 	return queue_var_show(queue_io_opt(q), page);
 }
 
+static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->limits.discard_granularity, page);
+}
+
+static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -293,6 +303,16 @@ static struct queue_sysfs_entry queue_io_opt_entry = {
 	.show = queue_io_opt_show,
 };
 
+static struct queue_sysfs_entry queue_discard_granularity_entry = {
+	.attr = {.name = "discard_granularity", .mode = S_IRUGO },
+	.show = queue_discard_granularity_show,
+};
+
+static struct queue_sysfs_entry queue_discard_max_entry = {
+	.attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
+	.show = queue_discard_max_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nonrot_show,
@@ -328,6 +348,8 @@ static struct attribute *default_attrs[] = {
 	&queue_physical_block_size_entry.attr,
 	&queue_io_min_entry.attr,
 	&queue_io_opt_entry.attr,
+	&queue_discard_granularity_entry.attr,
+	&queue_discard_max_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/genhd.c b/block/genhd.c
index 517e4332cb37..b11a4ad7d571 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -861,12 +861,23 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_discard_alignment_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
+		   NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
@@ -887,6 +898,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
 	&dev_attr_alignment_offset.attr,
+	&dev_attr_discard_alignment.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e10cbad..64bc8998ac9a 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
 }
 
+ssize_t part_discard_alignment_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+	return sprintf(buf, "%u\n", p->discard_alignment);
+}
+
 ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
@@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
+		   NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = {
 	&dev_attr_start.attr,
 	&dev_attr_size.attr,
 	&dev_attr_alignment_offset.attr,
+	&dev_attr_discard_alignment.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 	p->start_sect = start;
 	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
+	p->discard_alignment = queue_sector_discard_alignment(disk->queue,
+							      start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 39c601f783a0..1cc02972fbe2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -312,12 +312,15 @@ struct queue_limits {
 	unsigned int		io_min;
 	unsigned int		io_opt;
 	unsigned int		max_discard_sectors;
+	unsigned int		discard_granularity;
+	unsigned int		discard_alignment;
 
 	unsigned short		logical_block_size;
 	unsigned short		max_hw_segments;
 	unsigned short		max_phys_segments;
 
 	unsigned char		misaligned;
+	unsigned char		discard_misaligned;
 	unsigned char		no_cluster;
 };
 
@@ -1121,6 +1124,21 @@ static inline int bdev_alignment_offset(struct block_device *bdev)
 	return q->limits.alignment_offset;
 }
 
+static inline int queue_discard_alignment(struct request_queue *q)
+{
+	if (q->limits.discard_misaligned)
+		return -1;
+
+	return q->limits.discard_alignment;
+}
+
+static inline int queue_sector_discard_alignment(struct request_queue *q,
+						 sector_t sector)
+{
+	return ((sector << 9) - q->limits.discard_alignment)
+		& (q->limits.discard_granularity - 1);
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45ffd0a..c6c0c41af35f 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -91,6 +91,7 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
+	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
-- 
cgit v1.2.3


From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:16 +0900
Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook

This changes detect_intel_iommu() to set intel_iommu_init() to
iommu_init hook if detect_intel_iommu() finds the IOMMU.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: build fix for the !CONFIG_DMAR case ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c |  2 --
 drivers/pci/dmar.c        |  4 ++++
 include/linux/dmar.h      | 15 ++++-----------
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 5ca44a9301a0..bed05e2e5890 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -294,8 +294,6 @@ static int __init pci_iommu_init(void)
 
 	x86_init.iommu.iommu_init();
 
-	intel_iommu_init();
-
 	no_iommu_init();
 	return 0;
 }
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 22b02c6df854..bce9cd7c755a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -616,6 +616,10 @@ void __init detect_intel_iommu(void)
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 		    !dmar_disabled)
 			iommu_detected = 1;
+#endif
+#ifdef CONFIG_X86
+		if (ret)
+			x86_init.iommu.iommu_init = intel_iommu_init;
 #endif
 	}
 	early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 4a2b162c256a..5de4c9e5856d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -208,16 +208,9 @@ struct dmar_atsr_unit {
 	u8 include_all:1;		/* include all ports */
 };
 
-/* Intel DMAR  initialization functions */
 extern int intel_iommu_init(void);
-#else
-static inline int intel_iommu_init(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	return dmar_dev_scope_init();
-#else
-	return -ENODEV;
-#endif
-}
-#endif /* !CONFIG_DMAR */
+#else /* !CONFIG_DMAR: */
+static inline int intel_iommu_init(void) { return -ENODEV; }
+#endif /* CONFIG_DMAR */
+
 #endif /* __DMAR_H__ */
-- 
cgit v1.2.3


From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:17 +0900
Subject: bootmem: Add free_bootmem_late()

Add a new function for freeing bootmem after the bootmem
allocator has been released and the unreserved pages given to
the page allocator.

This allows us to reserve bootmem and then release it if we
later discover it was not needed.

( This new API will be used by the swiotlb code to recover
  a significant amount of RAM (64MB). )

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
Cc: hannes@cmpxchg.org
Cc: tj@kernel.org
Cc: akpm@linux-foundation.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bootmem.h |  1 +
 mm/bootmem.c            | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dd97fb8408a8..b10ec49ee2dd 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long addr,
 			      unsigned long size);
 extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void free_bootmem_late(unsigned long addr, unsigned long size);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 555d5d2731c6..d1dc23cc7f10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
 
+/*
+ * free_bootmem_late - free bootmem pages directly to page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are given directly
+ * to the page allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+	unsigned long cursor, end;
+
+	kmemleak_free_part(__va(addr), size);
+
+	cursor = PFN_UP(addr);
+	end = PFN_DOWN(addr + size);
+
+	for (; cursor < end; cursor++) {
+		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		totalram_pages++;
+	}
+}
+
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	int aligned;
-- 
cgit v1.2.3


From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:18 +0900
Subject: swiotlb: Add swiotlb_free() function

swiotlb_free() function frees all allocated memory for swiotlb.

We need to initialize swiotlb before IOMMU initialization (x86
and powerpc needs to allocate memory from bootmem allocator). If
IOMMU initialization is successful, we need to free swiotlb
resource (don't want to waste 64MB).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: build fix for the !CONFIG_SWIOTLB case ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  6 ++++++
 lib/swiotlb.c           | 30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 73b1f1cec423..59bafa690290 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
 extern int
 swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
+#ifdef CONFIG_SWIOTLB
+extern void __init swiotlb_free(void);
+#else
+static inline void swiotlb_free(void) { }
+#endif
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ac25cd28e807..eee512b63f17 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr;
  */
 static DEFINE_SPINLOCK(io_tlb_lock);
 
+static int late_alloc;
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -262,6 +264,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 	swiotlb_print_info(bytes);
 
+	late_alloc = 1;
+
 	return 0;
 
 cleanup4:
@@ -281,6 +285,32 @@ cleanup1:
 	return -ENOMEM;
 }
 
+void __init swiotlb_free(void)
+{
+	if (!io_tlb_overflow_buffer)
+		return;
+
+	if (late_alloc) {
+		free_pages((unsigned long)io_tlb_overflow_buffer,
+			   get_order(io_tlb_overflow));
+		free_pages((unsigned long)io_tlb_orig_addr,
+			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+								 sizeof(int)));
+		free_pages((unsigned long)io_tlb_start,
+			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+	} else {
+		free_bootmem_late(__pa(io_tlb_overflow_buffer),
+				  io_tlb_overflow);
+		free_bootmem_late(__pa(io_tlb_orig_addr),
+				  io_tlb_nslabs * sizeof(phys_addr_t));
+		free_bootmem_late(__pa(io_tlb_list),
+				  io_tlb_nslabs * sizeof(int));
+		free_bootmem_late(__pa(io_tlb_start),
+				  io_tlb_nslabs << IO_TLB_SHIFT);
+	}
+}
+
 static int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= virt_to_phys(io_tlb_start) &&
-- 
cgit v1.2.3


From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:19 +0900
Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info()

This enables us to avoid printing swiotlb memory info when we
initialize swiotlb. After swiotlb initialization, we could find
that we don't need swiotlb.

This patch removes the code to print swiotlb memory info in
swiotlb_init() and exports the function to do that.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
Cc: tony.luck@intel.com
Cc: benh@kernel.crashing.org
LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: merge up conflict ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/kernel/pci-swiotlb.c |  4 ++--
 arch/powerpc/kernel/setup_32.c |  2 +-
 arch/powerpc/kernel/setup_64.c |  2 +-
 arch/x86/kernel/pci-swiotlb.c  |  3 +--
 include/linux/swiotlb.h        |  4 ++--
 lib/swiotlb.c                  | 15 ++++++++-------
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c6..53292abf846c 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = {
 void __init swiotlb_dma_init(void)
 {
 	dma_ops = &swiotlb_dma_ops;
-	swiotlb_init();
+	swiotlb_init(1);
 }
 
 void __init pci_swiotlb_init(void)
@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void)
 		swiotlb = 1;
 		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
 		machvec_init("dig");
-		swiotlb_init();
+		swiotlb_init(1);
 		dma_ops = &swiotlb_dma_ops;
 #else
 		panic("Unable to find Intel IOMMU");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 53bcf3d792db..b152de3e64d4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SWIOTLB
 	if (ppc_swiotlb_enable)
-		swiotlb_init();
+		swiotlb_init(1);
 #endif
 
 	paging_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 04f638d82fb3..df2c9e932b37 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SWIOTLB
 	if (ppc_swiotlb_enable)
-		swiotlb_init();
+		swiotlb_init(1);
 #endif
 
 	paging_init();
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index aaa6b7839f1e..ea20ef7ca523 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -52,8 +52,7 @@ void __init pci_swiotlb_init(void)
 	if (swiotlb_force)
 		swiotlb = 1;
 	if (swiotlb) {
-		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
-		swiotlb_init();
+		swiotlb_init(0);
 		dma_ops = &swiotlb_dma_ops;
 	}
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 59bafa690290..eb9bdb4d4854 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -20,8 +20,7 @@ struct scatterlist;
  */
 #define IO_TLB_SHIFT 11
 
-extern void
-swiotlb_init(void);
+extern void swiotlb_init(int verbose);
 
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -94,4 +93,5 @@ extern void __init swiotlb_free(void);
 static inline void swiotlb_free(void) { }
 #endif
 
+extern void swiotlb_print_info(void);
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index eee512b63f17..0c12d7cce300 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
-static void swiotlb_print_info(unsigned long bytes)
+void swiotlb_print_info(void)
 {
+	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 	phys_addr_t pstart, pend;
 
 	pstart = virt_to_phys(io_tlb_start);
@@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes)
  * structures for the software IO TLB used to implement the DMA API.
  */
 void __init
-swiotlb_init_with_default_size(size_t default_size)
+swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
 	unsigned long i, bytes;
 
@@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size)
 	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
-
-	swiotlb_print_info(bytes);
+	if (verbose)
+		swiotlb_print_info();
 }
 
 void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
 {
-	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
+	swiotlb_init_with_default_size(64 * (1<<20), verbose);	/* default to 64MB */
 }
 
 /*
@@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		goto cleanup4;
 
-	swiotlb_print_info(bytes);
+	swiotlb_print_info();
 
 	late_alloc = 1;
 
-- 
cgit v1.2.3


From cfd5324e699a2e74a44642d43dcf03d581f2a7db Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 4 Nov 2009 09:58:17 +0200
Subject: MFD: TWL4030: Add audio_mclk to the codec platform data

Add audio_mclk to the platform data struct for the
twl4030-codec MFD driver.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/i2c/twl4030.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 42d6c722bd82..c188961efbce 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -414,6 +414,7 @@ struct twl4030_codec_vibra_data {
 };
 
 struct twl4030_codec_data {
+	unsigned int	audio_mclk;
 	struct twl4030_codec_audio_data		*audio;
 	struct twl4030_codec_vibra_data		*vibra;
 };
-- 
cgit v1.2.3


From f9b4639e045c750e2bad37462476403995508350 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 4 Nov 2009 09:58:19 +0200
Subject: MFD: twl4030-codec: APLL_INFREQ handling in the MFD driver

Configure the APLL_INFREQ field in the APLL_CTL register
based on the platform data.
Provide also a function for childs to query the audio_mclk
frequency.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/twl4030-codec.c       | 35 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/twl4030-codec.h |  1 +
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c
index 97103078dc2a..77b914907d7c 100644
--- a/drivers/mfd/twl4030-codec.c
+++ b/drivers/mfd/twl4030-codec.c
@@ -41,6 +41,7 @@ struct twl4030_codec_resource {
 };
 
 struct twl4030_codec {
+	unsigned int audio_mclk;
 	struct mutex mutex;
 	struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX];
 	struct mfd_cell cells[TWL4030_CODEC_CELLS];
@@ -145,12 +146,45 @@ int twl4030_codec_disable_resource(unsigned id)
 }
 EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource);
 
+unsigned int twl4030_codec_get_mclk(void)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+
+	return codec->audio_mclk;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_get_mclk);
+
 static int __devinit twl4030_codec_probe(struct platform_device *pdev)
 {
 	struct twl4030_codec *codec;
 	struct twl4030_codec_data *pdata = pdev->dev.platform_data;
 	struct mfd_cell *cell = NULL;
 	int ret, childs = 0;
+	u8 val;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Platform data is missing\n");
+		return -EINVAL;
+	}
+
+	/* Configure APLL_INFREQ and disable APLL if enabled */
+	val = 0;
+	switch (pdata->audio_mclk) {
+	case 19200000:
+		val |= TWL4030_APLL_INFREQ_19200KHZ;
+		break;
+	case 26000000:
+		val |= TWL4030_APLL_INFREQ_26000KHZ;
+		break;
+	case 38400000:
+		val |= TWL4030_APLL_INFREQ_38400KHZ;
+		break;
+	default:
+		dev_err(&pdev->dev, "Invalid audio_mclk\n");
+		return -EINVAL;
+	}
+	twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+					val, TWL4030_REG_APLL_CTL);
 
 	codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL);
 	if (!codec)
@@ -160,6 +194,7 @@ static int __devinit twl4030_codec_probe(struct platform_device *pdev)
 
 	twl4030_codec_dev = pdev;
 	mutex_init(&codec->mutex);
+	codec->audio_mclk = pdata->audio_mclk;
 
 	/* Codec power */
 	codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE;
diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h
index ef0a3041d759..2ec317c68e59 100644
--- a/include/linux/mfd/twl4030-codec.h
+++ b/include/linux/mfd/twl4030-codec.h
@@ -267,5 +267,6 @@ enum twl4030_codec_res {
 
 int twl4030_codec_disable_resource(enum twl4030_codec_res id);
 int twl4030_codec_enable_resource(enum twl4030_codec_res id);
+unsigned int twl4030_codec_get_mclk(void);
 
 #endif	/* End of __TWL4030_CODEC_H__ */
-- 
cgit v1.2.3


From 30fff9231fad757c061285e347b33c5149c2c2e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 9 Nov 2009 05:26:33 +0000
Subject: udp: bind() optimisation

UDP bind() can be O(N^2) in some pathological cases.

Thanks to secondary hash tables, we can make it O(N)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  6 +++++
 include/net/udp.h   |  3 ++-
 net/ipv4/udp.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++------
 net/ipv6/udp.c      | 14 +++++-----
 4 files changed, 80 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 59f0ddf2d284..03f72a2ba028 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -88,6 +88,12 @@ static inline struct udp_sock *udp_sk(const struct sock *sk)
 	return (struct udp_sock *)sk;
 }
 
+#define udp_portaddr_for_each_entry(__sk, node, list) \
+	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
+
+#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
+	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+
 #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
 
 #endif
diff --git a/include/net/udp.h b/include/net/udp.h
index af41850f742a..5348d80b25bb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -158,7 +158,8 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
 }
 
 extern int	udp_lib_get_port(struct sock *sk, unsigned short snum,
-		int (*)(const struct sock*,const struct sock*));
+		int (*)(const struct sock *,const struct sock *),
+		unsigned int hash2_nulladdr);
 
 /* net/ipv4/udp.c */
 extern int	udp_get_port(struct sock *sk, unsigned short snum,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d73e9170536b..1eaf57567ebf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -152,16 +152,49 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 	return 0;
 }
 
+/*
+ * Note: we still hold spinlock of primary hash chain, so no other writer
+ * can insert/delete a socket with local_port == num
+ */
+static int udp_lib_lport_inuse2(struct net *net, __u16 num,
+			       struct udp_hslot *hslot2,
+			       struct sock *sk,
+			       int (*saddr_comp)(const struct sock *sk1,
+						 const struct sock *sk2))
+{
+	struct sock *sk2;
+	struct hlist_nulls_node *node;
+	int res = 0;
+
+	spin_lock(&hslot2->lock);
+	udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
+		if (net_eq(sock_net(sk2), net)			&&
+		    sk2 != sk					&&
+		    (udp_sk(sk2)->udp_port_hash == num)		&&
+		    (!sk2->sk_reuse || !sk->sk_reuse)		&&
+		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+		    (*saddr_comp)(sk, sk2)) {
+			res = 1;
+			break;
+		}
+	spin_unlock(&hslot2->lock);
+	return res;
+}
+
 /**
  *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
  *
  *  @sk:          socket struct in question
  *  @snum:        port number to look up
  *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
+ *  @hash2_nulladdr: AF-dependant hash value in secondary hash chains,
+ *                   with NULL address
  */
 int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		       int (*saddr_comp)(const struct sock *sk1,
-					 const struct sock *sk2))
+					 const struct sock *sk2),
+		     unsigned int hash2_nulladdr)
 {
 	struct udp_hslot *hslot, *hslot2;
 	struct udp_table *udptable = sk->sk_prot->h.udp_table;
@@ -210,6 +243,30 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	} else {
 		hslot = udp_hashslot(udptable, net, snum);
 		spin_lock_bh(&hslot->lock);
+		if (hslot->count > 10) {
+			int exist;
+			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
+
+			slot2          &= udptable->mask;
+			hash2_nulladdr &= udptable->mask;
+
+			hslot2 = udp_hashslot2(udptable, slot2);
+			if (hslot->count < hslot2->count)
+				goto scan_primary_hash;
+
+			exist = udp_lib_lport_inuse2(net, snum, hslot2,
+						     sk, saddr_comp);
+			if (!exist && (hash2_nulladdr != slot2)) {
+				hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
+				exist = udp_lib_lport_inuse2(net, snum, hslot2,
+							     sk, saddr_comp);
+			}
+			if (exist)
+				goto fail_unlock;
+			else
+				goto found;
+		}
+scan_primary_hash:
 		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
 					saddr_comp, 0))
 			goto fail_unlock;
@@ -255,12 +312,14 @@ static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
 
 int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
+	unsigned int hash2_nulladdr =
+		udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum);
+	unsigned int hash2_partial =
+		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+
 	/* precompute partial secondary hash */
-	udp_sk(sk)->udp_portaddr_hash =
-		udp4_portaddr_hash(sock_net(sk),
-				   inet_sk(sk)->inet_rcv_saddr,
-				   0);
-	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
+	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }
 
 static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
@@ -336,8 +395,6 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 	return score;
 }
 
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
-	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
 
 /* called with read_rcu_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2915e1dad726..f4c85b200051 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -100,12 +100,14 @@ static unsigned int udp6_portaddr_hash(struct net *net,
 
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
+	unsigned int hash2_nulladdr =
+		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+	unsigned int hash2_partial = 
+		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+
 	/* precompute partial secondary hash */
-	udp_sk(sk)->udp_portaddr_hash =
-		udp6_portaddr_hash(sock_net(sk),
-				   &inet6_sk(sk)->rcv_saddr,
-				   0);
-	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
+	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
 }
 
 static inline int compute_score(struct sock *sk, struct net *net,
@@ -181,8 +183,6 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 	return score;
 }
 
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
-	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
 
 /* called with read_rcu_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
-- 
cgit v1.2.3


From 37e8273cd30592d3a82bcb70cbb1bdc4eaeb6b71 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 4 Nov 2009 15:29:52 +0000
Subject: usbnet: Set link down initially for drivers that update link state

Some usbnet drivers update link state while others do not due to
hardware limitations.  Add a flag to distinguish those that do, and
set the link down initially for their devices.

This is intended to fix this bug: http://bugs.debian.org/444043

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix.c      | 12 ++++++------
 drivers/net/usb/cdc_ether.c |  2 +-
 drivers/net/usb/dm9601.c    |  2 +-
 drivers/net/usb/usbnet.c    |  4 +++-
 include/linux/usb/usbnet.h  |  1 +
 5 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 6ce7f775bb74..1bef39a60a62 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -1327,7 +1327,7 @@ static const struct driver_info ax8817x_info = {
 	.status = asix_status,
 	.link_reset = ax88172_link_reset,
 	.reset = ax88172_link_reset,
-	.flags =  FLAG_ETHER,
+	.flags =  FLAG_ETHER | FLAG_LINK_INTR,
 	.data = 0x00130103,
 };
 
@@ -1337,7 +1337,7 @@ static const struct driver_info dlink_dub_e100_info = {
 	.status = asix_status,
 	.link_reset = ax88172_link_reset,
 	.reset = ax88172_link_reset,
-	.flags =  FLAG_ETHER,
+	.flags =  FLAG_ETHER | FLAG_LINK_INTR,
 	.data = 0x009f9d9f,
 };
 
@@ -1347,7 +1347,7 @@ static const struct driver_info netgear_fa120_info = {
 	.status = asix_status,
 	.link_reset = ax88172_link_reset,
 	.reset = ax88172_link_reset,
-	.flags =  FLAG_ETHER,
+	.flags =  FLAG_ETHER | FLAG_LINK_INTR,
 	.data = 0x00130103,
 };
 
@@ -1357,7 +1357,7 @@ static const struct driver_info hawking_uf200_info = {
 	.status = asix_status,
 	.link_reset = ax88172_link_reset,
 	.reset = ax88172_link_reset,
-	.flags =  FLAG_ETHER,
+	.flags =  FLAG_ETHER | FLAG_LINK_INTR,
 	.data = 0x001f1d1f,
 };
 
@@ -1367,7 +1367,7 @@ static const struct driver_info ax88772_info = {
 	.status = asix_status,
 	.link_reset = ax88772_link_reset,
 	.reset = ax88772_link_reset,
-	.flags = FLAG_ETHER | FLAG_FRAMING_AX,
+	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR,
 	.rx_fixup = asix_rx_fixup,
 	.tx_fixup = asix_tx_fixup,
 };
@@ -1378,7 +1378,7 @@ static const struct driver_info ax88178_info = {
 	.status = asix_status,
 	.link_reset = ax88178_link_reset,
 	.reset = ax88178_link_reset,
-	.flags = FLAG_ETHER | FLAG_FRAMING_AX,
+	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR,
 	.rx_fixup = asix_rx_fixup,
 	.tx_fixup = asix_tx_fixup,
 };
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 71d7ff3de99f..7ec24c9b2535 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -413,7 +413,7 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 
 static const struct driver_info	cdc_info = {
 	.description =	"CDC Ethernet Device",
-	.flags =	FLAG_ETHER,
+	.flags =	FLAG_ETHER | FLAG_LINK_INTR,
 	// .check_connect = cdc_check_connect,
 	.bind =		cdc_bind,
 	.unbind =	usbnet_cdc_unbind,
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index a2b30a10064f..3d406f9b2f29 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -611,7 +611,7 @@ static int dm9601_link_reset(struct usbnet *dev)
 
 static const struct driver_info dm9601_info = {
 	.description	= "Davicom DM9601 USB Ethernet",
-	.flags		= FLAG_ETHER,
+	.flags		= FLAG_ETHER | FLAG_LINK_INTR,
 	.bind		= dm9601_bind,
 	.rx_fixup	= dm9601_rx_fixup,
 	.tx_fixup	= dm9601_tx_fixup,
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 378da8c938fe..04f3f289e87c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1352,9 +1352,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	// ok, it's ready to go.
 	usb_set_intfdata (udev, dev);
 
-	// start as if the link is up
 	netif_device_attach (net);
 
+	if (dev->driver_info->flags & FLAG_LINK_INTR)
+		netif_carrier_off(net);
+
 	return 0;
 
 out3:
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 86c31b753266..8c84881f8478 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -92,6 +92,7 @@ struct driver_info {
 #define FLAG_SEND_ZLP	0x0200		/* hw requires ZLPs are sent */
 #define FLAG_WWAN	0x0400		/* use "wwan%d" names */
 
+#define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
-- 
cgit v1.2.3


From bf3204cbff7d2606e758afb0994e8da6ae1c6c26 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 6 Nov 2009 21:39:07 -0800
Subject: Input: fix locking in memoryless force-feedback devices

Now that input core acquires dev->event_lock spinlock and disables
interrupts when propagating input events, using spin_lock_bh() in
ff-memless driver is not allowed. Actually, the timer_lock itself
is not needed anymore, we should simply use dev->event_lock
as well.

Also do a small cleanup in force-feedback core.

Reported-by: kerneloops.org
Reported-by: http://www.kerneloops.org/searchweek.php?search=ml_ff_set_gain
Reported-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/ff-core.c    | 20 +++++++++++---------
 drivers/input/ff-memless.c | 26 +++++++++++---------------
 include/linux/input.h      |  4 ++++
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c
index 72c63e5dd630..38df81fcdc3a 100644
--- a/drivers/input/ff-core.c
+++ b/drivers/input/ff-core.c
@@ -337,16 +337,16 @@ int input_ff_create(struct input_dev *dev, int max_effects)
 	dev->ff = ff;
 	dev->flush = flush_effects;
 	dev->event = input_ff_event;
-	set_bit(EV_FF, dev->evbit);
+	__set_bit(EV_FF, dev->evbit);
 
 	/* Copy "true" bits into ff device bitmap */
 	for (i = 0; i <= FF_MAX; i++)
 		if (test_bit(i, dev->ffbit))
-			set_bit(i, ff->ffbit);
+			__set_bit(i, ff->ffbit);
 
 	/* we can emulate RUMBLE with periodic effects */
 	if (test_bit(FF_PERIODIC, ff->ffbit))
-		set_bit(FF_RUMBLE, dev->ffbit);
+		__set_bit(FF_RUMBLE, dev->ffbit);
 
 	return 0;
 }
@@ -362,12 +362,14 @@ EXPORT_SYMBOL_GPL(input_ff_create);
  */
 void input_ff_destroy(struct input_dev *dev)
 {
-	clear_bit(EV_FF, dev->evbit);
-	if (dev->ff) {
-		if (dev->ff->destroy)
-			dev->ff->destroy(dev->ff);
-		kfree(dev->ff->private);
-		kfree(dev->ff);
+	struct ff_device *ff = dev->ff;
+
+	__clear_bit(EV_FF, dev->evbit);
+	if (ff) {
+		if (ff->destroy)
+			ff->destroy(ff);
+		kfree(ff->private);
+		kfree(ff);
 		dev->ff = NULL;
 	}
 }
diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c
index 2d1415e16834..b483b2995fa9 100644
--- a/drivers/input/ff-memless.c
+++ b/drivers/input/ff-memless.c
@@ -61,7 +61,6 @@ struct ml_device {
 	struct ml_effect_state states[FF_MEMLESS_EFFECTS];
 	int gain;
 	struct timer_list timer;
-	spinlock_t timer_lock;
 	struct input_dev *dev;
 
 	int (*play_effect)(struct input_dev *dev, void *data,
@@ -368,38 +367,38 @@ static void ml_effect_timer(unsigned long timer_data)
 {
 	struct input_dev *dev = (struct input_dev *)timer_data;
 	struct ml_device *ml = dev->ff->private;
+	unsigned long flags;
 
 	debug("timer: updating effects");
 
-	spin_lock(&ml->timer_lock);
+	spin_lock_irqsave(&dev->event_lock, flags);
 	ml_play_effects(ml);
-	spin_unlock(&ml->timer_lock);
+	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
+/*
+ * Sets requested gain for FF effects. Called with dev->event_lock held.
+ */
 static void ml_ff_set_gain(struct input_dev *dev, u16 gain)
 {
 	struct ml_device *ml = dev->ff->private;
 	int i;
 
-	spin_lock_bh(&ml->timer_lock);
-
 	ml->gain = gain;
 
 	for (i = 0; i < FF_MEMLESS_EFFECTS; i++)
 		__clear_bit(FF_EFFECT_PLAYING, &ml->states[i].flags);
 
 	ml_play_effects(ml);
-
-	spin_unlock_bh(&ml->timer_lock);
 }
 
+/*
+ * Start/stop specified FF effect. Called with dev->event_lock held.
+ */
 static int ml_ff_playback(struct input_dev *dev, int effect_id, int value)
 {
 	struct ml_device *ml = dev->ff->private;
 	struct ml_effect_state *state = &ml->states[effect_id];
-	unsigned long flags;
-
-	spin_lock_irqsave(&ml->timer_lock, flags);
 
 	if (value > 0) {
 		debug("initiated play");
@@ -425,8 +424,6 @@ static int ml_ff_playback(struct input_dev *dev, int effect_id, int value)
 		ml_play_effects(ml);
 	}
 
-	spin_unlock_irqrestore(&ml->timer_lock, flags);
-
 	return 0;
 }
 
@@ -436,7 +433,7 @@ static int ml_ff_upload(struct input_dev *dev,
 	struct ml_device *ml = dev->ff->private;
 	struct ml_effect_state *state = &ml->states[effect->id];
 
-	spin_lock_bh(&ml->timer_lock);
+	spin_lock_irq(&dev->event_lock);
 
 	if (test_bit(FF_EFFECT_STARTED, &state->flags)) {
 		__clear_bit(FF_EFFECT_PLAYING, &state->flags);
@@ -448,7 +445,7 @@ static int ml_ff_upload(struct input_dev *dev,
 		ml_schedule_timer(ml);
 	}
 
-	spin_unlock_bh(&ml->timer_lock);
+	spin_unlock_irq(&dev->event_lock);
 
 	return 0;
 }
@@ -482,7 +479,6 @@ int input_ff_create_memless(struct input_dev *dev, void *data,
 	ml->private = data;
 	ml->play_effect = play_effect;
 	ml->gain = 0xffff;
-	spin_lock_init(&ml->timer_lock);
 	setup_timer(&ml->timer, ml_effect_timer, (unsigned long)dev);
 
 	set_bit(FF_GAIN, dev->ffbit);
diff --git a/include/linux/input.h b/include/linux/input.h
index 0ccfc30cd40f..c2b1a7d244d9 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1377,6 +1377,10 @@ extern struct class input_class;
  * methods; erase() is optional. set_gain() and set_autocenter() need
  * only be implemented if driver sets up FF_GAIN and FF_AUTOCENTER
  * bits.
+ *
+ * Note that playback(), set_gain() and set_autocenter() are called with
+ * dev->event_lock spinlock held and interrupts off and thus may not
+ * sleep.
  */
 struct ff_device {
 	int (*upload)(struct input_dev *dev, struct ff_effect *effect,
-- 
cgit v1.2.3


From 254245d23396aca1f9100d500163d7bd6019ab6f Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 10 Nov 2009 07:54:47 +0000
Subject: netdev: add netdev_continue_rcu

This adds an RCU macro for continuing search, useful for some
network devices like vlan.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 include/linux/rculist.h   | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 465add6c43e3..083b5989cecb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1079,6 +1079,8 @@ extern rwlock_t				dev_base_lock;		/* Device list lock */
 		list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_continue(net, d)		\
 		list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
+#define for_each_netdev_continue_rcu(net, d)		\
+	list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
 #define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
 
 static inline struct net_device *next_net_device(struct net_device *dev)
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 5710f43bbc9e..1bf0f708c4fc 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -261,6 +261,20 @@ static inline void list_splice_init_rcu(struct list_head *list,
 		prefetch((pos)->next), (pos) != (head); \
 		(pos) = rcu_dereference((pos)->next))
 
+/**
+ * list_for_each_entry_continue_rcu - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue_rcu(pos, head, member) 		\
+	for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
+	     prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
+
 /**
  * hlist_del_rcu - deletes entry from hash list without re-initialization
  * @n: the element to delete from the hash list.
-- 
cgit v1.2.3


From 2315ffa0a9f789c588c7139effa7404a387d8685 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Apr 2009 03:18:02 -0700
Subject: sysctl: Don't look at ctl_name and strategy in the generic code

The ctl_name and strategy fields are unused, now that sys_sysctl
is a compatibility wrapper around /proc/sys.  No longer looking
at them in the generic code is effectively what we are doing
now and provides the guarantee that during further cleanups
we can just remove references to those fields and everything
will work ok.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/proc_sysctl.c  |  4 ++--
 include/linux/sysctl.h | 17 ++---------------
 kernel/sysctl.c        | 29 +++++++----------------------
 3 files changed, 11 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f667e8aeabdf..6ff9981f0a18 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -48,7 +48,7 @@ out:
 static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
 {
 	int len;
-	for ( ; p->ctl_name || p->procname; p++) {
+	for ( ; p->procname; p++) {
 
 		if (!p->procname)
 			continue;
@@ -218,7 +218,7 @@ static int scan(struct ctl_table_header *head, ctl_table *table,
 		void *dirent, filldir_t filldir)
 {
 
-	for (; table->ctl_name || table->procname; table++, (*pos)++) {
+	for (; table->procname; table++, (*pos)++) {
 		int res;
 
 		/* Can't do anything without a proc name */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 82c32b89932d..7c4aabc04673 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1005,8 +1005,8 @@ extern ctl_handler sysctl_ms_jiffies;
 
 /*
  * Register a set of sysctl names by calling register_sysctl_table
- * with an initialised array of struct ctl_table's.  An entry with zero
- * ctl_name and NULL procname terminates the table.  table->de will be
+ * with an initialised array of struct ctl_table's.  An entry with 
+ * NULL procname terminates the table.  table->de will be
  * set up by the registration and need not be initialised in advance.
  *
  * sysctl names can be mirrored automatically under /proc/sys.  The
@@ -1019,24 +1019,11 @@ extern ctl_handler sysctl_ms_jiffies;
  * under /proc; non-leaf nodes will be represented by directories.  A
  * null procname disables /proc mirroring at this node.
  *
- * sysctl entries with a zero ctl_name will not be available through
- * the binary sysctl interface.
- *
  * sysctl(2) can automatically manage read and write requests through
  * the sysctl table.  The data and maxlen fields of the ctl_table
  * struct enable minimal validation of the values being written to be
  * performed, and the mode field allows minimal authentication.
  * 
- * More sophisticated management can be enabled by the provision of a
- * strategy routine with the table entry.  This will be called before
- * any automatic read or write of the data is performed.
- * 
- * The strategy routine may return:
- * <0: Error occurred (error is passed to user process)
- * 0:  OK - proceed with automatic read or write.
- * >0: OK - read or write has been done by the strategy routine, so 
- *     return immediately.
- * 
  * There must be a proc_handler routine for any terminal nodes
  * mirrored under /proc/sys (non-terminals are handled by a built-in
  * directory handler).  Several default handlers are available to
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f6dacc383c16..b6b6eb1abebb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1618,7 +1618,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
 
 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
 {
-	for (; table->ctl_name || table->procname; table++) {
+	for (; table->procname; table++) {
 		table->parent = parent;
 		if (table->child)
 			sysctl_set_parent(table, table->child);
@@ -1650,11 +1650,11 @@ static struct ctl_table *is_branch_in(struct ctl_table *branch,
 		return NULL;
 
 	/* ... and nothing else */
-	if (branch[1].procname || branch[1].ctl_name)
+	if (branch[1].procname)
 		return NULL;
 
 	/* table should contain subdirectory with the same name */
-	for (p = table; p->procname || p->ctl_name; p++) {
+	for (p = table; p->procname; p++) {
 		if (!p->child)
 			continue;
 		if (p->procname && strcmp(p->procname, s) == 0)
@@ -1699,8 +1699,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  *
  * The members of the &struct ctl_table structure are used as follows:
  *
- * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
- *            must be unique within that level of sysctl
+ * ctl_name - Dead
  *
  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
  *            enter a sysctl file
@@ -1716,7 +1715,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  *
  * proc_handler - the text handler routine (described below)
  *
- * strategy - the strategy routine (described below)
+ * strategy - Dead
  *
  * de - for internal use by the sysctl routines
  *
@@ -1730,19 +1729,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  * struct enable minimal validation of the values being written to be
  * performed, and the mode field allows minimal authentication.
  *
- * More sophisticated management can be enabled by the provision of a
- * strategy routine with the table entry.  This will be called before
- * any automatic read or write of the data is performed.
- *
- * The strategy routine may return
- *
- * < 0 - Error occurred (error is passed to user process)
- *
- * 0   - OK - proceed with automatic read or write.
- *
- * > 0 - OK - read or write has been done by the strategy routine, so
- *       return immediately.
- *
  * There must be a proc_handler routine for any terminal nodes
  * mirrored under /proc/sys (non-terminals are handled by a built-in
  * directory handler).  Several default handlers are available to
@@ -1769,13 +1755,13 @@ struct ctl_table_header *__register_sysctl_paths(
 	struct ctl_table_set *set;
 
 	/* Count the path components */
-	for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
+	for (npath = 0; path[npath].procname; ++npath)
 		;
 
 	/*
 	 * For each path component, allocate a 2-element ctl_table array.
 	 * The first array element will be filled with the sysctl entry
-	 * for this, the second will be the sentinel (ctl_name == 0).
+	 * for this, the second will be the sentinel (procname == 0).
 	 *
 	 * We allocate everything in one go so that we don't have to
 	 * worry about freeing additional memory in unregister_sysctl_table.
@@ -1792,7 +1778,6 @@ struct ctl_table_header *__register_sysctl_paths(
 	for (n = 0; n < npath; ++n, ++path) {
 		/* Copy the procname */
 		new->procname = path->procname;
-		new->ctl_name = path->ctl_name;
 		new->mode     = 0555;
 
 		*prevp = new;
-- 
cgit v1.2.3


From fe8bc91c4c30122b357d197117705cfd4fabaf28 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 16 Oct 2009 19:26:15 +0200
Subject: ext3: Wait for proper transaction commit on fsync

We cannot rely on buffer dirty bits during fsync because pdflush can come
before fsync is called and clear dirty bits without forcing a transaction
commit. What we do is that we track which transaction has last changed
the inode and which transaction last changed allocation and force it to
disk on fsync.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext3/fsync.c           | 36 ++++++++++++++++--------------------
 fs/ext3/inode.c           | 32 +++++++++++++++++++++++++++++++-
 fs/ext3/super.c           |  2 ++
 include/linux/ext3_fs_i.h |  8 ++++++++
 4 files changed, 57 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 451d166bbe93..8209f266e9ad 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -46,19 +46,21 @@
 int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
 	int ret = 0;
+	tid_t commit_tid;
+
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return 0;
 
 	J_ASSERT(ext3_journal_current_handle() == NULL);
 
 	/*
-	 * data=writeback:
+	 * data=writeback,ordered:
 	 *  The caller's filemap_fdatawrite()/wait will sync the data.
-	 *  sync_inode() will sync the metadata
-	 *
-	 * data=ordered:
-	 *  The caller's filemap_fdatawrite() will write the data and
-	 *  sync_inode() will write the inode if it is dirty.  Then the caller's
-	 *  filemap_fdatawait() will wait on the pages.
+	 *  Metadata is in the journal, we wait for a proper transaction
+	 *  to commit here.
 	 *
 	 * data=journal:
 	 *  filemap_fdatawrite won't do anything (the buffers are clean).
@@ -73,22 +75,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
 		goto out;
 	}
 
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		goto flush;
+	if (datasync)
+		commit_tid = atomic_read(&ei->i_datasync_tid);
+	else
+		commit_tid = atomic_read(&ei->i_sync_tid);
 
-	/*
-	 * The VFS has written the file data.  If the inode is unaltered
-	 * then we need not start a commit.
-	 */
-	if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
-		struct writeback_control wbc = {
-			.sync_mode = WB_SYNC_ALL,
-			.nr_to_write = 0, /* sys_fsync did this */
-		};
-		ret = sync_inode(inode, &wbc);
+	if (log_start_commit(journal, commit_tid)) {
+		log_wait_commit(journal, commit_tid);
 		goto out;
 	}
-flush:
+
 	/*
 	 * In case we didn't commit a transaction, we have to flush
 	 * disk caches manually so that data really is on persistent
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 069a163393b4..354ed3b47b30 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -699,8 +699,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	int err = 0;
 	struct ext3_block_alloc_info *block_i;
 	ext3_fsblk_t current_block;
+	struct ext3_inode_info *ei = EXT3_I(inode);
 
-	block_i = EXT3_I(inode)->i_block_alloc_info;
+	block_i = ei->i_block_alloc_info;
 	/*
 	 * If we're splicing into a [td]indirect block (as opposed to the
 	 * inode) then we need to get write access to the [td]indirect block
@@ -741,6 +742,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	ext3_mark_inode_dirty(handle, inode);
+	/* ext3_mark_inode_dirty already updated i_sync_tid */
+	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
 
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
@@ -2754,6 +2757,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 	struct ext3_inode_info *ei;
 	struct buffer_head *bh;
 	struct inode *inode;
+	journal_t *journal = EXT3_SB(sb)->s_journal;
+	transaction_t *transaction;
 	long ret;
 	int block;
 
@@ -2831,6 +2836,30 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
 
+	/*
+	 * Set transaction id's of transactions that have to be committed
+	 * to finish f[data]sync. We set them to currently running transaction
+	 * as we cannot be sure that the inode or some of its metadata isn't
+	 * part of the transaction - the inode could have been reclaimed and
+	 * now it is reread from disk.
+	 */
+	if (journal) {
+		tid_t tid;
+
+		spin_lock(&journal->j_state_lock);
+		if (journal->j_running_transaction)
+			transaction = journal->j_running_transaction;
+		else
+			transaction = journal->j_committing_transaction;
+		if (transaction)
+			tid = transaction->t_tid;
+		else
+			tid = journal->j_commit_sequence;
+		spin_unlock(&journal->j_state_lock);
+		atomic_set(&ei->i_sync_tid, tid);
+		atomic_set(&ei->i_datasync_tid, tid);
+	}
+
 	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
 	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
 		/*
@@ -3015,6 +3044,7 @@ again:
 		err = rc;
 	ei->i_state &= ~EXT3_STATE_NEW;
 
+	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
 out_brelse:
 	brelse (bh);
 	ext3_std_error(inode->i_sb, err);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7a520a862f49..427496c4767c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -466,6 +466,8 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 		return NULL;
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
+	atomic_set(&ei->i_datasync_tid, 0);
+	atomic_set(&ei->i_sync_tid, 0);
 	return &ei->vfs_inode;
 }
 
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index ca1bfe90004f..93e7428156ba 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -137,6 +137,14 @@ struct ext3_inode_info {
 	 * by other means, so we have truncate_mutex.
 	 */
 	struct mutex truncate_mutex;
+
+	/*
+	 * Transactions that contain inode's metadata needed to complete
+	 * fsync and fdatasync, respectively.
+	 */
+	atomic_t i_sync_tid;
+	atomic_t i_datasync_tid;
+
 	struct inode vfs_inode;
 };
 
-- 
cgit v1.2.3


From 3491707a070c1183c709516b2f876f798c7a9a84 Mon Sep 17 00:00:00 2001
From: Rui Paulo <rpaulo@gmail.com>
Date: Mon, 9 Nov 2009 23:46:39 +0000
Subject: mac80211: update meshconf IE

This updates the Mesh Configuration IE according to the latest
draft (3.03).
Notable changes include the simplified protocol IDs.

Signed-off-by: Rui Paulo <rpaulo@gmail.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Andrey Yurovsky <andrey@cozybit.com>
Tested-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  2 +-
 net/mac80211/ieee80211_i.h | 10 +++----
 net/mac80211/mesh.c        | 69 +++++++++++++++++++---------------------------
 3 files changed, 35 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 0aa831467493..50c684db33c7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -115,7 +115,7 @@
 #define IEEE80211_MAX_SSID_LEN		32
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
-#define IEEE80211_MESH_CONFIG_LEN	24
+#define IEEE80211_MESH_CONFIG_LEN	7
 
 #define IEEE80211_QOS_CTL_LEN		2
 #define IEEE80211_QOS_CTL_TID_MASK	0x000F
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1ef767366b77..1f4f88a8f80c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -361,15 +361,15 @@ struct ieee80211_if_mesh {
 	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
 	size_t mesh_id_len;
 	/* Active Path Selection Protocol Identifier */
-	u8 mesh_pp_id[4];
+	u8 mesh_pp_id;
 	/* Active Path Selection Metric Identifier */
-	u8 mesh_pm_id[4];
+	u8 mesh_pm_id;
 	/* Congestion Control Mode Identifier */
-	u8 mesh_cc_id[4];
+	u8 mesh_cc_id;
 	/* Synchronization Protocol Identifier */
-	u8 mesh_sp_id[4];
+	u8 mesh_sp_id;
 	/* Authentication Protocol Identifier */
-	u8 mesh_auth_id[4];
+	u8 mesh_auth_id;
 	/* Local mesh Destination Sequence Number */
 	u32 dsn;
 	/* Last used PREQ ID */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9a733890eb47..a49a3374acb1 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -15,14 +15,14 @@
 #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ)
 #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ)
 
-#define PP_OFFSET 	1		/* Path Selection Protocol */
-#define PM_OFFSET	5		/* Path Selection Metric   */
-#define CC_OFFSET	9		/* Congestion Control Mode */
-#define SP_OFFSET	13		/* Synchronization Protocol */
-#define AUTH_OFFSET	17		/* Authentication Protocol */
-#define CAPAB_OFFSET 	22
-#define CAPAB_ACCEPT_PLINKS 0x80
-#define CAPAB_FORWARDING    0x10
+#define MESHCONF_PP_OFFSET 	0		/* Path Selection Protocol */
+#define MESHCONF_PM_OFFSET	1		/* Path Selection Metric   */
+#define MESHCONF_CC_OFFSET	2		/* Congestion Control Mode */
+#define MESHCONF_SP_OFFSET	3		/* Synchronization Protocol */
+#define MESHCONF_AUTH_OFFSET	4		/* Authentication Protocol */
+#define MESHCONF_CAPAB_OFFSET 	6
+#define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01
+#define MESHCONF_CAPAB_FORWARDING    0x08
 
 #define TMR_RUNNING_HK	0
 #define TMR_RUNNING_MP	1
@@ -85,11 +85,12 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat
 	 */
 	if (ifmsh->mesh_id_len == ie->mesh_id_len &&
 		memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 &&
-		memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 &&
-		memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 &&
-		memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0 &&
-		memcmp(ifmsh->mesh_sp_id, ie->mesh_config + SP_OFFSET, 4) == 0 &&
-		memcmp(ifmsh->mesh_auth_id, ie->mesh_config + AUTH_OFFSET, 4) == 0)
+		(ifmsh->mesh_pp_id == *(ie->mesh_config + MESHCONF_PP_OFFSET))&&
+		(ifmsh->mesh_pm_id == *(ie->mesh_config + MESHCONF_PM_OFFSET))&&
+		(ifmsh->mesh_cc_id == *(ie->mesh_config + MESHCONF_CC_OFFSET))&&
+		(ifmsh->mesh_sp_id == *(ie->mesh_config + MESHCONF_SP_OFFSET))&&
+		(ifmsh->mesh_auth_id == *(ie->mesh_config +
+		    MESHCONF_AUTH_OFFSET)))
 		return true;
 
 	return false;
@@ -102,7 +103,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat
  */
 bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
 {
-	return (*(ie->mesh_config + CAPAB_OFFSET) & CAPAB_ACCEPT_PLINKS) != 0;
+	return (*(ie->mesh_config + MESHCONF_CAPAB_OFFSET) &
+	    MESHCONF_CAPAB_ACCEPT_PLINKS) != 0;
 }
 
 /**
@@ -128,18 +130,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
 
 void mesh_ids_set_default(struct ieee80211_if_mesh *sta)
 {
-	u8 oui[3] = {0x00, 0x0F, 0xAC};
-
-	memcpy(sta->mesh_pp_id, oui, sizeof(oui));
-	memcpy(sta->mesh_pm_id, oui, sizeof(oui));
-	memcpy(sta->mesh_cc_id, oui, sizeof(oui));
-	memcpy(sta->mesh_sp_id, oui, sizeof(oui));
-	memcpy(sta->mesh_auth_id, oui, sizeof(oui));
-	sta->mesh_pp_id[sizeof(oui)] = 0;
-	sta->mesh_pm_id[sizeof(oui)] = 0;
-	sta->mesh_cc_id[sizeof(oui)] = 0xff;
-	sta->mesh_sp_id[sizeof(oui)] = 0xff;
-	sta->mesh_auth_id[sizeof(oui)] = 0x0;
+	sta->mesh_pp_id = 0;	/* HWMP */
+	sta->mesh_pm_id = 0;	/* Airtime */
+	sta->mesh_cc_id = 0;	/* Disabled */
+	sta->mesh_sp_id = 0;	/* Neighbor Offset */
+	sta->mesh_auth_id = 0;	/* Disabled */
 }
 
 int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
@@ -260,28 +255,21 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN);
 	*pos++ = WLAN_EID_MESH_CONFIG;
 	*pos++ = IEEE80211_MESH_CONFIG_LEN;
-	/* Version */
-	*pos++ = 1;
 
 	/* Active path selection protocol ID */
-	memcpy(pos, sdata->u.mesh.mesh_pp_id, 4);
-	pos += 4;
+	*pos++ = sdata->u.mesh.mesh_pp_id;
 
 	/* Active path selection metric ID   */
-	memcpy(pos, sdata->u.mesh.mesh_pm_id, 4);
-	pos += 4;
+	*pos++ = sdata->u.mesh.mesh_pm_id;
 
 	/* Congestion control mode identifier */
-	memcpy(pos, sdata->u.mesh.mesh_cc_id, 4);
-	pos += 4;
+	*pos++ = sdata->u.mesh.mesh_cc_id;
 
 	/* Synchronization protocol identifier */
-	memcpy(pos, sdata->u.mesh.mesh_sp_id, 4);
-	pos += 4;
+	*pos++ = sdata->u.mesh.mesh_sp_id;
 
 	/* Authentication Protocol identifier */
-	memcpy(pos, sdata->u.mesh.mesh_auth_id, 4);
-	pos += 4;
+	*pos++ = sdata->u.mesh.mesh_auth_id;
 
 	/* Mesh Formation Info */
 	memset(pos, 0x00, 1);
@@ -289,8 +277,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 
 	/* Mesh capability */
 	sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata);
-	*pos = CAPAB_FORWARDING;
-	*pos++ |= sdata->u.mesh.accepting_plinks ? CAPAB_ACCEPT_PLINKS : 0x00;
+	*pos = MESHCONF_CAPAB_FORWARDING;
+	*pos++ |= sdata->u.mesh.accepting_plinks ?
+	    MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
 
 	return;
-- 
cgit v1.2.3


From 8b787643ca0a5130c647109d77fe512f89cfa611 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 10 Nov 2009 18:53:10 +0100
Subject: nl80211: add a parameter for using 4-address frames on virtual
 interfaces

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  4 ++++
 include/net/cfg80211.h  |  2 ++
 net/wireless/nl80211.c  | 11 +++++++++++
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 50afca3dcff1..203adef972cf 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -584,6 +584,8 @@ enum nl80211_commands {
  *	changed then the list changed and the dump should be repeated
  *	completely from scratch.
  *
+ * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -714,6 +716,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PID,
 
+	NL80211_ATTR_4ADDR,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ff67865de231..1ee41e4a92e2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -206,10 +206,12 @@ struct ieee80211_supported_band {
  * struct vif_params - describes virtual interface parameters
  * @mesh_id: mesh ID to use
  * @mesh_id_len: length of the mesh ID
+ * @use_4addr: use 4-address frames
  */
 struct vif_params {
        u8 *mesh_id;
        int mesh_id_len;
+       int use_4addr;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8ed62b6c172b..8c8e4eae6a17 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -138,6 +138,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
 	[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
 	[NL80211_ATTR_PID] = { .type = NLA_U32 },
+	[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
 };
 
 /* policy for the attributes */
@@ -987,6 +988,13 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 		change = true;
 	}
 
+	if (info->attrs[NL80211_ATTR_4ADDR]) {
+		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
+		change = true;
+	} else {
+		params.use_4addr = -1;
+	}
+
 	if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
 		if (ntype != NL80211_IFTYPE_MONITOR) {
 			err = -EINVAL;
@@ -1053,6 +1061,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
 	}
 
+	if (info->attrs[NL80211_ATTR_4ADDR])
+		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
+
 	err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
 				  info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
 				  &flags);
-- 
cgit v1.2.3


From 87ec0e98cfdd8b68da6a7f9e70142ffc0e404fbb Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 12 Oct 2009 20:49:25 +0400
Subject: spi_mpc8xxx: Turn qe_mode into flags

Soon there will be more flags introduced in subsequent patches, so
let's turn qe_mode into flags.

Also introduce mpc8xxx_spi_strmode() and print current SPI mode.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 drivers/spi/spi_mpc8xxx.c   | 30 +++++++++++++++++++-----------
 include/linux/fsl_devices.h |  2 +-
 2 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c
index 4b119eaf4e6b..80374dfa9708 100644
--- a/drivers/spi/spi_mpc8xxx.c
+++ b/drivers/spi/spi_mpc8xxx.c
@@ -96,7 +96,8 @@ struct mpc8xxx_spi {
 	u32 rx_shift;		/* RX data reg shift when in qe mode */
 	u32 tx_shift;		/* TX data reg shift when in qe mode */
 
-	bool qe_mode;
+	unsigned int flags;
+#define SPI_QE_CPU_MODE		(1 << 0) /* QE CPU ("PIO") mode */
 
 	struct workqueue_struct *workqueue;
 	struct work_struct work;
@@ -235,14 +236,14 @@ int mpc8xxx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 	if (bits_per_word <= 8) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u8;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u8;
-		if (mpc8xxx_spi->qe_mode) {
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 			cs->rx_shift = 16;
 			cs->tx_shift = 24;
 		}
 	} else if (bits_per_word <= 16) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u16;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u16;
-		if (mpc8xxx_spi->qe_mode) {
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 			cs->rx_shift = 16;
 			cs->tx_shift = 16;
 		}
@@ -252,7 +253,8 @@ int mpc8xxx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 	} else
 		return -EINVAL;
 
-	if (mpc8xxx_spi->qe_mode && spi->mode & SPI_LSB_FIRST) {
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE &&
+			spi->mode & SPI_LSB_FIRST) {
 		cs->tx_shift = 0;
 		if (bits_per_word <= 8)
 			cs->rx_shift = 8;
@@ -518,6 +520,13 @@ static void mpc8xxx_spi_cleanup(struct spi_device *spi)
 	kfree(spi->controller_state);
 }
 
+static const char *mpc8xxx_spi_strmode(unsigned int flags)
+{
+	if (flags & SPI_QE_CPU_MODE)
+		return "QE CPU";
+	return "CPU";
+}
+
 static struct spi_master * __devinit
 mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 {
@@ -544,14 +553,14 @@ mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 	master->cleanup = mpc8xxx_spi_cleanup;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
-	mpc8xxx_spi->qe_mode = pdata->qe_mode;
 	mpc8xxx_spi->get_rx = mpc8xxx_spi_rx_buf_u8;
 	mpc8xxx_spi->get_tx = mpc8xxx_spi_tx_buf_u8;
+	mpc8xxx_spi->flags = pdata->flags;
 	mpc8xxx_spi->spibrg = pdata->sysclk;
 
 	mpc8xxx_spi->rx_shift = 0;
 	mpc8xxx_spi->tx_shift = 0;
-	if (mpc8xxx_spi->qe_mode) {
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 		mpc8xxx_spi->rx_shift = 16;
 		mpc8xxx_spi->tx_shift = 24;
 	}
@@ -584,7 +593,7 @@ mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 
 	/* Enable SPI interface */
 	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
-	if (pdata->qe_mode)
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
 		regval |= SPMODE_OP;
 
 	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->mode, regval);
@@ -604,9 +613,8 @@ mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 	if (ret < 0)
 		goto unreg_master;
 
-	printk(KERN_INFO
-	       "%s: MPC8xxx SPI Controller driver at 0x%p (irq = %d)\n",
-	       dev_name(dev), mpc8xxx_spi->base, mpc8xxx_spi->irq);
+	dev_info(dev, "at 0x%p (irq = %d), %s mode\n", mpc8xxx_spi->base,
+		 mpc8xxx_spi->irq, mpc8xxx_spi_strmode(mpc8xxx_spi->flags));
 
 	return master;
 
@@ -797,7 +805,7 @@ static int __devinit of_mpc8xxx_spi_probe(struct of_device *ofdev,
 
 	prop = of_get_property(np, "mode", NULL);
 	if (prop && !strcmp(prop, "cpu-qe"))
-		pdata->qe_mode = 1;
+		pdata->flags = SPI_QE_CPU_MODE;
 
 	ret = of_mpc8xxx_spi_get_chipselects(dev);
 	if (ret)
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 43fc95d822d5..39fd94681e74 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -74,7 +74,7 @@ struct spi_device;
 struct fsl_spi_platform_data {
 	u32 	initial_spmode;	/* initial SPMODE value */
 	s16	bus_num;
-	bool	qe_mode;
+	unsigned int flags;
 	/* board specific information */
 	u16	max_chipselect;
 	void	(*cs_control)(struct spi_device *spi, bool on);
-- 
cgit v1.2.3


From 2e9d546eda5888962a441da1e96bbf92cb5b1cbb Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 23 Sep 2009 22:52:38 +0400
Subject: powerpc/fsl: Make fsl_deep_sleep() usable w/ modules and non-83xx
 builds

Export is needed for modular builds, and a static inline stub is needed
for non-MPC83xx builds.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/83xx/suspend.c | 1 +
 include/linux/fsl_devices.h           | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 08e65fc8b98c..d306f07b9aa1 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -96,6 +96,7 @@ int fsl_deep_sleep(void)
 {
 	return deep_sleeping;
 }
+EXPORT_SYMBOL(fsl_deep_sleep);
 
 static int mpc83xx_change_state(void)
 {
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 39fd94681e74..47188d512b8f 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -90,6 +90,10 @@ struct mpc8xx_pcmcia_ops {
  * lead to a deep sleep (i.e. power removed from the core,
  * instead of just the clock).
  */
+#if defined(CONFIG_PPC_83xx) && defined(CONFIG_SUSPEND)
 int fsl_deep_sleep(void);
+#else
+static inline int fsl_deep_sleep(void) { return 0; }
+#endif
 
 #endif /* _FSL_DEVICE_H_ */
-- 
cgit v1.2.3


From 4739a9748e1bd7459f22f7e94e7d85710ca83954 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Apr 2009 05:36:01 -0700
Subject: sysctl: Remove the last of the generic binary sysctl support

Now that all of the users stopped using ctl_name and strategy it
is safe to remove the fields from struct ctl_table, and it is safe
to remove the stub strategy routines as well.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sysctl.h |  3 ---
 kernel/sysctl.c        | 44 --------------------------------------------
 2 files changed, 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7c4aabc04673..4e40442777cf 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1033,7 +1033,6 @@ extern ctl_handler sysctl_ms_jiffies;
 /* A sysctl table is an array of struct ctl_table: */
 struct ctl_table 
 {
-	int ctl_name;			/* Binary ID */
 	const char *procname;		/* Text ID for /proc/sys, or zero */
 	void *data;
 	int maxlen;
@@ -1041,7 +1040,6 @@ struct ctl_table
 	struct ctl_table *child;
 	struct ctl_table *parent;	/* Automatically set */
 	proc_handler *proc_handler;	/* Callback for text formatting */
-	ctl_handler *strategy;		/* Callback function for all r/w */
 	void *extra1;
 	void *extra2;
 };
@@ -1075,7 +1073,6 @@ struct ctl_table_header
 /* struct ctl_path describes where in the hierarchy a table is added */
 struct ctl_path {
 	const char *procname;
-	int ctl_name;
 };
 
 void register_sysctl_root(struct ctl_table_root *root);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b6b6eb1abebb..b4a5763d6dc8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1699,8 +1699,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  *
  * The members of the &struct ctl_table structure are used as follows:
  *
- * ctl_name - Dead
- *
  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
  *            enter a sysctl file
  *
@@ -1715,8 +1713,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  *
  * proc_handler - the text handler routine (described below)
  *
- * strategy - Dead
- *
  * de - for internal use by the sysctl routines
  *
  * extra1, extra2 - extra pointers usable by the proc handler routines
@@ -2639,41 +2635,6 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 
 #endif /* CONFIG_PROC_FS */
 
-int sysctl_data(struct ctl_table *table,
-		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
-int sysctl_string(struct ctl_table *table,
-		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
-int sysctl_intvec(struct ctl_table *table,
-		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
-int sysctl_jiffies(struct ctl_table *table,
-		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
-int sysctl_ms_jiffies(struct ctl_table *table,
-		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
 /*
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
@@ -2688,9 +2649,4 @@ EXPORT_SYMBOL(proc_doulongvec_minmax);
 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
 EXPORT_SYMBOL(register_sysctl_table);
 EXPORT_SYMBOL(register_sysctl_paths);
-EXPORT_SYMBOL(sysctl_intvec);
-EXPORT_SYMBOL(sysctl_jiffies);
-EXPORT_SYMBOL(sysctl_ms_jiffies);
-EXPORT_SYMBOL(sysctl_string);
-EXPORT_SYMBOL(sysctl_data);
 EXPORT_SYMBOL(unregister_sysctl_table);
-- 
cgit v1.2.3


From f5c15d0b37ab1cd3969b8ce7828ab41c79f36f77 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 11 Nov 2009 14:26:22 -0800
Subject: fb: remove fb_save_state() and fb_restore_state operations

Remove fb_save_state() and fb_restore_state operations from frame buffer layer.
They are used only in two drivers:
1. savagefb  - and cause bug #11248
2. uvesafb

Usage of these operations is misunderstood in both drivers so kill these
operations, fix the bug #11248 and avoid confusion in the future.

Tested on Savage 3D/MV card and the patch fixes the bug #11248.

The frame buffer layer uses these funtions during switch between graphics
and text mode of the console, but these drivers saves state before
switching of the frame buffer (in the fb_open) and after releasing it (in
the fb_release).  This defeats the purpose of these operations.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=11248

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Reported-by: Jochen Hein <jochen@jochen.org>
Tested-by: Jochen Hein <jochen@jochen.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Michal Januszewski <spock@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/atafb.c                  |  6 ------
 drivers/video/console/fbcon.c          |  5 +----
 drivers/video/savage/savagefb_driver.c | 18 ------------------
 drivers/video/uvesafb.c                | 28 ----------------------------
 include/linux/fb.h                     |  6 ------
 5 files changed, 1 insertion(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index 8cd279be74e5..37624f74e88b 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -329,12 +329,6 @@ extern unsigned char fontdata_8x16[];
  *
  *	* perform fb specific mmap *
  *	int (*fb_mmap)(struct fb_info *info, struct vm_area_struct *vma);
- *
- *	* save current hardware state *
- *	void (*fb_save_state)(struct fb_info *info);
- *
- *	* restore saved state *
- *	void (*fb_restore_state)(struct fb_info *info);
  * } ;
  */
 
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 5a686cea23f4..3681c6a88212 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2311,14 +2311,11 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
 		ops->graphics = 1;
 
 		if (!blank) {
-			if (info->fbops->fb_save_state)
-				info->fbops->fb_save_state(info);
 			var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE;
 			fb_set_var(info, &var);
 			ops->graphics = 0;
 			ops->var = info->var;
-		} else if (info->fbops->fb_restore_state)
-			info->fbops->fb_restore_state(info);
+		}
 	}
 
  	if (!fbcon_is_inactive(vc, info)) {
diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c
index 37b135d5d12e..0857b3b8c495 100644
--- a/drivers/video/savage/savagefb_driver.c
+++ b/drivers/video/savage/savagefb_driver.c
@@ -1606,22 +1606,6 @@ static int savagefb_blank(int blank, struct fb_info *info)
 	return (blank == FB_BLANK_NORMAL) ? 1 : 0;
 }
 
-static void savagefb_save_state(struct fb_info *info)
-{
-	struct savagefb_par *par = info->par;
-
-	savage_get_default_par(par, &par->save);
-}
-
-static void savagefb_restore_state(struct fb_info *info)
-{
-	struct savagefb_par *par = info->par;
-
-	savagefb_blank(FB_BLANK_POWERDOWN, info);
-	savage_set_default_par(par, &par->save);
-	savagefb_blank(FB_BLANK_UNBLANK, info);
-}
-
 static int savagefb_open(struct fb_info *info, int user)
 {
 	struct savagefb_par *par = info->par;
@@ -1667,8 +1651,6 @@ static struct fb_ops savagefb_ops = {
 	.fb_setcolreg   = savagefb_setcolreg,
 	.fb_pan_display = savagefb_pan_display,
 	.fb_blank       = savagefb_blank,
-	.fb_save_state  = savagefb_save_state,
-	.fb_restore_state = savagefb_restore_state,
 #if defined(CONFIG_FB_SAVAGE_ACCEL)
 	.fb_fillrect    = savagefb_fillrect,
 	.fb_copyarea    = savagefb_copyarea,
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index e35232a18571..54fbb2995a5f 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -1411,23 +1411,6 @@ static int uvesafb_check_var(struct fb_var_screeninfo *var,
 	return 0;
 }
 
-static void uvesafb_save_state(struct fb_info *info)
-{
-	struct uvesafb_par *par = info->par;
-
-	if (par->vbe_state_saved)
-		kfree(par->vbe_state_saved);
-
-	par->vbe_state_saved = uvesafb_vbe_state_save(par);
-}
-
-static void uvesafb_restore_state(struct fb_info *info)
-{
-	struct uvesafb_par *par = info->par;
-
-	uvesafb_vbe_state_restore(par, par->vbe_state_saved);
-}
-
 static struct fb_ops uvesafb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_open	= uvesafb_open,
@@ -1441,8 +1424,6 @@ static struct fb_ops uvesafb_ops = {
 	.fb_imageblit	= cfb_imageblit,
 	.fb_check_var	= uvesafb_check_var,
 	.fb_set_par	= uvesafb_set_par,
-	.fb_save_state	= uvesafb_save_state,
-	.fb_restore_state = uvesafb_restore_state,
 };
 
 static void __devinit uvesafb_init_info(struct fb_info *info,
@@ -1459,15 +1440,6 @@ static void __devinit uvesafb_init_info(struct fb_info *info,
 	info->fix.ypanstep = par->ypan ? 1 : 0;
 	info->fix.ywrapstep = (par->ypan > 1) ? 1 : 0;
 
-	/*
-	 * If we were unable to get the state buffer size, disable
-	 * functions for saving and restoring the hardware state.
-	 */
-	if (par->vbe_state_size == 0) {
-		info->fbops->fb_save_state = NULL;
-		info->fbops->fb_restore_state = NULL;
-	}
-
 	/* Disable blanking if the user requested so. */
 	if (!blank)
 		info->fbops->fb_blank = NULL;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index a34bdf5a9d23..de9c722e7b90 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -669,12 +669,6 @@ struct fb_ops {
 	/* perform fb specific mmap */
 	int (*fb_mmap)(struct fb_info *info, struct vm_area_struct *vma);
 
-	/* save current hardware state */
-	void (*fb_save_state)(struct fb_info *info);
-
-	/* restore saved state */
-	void (*fb_restore_state)(struct fb_info *info);
-
 	/* get capability given var */
 	void (*fb_get_caps)(struct fb_info *info, struct fb_blit_caps *caps,
 			    struct fb_var_screeninfo *var);
-- 
cgit v1.2.3


From b87e5e2b8ed9336566100c8c796ab6dd52436881 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Wed, 11 Nov 2009 14:26:42 -0800
Subject: serial: add support for the Lava Quattro PCI quad-port 16550A card

This seems to be a different model (with a different PCI ID) than the
"Quatro" card that is also in the list.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 6 ++++++
 include/linux/pci_ids.h   | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index b2d65b208e55..b28af13c45a1 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -3139,6 +3139,12 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATRO_B,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_bt_2_115200 },
+	{	PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_A,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_bt_2_115200 },
+	{	PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_B,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_bt_2_115200 },
 	{	PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_OCTO_A,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_bt_4_460800 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9ceb392cb984..84cf1f3b7838 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1956,6 +1956,8 @@
 #define PCI_DEVICE_ID_LAVA_DSERIAL	0x0100 /* 2x 16550 */
 #define PCI_DEVICE_ID_LAVA_QUATRO_A	0x0101 /* 2x 16550, half of 4 port */
 #define PCI_DEVICE_ID_LAVA_QUATRO_B	0x0102 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUATTRO_A	0x0120 /* 2x 16550A, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUATTRO_B	0x0121 /* 2x 16550A, half of 4 port */
 #define PCI_DEVICE_ID_LAVA_OCTO_A	0x0180 /* 4x 16550A, half of 8 port */
 #define PCI_DEVICE_ID_LAVA_OCTO_B	0x0181 /* 4x 16550A, half of 8 port */
 #define PCI_DEVICE_ID_LAVA_PORT_PLUS	0x0200 /* 2x 16650 */
-- 
cgit v1.2.3


From 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Thu, 12 Nov 2009 11:19:44 -0800
Subject: IB/mlx4: Remove limitation on LSO header size

Current code has a limitation: an LSO header is not allowed to cross a
64 byte boundary.  This patch removes this limitation by setting the
WQE RR for large headers thus allowing LSO headers of any size.  The
extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64
to 128 bytes, assuming this is reasonable upper limit for header
length.  Also, this patch will cause IB_DEVICE_UD_TSO to be set only
for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version
2.6.000 and higher.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/main.c |  2 +-
 drivers/infiniband/hw/mlx4/qp.c   | 24 ++++++++++++------------
 drivers/net/mlx4/fw.c             |  1 +
 include/linux/mlx4/device.h       |  1 +
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47a10b8..e596537ff353 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
-	if (dev->dev->caps.max_gso_sz)
+	if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
 	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 518d561970aa..847030c89a8d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@ enum {
 	/*
 	 * Largest possible UD header: send with GRH and immediate data.
 	 */
-	MLX4_IB_UD_HEADER_SIZE		= 72
+	MLX4_IB_UD_HEADER_SIZE		= 72,
+	MLX4_IB_LSO_HEADER_SPARE	= 128,
 };
 
 struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
 };
 
 enum {
-	MLX4_IB_MIN_SQ_STRIDE = 6
+	MLX4_IB_MIN_SQ_STRIDE	= 6,
+	MLX4_IB_CACHE_LINE_SIZE	= 64,
 };
 
 static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
 	case IB_QPT_UD:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_datagram_seg) +
-			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+			((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
 	case IB_QPT_UC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
@@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-			 __be32 *lso_hdr_sz)
+			 __be32 *lso_hdr_sz, __be32 *blh)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
 
-	/*
-	 * This is a temporary limitation and will be removed in
-	 * a forthcoming FW release:
-	 */
-	if (unlikely(halign > 64))
-		return -EINVAL;
+	if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+		*blh = cpu_to_be32(1 << 6);
 
 	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
 		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	__be32 dummy;
 	__be32 *lso_wqe;
 	__be32 uninitialized_var(lso_hdr_sz);
+	__be32 blh;
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		lso_wqe = &dummy;
+		blh = 0;
 
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 			err = -ENOMEM;
@@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
 			if (wr->opcode == IB_WR_LSO) {
-				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
 				if (unlikely(err)) {
 					*bad_wr = wr;
 					goto out;
@@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		}
 
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
-			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
 
 		stamp = ind + qp->sq_spare_wqes;
 		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 3c16602172fc..7194be3a2894 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
 		[ 9] = "Q_Key violation counter",
 		[10] = "VMM",
 		[12] = "DPDP",
+		[15] = "Big LSO headers",
 		[16] = "MW support",
 		[17] = "APM support",
 		[18] = "Atomic ops support",
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c7bcbb..e92d1bfdb330 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@ enum {
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
 	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
 	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
+	MLX4_DEV_CAP_FLAG_BLH		= 1 << 15,
 	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
 	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
 	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
-- 
cgit v1.2.3


From 0a3adadee42f2865bb867b8c5f4955b7def9baad Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 4 Nov 2009 18:12:35 -0500
Subject: nfsd: make fs/nfsd/vfs.h for common includes

None of this stuff is used outside nfsd, so move it out of the common
linux include directory.

Actually, probably none of the stuff in include/linux/nfsd/nfsd.h really
belongs there, so later we may remove that file entirely.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/lockd.c           |  1 +
 fs/nfsd/nfs2acl.c         |  1 +
 fs/nfsd/nfs3acl.c         |  1 +
 fs/nfsd/nfs3proc.c        |  1 +
 fs/nfsd/nfs4proc.c        |  1 +
 fs/nfsd/nfs4recover.c     |  1 +
 fs/nfsd/nfs4state.c       |  1 +
 fs/nfsd/nfs4xdr.c         |  1 +
 fs/nfsd/nfsfh.c           |  1 +
 fs/nfsd/nfsproc.c         |  1 +
 fs/nfsd/nfssvc.c          |  1 +
 fs/nfsd/vfs.c             |  1 +
 fs/nfsd/vfs.h             | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/nfsd.h | 87 +----------------------------------------
 14 files changed, 111 insertions(+), 86 deletions(-)
 create mode 100644 fs/nfsd/vfs.h

(limited to 'include/linux')

diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9afe..812bc64874f6 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/lockd/bind.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index e2a17f0a96a7..38c883d48b02 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -14,6 +14,7 @@
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
 #include <linux/nfsacl.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index ff73596eb550..526d85a65f76 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -13,6 +13,7 @@
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
 #include <linux/nfsacl.h>
+#include "vfs.h"
 
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a922..1a259d313e14 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -25,6 +25,7 @@
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr3.h>
 #include <linux/nfs3.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0a..60a93cdefef5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -48,6 +48,7 @@
 #include <linux/nfsd/xdr4.h>
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/gss_api.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046b..c7a6b245c7ad 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -47,6 +47,7 @@
 #include <linux/crypto.h>
 #include <linux/sched.h>
 #include <linux/mount.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c8b621a120cd..850960e5d626 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -56,6 +56,7 @@
 #include <linux/module.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/clnt.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f6..db0fc55670b3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -57,6 +57,7 @@
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/svcauth_gss.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a76..d0d8a217a3ea 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -22,6 +22,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/nfsd/nfsd.h>
+#include "vfs.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_FH
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c5393d1b8955..6c967e1ba37b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -24,6 +24,7 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
+#include "vfs.h"
 
 typedef struct svc_rqst	svc_rqst;
 typedef struct svc_buf	svc_buf;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd43..2944b31dcbe6 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -35,6 +35,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
 #include <linux/seq_file.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 638573968dcf..a7038ede671a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -56,6 +56,7 @@
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
 #include <linux/ima.h>
+#include "vfs.h"
 
 #include <asm/uaccess.h>
 
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 000000000000..b8011fd2fcab
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_VFS_H
+#define LINUX_NFSD_VFS_H
+
+/*
+ * Flags for nfsd_permission
+ */
+#define NFSD_MAY_NOP		0
+#define NFSD_MAY_EXEC		1 /* == MAY_EXEC */
+#define NFSD_MAY_WRITE		2 /* == MAY_WRITE */
+#define NFSD_MAY_READ		4 /* == MAY_READ */
+#define NFSD_MAY_SATTR		8
+#define NFSD_MAY_TRUNC		16
+#define NFSD_MAY_LOCK		32
+#define NFSD_MAY_OWNER_OVERRIDE	64
+#define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
+#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
+
+#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
+#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+
+/*
+ * Callback function for readdir
+ */
+typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+
+/* nfsd/vfs.c */
+int		fh_lock_parent(struct svc_fh *, struct dentry *);
+int		nfsd_racache_init(int);
+void		nfsd_racache_shutdown(void);
+int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+		                struct svc_export **expp);
+__be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+				const char *, unsigned int, struct svc_fh *);
+__be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
+				const char *, unsigned int,
+				struct svc_export **, struct dentry **);
+__be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+				struct iattr *, int, time_t);
+#ifdef CONFIG_NFSD_V4
+__be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
+                    struct nfs4_acl *);
+int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
+#endif /* CONFIG_NFSD_V4 */
+__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				int type, dev_t rdev, struct svc_fh *res);
+#ifdef CONFIG_NFSD_V3
+__be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+__be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				struct svc_fh *res, int createmode,
+				u32 *verifier, int *truncp, int *created);
+__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
+				loff_t, unsigned long);
+#endif /* CONFIG_NFSD_V3 */
+__be32		nfsd_open(struct svc_rqst *, struct svc_fh *, int,
+				int, struct file **);
+void		nfsd_close(struct file *);
+__be32 		nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
+				loff_t, struct kvec *, int, unsigned long *);
+__be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
+				loff_t, struct kvec *,int, unsigned long *, int *);
+__be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
+				char *, int *);
+__be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, char *path, int plen,
+				struct svc_fh *res, struct iattr *);
+__be32		nfsd_link(struct svc_rqst *, struct svc_fh *,
+				char *, int, struct svc_fh *);
+__be32		nfsd_rename(struct svc_rqst *,
+				struct svc_fh *, char *, int,
+				struct svc_fh *, char *, int);
+__be32		nfsd_remove(struct svc_rqst *,
+				struct svc_fh *, char *, int);
+__be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
+				char *name, int len);
+int		nfsd_truncate(struct svc_rqst *, struct svc_fh *,
+				unsigned long size);
+__be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
+			     loff_t *, struct readdir_cd *, filldir_t);
+__be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
+				struct kstatfs *, int access);
+
+int		nfsd_notify_change(struct inode *, struct iattr *);
+__be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
+				struct dentry *, int);
+int		nfsd_sync_dir(struct dentry *dp);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
+#endif /* LINUX_NFSD_VFS_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 510ffdd5020e..e4518d090a8c 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -25,30 +25,10 @@
  */
 #define NFSD_SUPPORTED_MINOR_VERSION	1
 
-/*
- * Flags for nfsd_permission
- */
-#define NFSD_MAY_NOP		0
-#define NFSD_MAY_EXEC		1 /* == MAY_EXEC */
-#define NFSD_MAY_WRITE		2 /* == MAY_WRITE */
-#define NFSD_MAY_READ		4 /* == MAY_READ */
-#define NFSD_MAY_SATTR		8
-#define NFSD_MAY_TRUNC		16
-#define NFSD_MAY_LOCK		32
-#define NFSD_MAY_OWNER_OVERRIDE	64
-#define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
-#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
-
-#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
-#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
-
-/*
- * Callback function for readdir
- */
 struct readdir_cd {
 	__be32			err;	/* 0, nfserr, or nfserr_eof */
 };
-typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+
 
 extern struct svc_program	nfsd_program;
 extern struct svc_version	nfsd_version2, nfsd_version3,
@@ -73,69 +53,6 @@ int		nfsd_nrpools(void);
 int		nfsd_get_nrthreads(int n, int *);
 int		nfsd_set_nrthreads(int n, int *);
 
-/* nfsd/vfs.c */
-int		fh_lock_parent(struct svc_fh *, struct dentry *);
-int		nfsd_racache_init(int);
-void		nfsd_racache_shutdown(void);
-int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
-		                struct svc_export **expp);
-__be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
-				const char *, unsigned int, struct svc_fh *);
-__be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
-				const char *, unsigned int,
-				struct svc_export **, struct dentry **);
-__be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
-				struct iattr *, int, time_t);
-#ifdef CONFIG_NFSD_V4
-__be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
-                    struct nfs4_acl *);
-int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
-#endif /* CONFIG_NFSD_V4 */
-__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, struct iattr *attrs,
-				int type, dev_t rdev, struct svc_fh *res);
-#ifdef CONFIG_NFSD_V3
-__be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, struct iattr *attrs,
-				struct svc_fh *res, int createmode,
-				u32 *verifier, int *truncp, int *created);
-__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
-				loff_t, unsigned long);
-#endif /* CONFIG_NFSD_V3 */
-__be32		nfsd_open(struct svc_rqst *, struct svc_fh *, int,
-				int, struct file **);
-void		nfsd_close(struct file *);
-__be32 		nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
-				loff_t, struct kvec *, int, unsigned long *);
-__be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
-				loff_t, struct kvec *,int, unsigned long *, int *);
-__be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
-				char *, int *);
-__be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, char *path, int plen,
-				struct svc_fh *res, struct iattr *);
-__be32		nfsd_link(struct svc_rqst *, struct svc_fh *,
-				char *, int, struct svc_fh *);
-__be32		nfsd_rename(struct svc_rqst *,
-				struct svc_fh *, char *, int,
-				struct svc_fh *, char *, int);
-__be32		nfsd_remove(struct svc_rqst *,
-				struct svc_fh *, char *, int);
-__be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
-				char *name, int len);
-int		nfsd_truncate(struct svc_rqst *, struct svc_fh *,
-				unsigned long size);
-__be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
-			     loff_t *, struct readdir_cd *, filldir_t);
-__be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
-				struct kstatfs *, int access);
-
-int		nfsd_notify_change(struct inode *, struct iattr *);
-__be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
-				struct dentry *, int);
-int		nfsd_sync_dir(struct dentry *dp);
-
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
 extern struct svc_version nfsd_acl_version2;
@@ -147,8 +64,6 @@ extern struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
-struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
-int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 #endif
 
 enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-- 
cgit v1.2.3


From 23af368e9a904f59256c27d371ce223d6cee0430 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 11 Nov 2009 14:05:25 +0000
Subject: clockevents: Use u32 for mult and shift factors

The mult and shift factors of clock events differ in their data type
from those of clock sources for no reason. u32 is sufficient for
both. shift is always <= 32 and mult is limited to 2^32-1 to avoid
64bit multiplication overflows in the conversion.

Preparatory patch for a generic mult/shift factor calculation
function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Cc: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <20091111134229.725664788@linutronix.de>
---
 include/linux/clockchips.h | 4 ++--
 kernel/time/timer_list.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3ae..3b5841016276 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -79,8 +79,8 @@ struct clock_event_device {
 	unsigned int		features;
 	unsigned long		max_delta_ns;
 	unsigned long		min_delta_ns;
-	unsigned long		mult;
-	int			shift;
+	u32			mult;
+	u32			shift;
 	int			rating;
 	int			irq;
 	const struct cpumask	*cpumask;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1b5b7aa2fdfd..fa00da108a14 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -206,8 +206,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	SEQ_printf(m, "%s\n", dev->name);
 	SEQ_printf(m, " max_delta_ns:   %lu\n", dev->max_delta_ns);
 	SEQ_printf(m, " min_delta_ns:   %lu\n", dev->min_delta_ns);
-	SEQ_printf(m, " mult:           %lu\n", dev->mult);
-	SEQ_printf(m, " shift:          %d\n", dev->shift);
+	SEQ_printf(m, " mult:           %u\n", dev->mult);
+	SEQ_printf(m, " shift:          %u\n", dev->shift);
 	SEQ_printf(m, " mode:           %d\n", dev->mode);
 	SEQ_printf(m, " next_event:     %Ld nsecs\n",
 		   (unsigned long long) ktime_to_ns(dev->next_event));
-- 
cgit v1.2.3


From 7d2f944a2b836c69a9d260a0a5f0d1720d57fdff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 11 Nov 2009 14:05:29 +0000
Subject: clocksource: Provide a generic mult/shift factor calculation

MIPS has two functions to calculcate the mult/shift factors for clock
sources and clock events at run time. ARM needs such functions as
well.

Implement a function which calculates the mult/shift factors based on
the frequencies to which and from which is converted. The function
also has a parameter to specify the minimum conversion range in
seconds. This range is guaranteed not to produce a 64bit overflow when
a value is multiplied with the calculated mult factor. The larger the
conversion range the less becomes the conversion accuracy.

Provide two inline wrappers which handle clock events and clock
sources. For clock events the "from" frequency is nano seconds per
second which corresponds to 1GHz and "to" is the device frequency. For
clock sources "from" is the device frequency and "to" is nano seconds
per second.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mikael Pettersson <mikpe@it.uu.se>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Cc: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <20091111134229.766673305@linutronix.de>
---
 include/linux/clockchips.h  |  7 ++++++
 include/linux/clocksource.h | 10 +++++++++
 kernel/time/clocksource.c   | 53 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3b5841016276..4d438b0bc10a 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -130,6 +130,13 @@ extern int clockevents_program_event(struct clock_event_device *dev,
 
 extern void clockevents_handle_noop(struct clock_event_device *dev);
 
+static inline void
+clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
+{
+	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC,
+				      freq, minsec);
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd81b93..f57f88250526 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -279,6 +279,16 @@ extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
+extern void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
+
+static inline void
+clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
+{
+	return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				      NSEC_PER_SEC, minsec);
+}
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
 extern void update_vsyscall_tz(void);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5e18c6ab2c6a..407c0894ef37 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc,
 }
 EXPORT_SYMBOL(timecounter_cyc2time);
 
+/**
+ * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
+ * @mult:	pointer to mult variable
+ * @shift:	pointer to shift variable
+ * @from:	frequency to convert from
+ * @to:		frequency to convert to
+ * @minsec:	guaranteed runtime conversion range in seconds
+ *
+ * The function evaluates the shift/mult pair for the scaled math
+ * operations of clocksources and clockevents.
+ *
+ * @to and @from are frequency values in HZ. For clock sources @to is
+ * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
+ * event @to is the counter frequency and @from is NSEC_PER_SEC.
+ *
+ * The @minsec conversion range argument controls the time frame in
+ * seconds which must be covered by the runtime conversion with the
+ * calculated mult and shift factors. This guarantees that no 64bit
+ * overflow happens when the input value of the conversion is
+ * multiplied with the calculated mult factor. Larger ranges may
+ * reduce the conversion accuracy by chosing smaller mult and shift
+ * factors.
+ */
+void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+{
+	u64 tmp;
+	u32 sft, sftacc= 32;
+
+	/*
+	 * Calculate the shift factor which is limiting the conversion
+	 * range:
+	 */
+	tmp = ((u64)minsec * from) >> 32;
+	while (tmp) {
+		tmp >>=1;
+		sftacc--;
+	}
+
+	/*
+	 * Find the conversion shift/mult pair which has the best
+	 * accuracy and fits the maxsec conversion range:
+	 */
+	for (sft = 32; sft > 0; sft--) {
+		tmp = (u64) to << sft;
+		do_div(tmp, from);
+		if ((tmp >> sftacc) == 0)
+			break;
+	}
+	*mult = tmp;
+	*shift = sft;
+}
+
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *	currently selected clocksource.
-- 
cgit v1.2.3


From 98962465ed9e6ea99c38e0af63fe1dcb5a79dc25 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Tue, 18 Aug 2009 12:45:10 -0500
Subject: nohz: Prevent clocksource wrapping during idle

The dynamic tick allows the kernel to sleep for periods longer than a
single tick, but it does not limit the sleep time currently. In the
worst case the kernel could sleep longer than the wrap around time of
the time keeping clock source which would result in losing track of
time.

Prevent this by limiting it to the safe maximum sleep time of the
current time keeping clock source. The value is calculated when the
clock source is registered.

[ tglx: simplified the code a bit and massaged the commit msg ]

Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1250617512-23567-2-git-send-email-jon-hunter@ti.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |  2 ++
 include/linux/time.h        |  1 +
 kernel/time/clocksource.c   | 44 ++++++++++++++++++++++++++++++++++++++
 kernel/time/tick-sched.c    | 52 +++++++++++++++++++++++++++++++++------------
 kernel/time/timekeeping.c   | 11 ++++++++++
 5 files changed, 96 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f57f88250526..279c5478e8a6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  *			subtraction of non 64 bit counters
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
+ * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
  * @resume:		resume function for the clocksource, if necessary
@@ -168,6 +169,7 @@ struct clocksource {
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
+	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
 	void (*resume)(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index fe04e5ef6a59..6e026e45a179 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
+extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 407c0894ef37..b65b242f04dd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -468,6 +468,47 @@ void clocksource_touch_watchdog(void)
 
 #ifdef CONFIG_GENERIC_TIME
 
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs, max_cycles;
+
+	/*
+	 * Calculate the maximum number of cycles that we can pass to the
+	 * cyc2ns function without overflowing a 64-bit signed result. The
+	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
+	 * is equivalent to the below.
+	 * max_cycles < (2^63)/cs->mult
+	 * max_cycles < 2^(log2((2^63)/cs->mult))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+	 * max_cycles < 2^(63 - log2(cs->mult))
+	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * Please note that we add 1 to the result of the log2 to account for
+	 * any rounding errors, ensure the above inequality is satisfied and
+	 * no overflow will occur.
+	 */
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+	/*
+	 * The actual maximum number of cycles we can defer the clocksource is
+	 * determined by the minimum of max_cycles and cs->mask.
+	 */
+	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+	/*
+	 * To ensure that the clocksource does not wrap whilst we are idle,
+	 * limit the time the clocksource can be deferred by 12.5%. Please
+	 * note a margin of 12.5% is used because this can be computed with
+	 * a shift, versus say 10% which would require division.
+	 */
+	return max_nsecs - (max_nsecs >> 5);
+}
+
 /**
  * clocksource_select - Select the best clocksource available
  *
@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max idle time permitted for this clocksource */
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
 	clocksource_select();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c65ba0faa98f..a80b4644fe6b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	u64 time_delta;
 	int cpu;
 
 	local_irq_save(flags);
@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle)
 		seq = read_seqbegin(&xtime_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
+
+		/*
+		 * On SMP we really should only care for the CPU which
+		 * has the do_timer duty assigned. All other CPUs can
+		 * sleep as long as they want.
+		 */
+		if (cpu == tick_do_timer_cpu ||
+		    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			time_delta = timekeeping_max_deferment();
+		else
+			time_delta = KTIME_MAX;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
@@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		/*
-		* calculate the expiry time for the next timer wheel
-		* timer
-		*/
-		expires = ktime_add_ns(last_update, tick_period.tv64 *
-				   delta_jiffies);
+		 * calculate the expiry time for the next timer wheel
+		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+		 * that there is no timer pending or at least extremely
+		 * far into the future (12 days for HZ=1000). In this
+		 * case we set the expiry to the end of time.
+		 */
+		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+			/*
+			 * Calculate the time delta for the next timer event.
+			 * If the time delta exceeds the maximum time delta
+			 * permitted by the current clocksource then adjust
+			 * the time delta accordingly to ensure the
+			 * clocksource does not wrap.
+			 */
+			time_delta = min_t(u64, time_delta,
+					   tick_period.tv64 * delta_jiffies);
+			expires = ktime_add_ns(last_update, time_delta);
+		} else {
+			expires.tv64 = KTIME_MAX;
+		}
 
 		/*
 		 * If this cpu is the one which updates jiffies, then
@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		ts->idle_sleeps++;
 
+		/* Mark expires */
+		ts->idle_expires = expires;
+
 		/*
-		 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-		 * there is no timer pending or at least extremly far
-		 * into the future (12 days for HZ=1000). In this case
-		 * we simply stop the tick timer:
+		 * If the expiration time == KTIME_MAX, then
+		 * in this case we simply stop the tick timer.
 		 */
-		if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-			ts->idle_expires.tv64 = KTIME_MAX;
+		 if (unlikely(expires.tv64 == KTIME_MAX)) {
 			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 				hrtimer_cancel(&ts->sched_timer);
 			goto out;
 		}
 
-		/* Mark expiries */
-		ts->idle_expires = expires;
-
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
 				      HRTIMER_MODE_ABS_PINNED);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 96b3f0dfa5dc..5d4d4239a0aa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -477,6 +477,17 @@ int timekeeping_valid_for_hres(void)
 	return ret;
 }
 
+/**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+	return timekeeper.clock->max_idle_ns;
+}
+
 /**
  * read_persistent_clock -  Return time from the persistent clock.
  *
-- 
cgit v1.2.3


From 27185016b806d5a1181ff501cae120582b2b27dd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 12 Nov 2009 22:12:06 +0100
Subject: nohz: Track last do_timer() cpu

The previous patch which limits the sleep time to the maximum
deferment time of the time keeping clocksource has some limitations on
SMP machines: if all CPUs are idle then for all CPUs the maximum sleep
time is limited.

Solve this by keeping track of which cpu had the do_timer() duty
assigned last and limit the sleep time only for this cpu.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
---
 include/linux/tick.h     |  2 ++
 kernel/time/tick-sched.c | 52 ++++++++++++++++++++++++++----------------------
 2 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 8dc082194b22..d2ae79e21be3 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -43,6 +43,7 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @sleep_length:	Duration of the current idle sleep
+ * @do_timer_lst:	CPU was the last one doing do_timer before going idle
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -64,6 +65,7 @@ struct tick_sched {
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
 	ktime_t				idle_expires;
+	int				do_timer_last;
 };
 
 extern void __init tick_init(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a80b4644fe6b..df133bc29f89 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 		seq = read_seqbegin(&xtime_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
-
-		/*
-		 * On SMP we really should only care for the CPU which
-		 * has the do_timer duty assigned. All other CPUs can
-		 * sleep as long as they want.
-		 */
-		if (cpu == tick_do_timer_cpu ||
-		    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
-			time_delta = timekeeping_max_deferment();
-		else
-			time_delta = KTIME_MAX;
+		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
@@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle)
 	/* Schedule the tick, if we are at least one jiffie off */
 	if ((long)delta_jiffies >= 1) {
 
+		/*
+		 * If this cpu is the one which updates jiffies, then
+		 * give up the assignment and let it be taken by the
+		 * cpu which runs the tick timer next, which might be
+		 * this cpu as well. If we don't drop this here the
+		 * jiffies might be stale and do_timer() never
+		 * invoked. Keep track of the fact that it was the one
+		 * which had the do_timer() duty last. If this cpu is
+		 * the one which had the do_timer() duty last, we
+		 * limit the sleep time to the timekeeping
+		 * max_deferement value which we retrieved
+		 * above. Otherwise we can sleep as long as we want.
+		 */
+		if (cpu == tick_do_timer_cpu) {
+			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+			ts->do_timer_last = 1;
+		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+			time_delta = KTIME_MAX;
+			ts->do_timer_last = 0;
+		} else if (!ts->do_timer_last) {
+			time_delta = KTIME_MAX;
+		}
+
 		/*
 		 * calculate the expiry time for the next timer wheel
 		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
@@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle)
 			 */
 			time_delta = min_t(u64, time_delta,
 					   tick_period.tv64 * delta_jiffies);
-			expires = ktime_add_ns(last_update, time_delta);
-		} else {
-			expires.tv64 = KTIME_MAX;
 		}
 
-		/*
-		 * If this cpu is the one which updates jiffies, then
-		 * give up the assignment and let it be taken by the
-		 * cpu which runs the tick timer next, which might be
-		 * this cpu as well. If we don't drop this here the
-		 * jiffies might be stale and do_timer() never
-		 * invoked.
-		 */
-		if (cpu == tick_do_timer_cpu)
-			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		if (time_delta < KTIME_MAX)
+			expires = ktime_add_ns(last_update, time_delta);
+		else
+			expires.tv64 = KTIME_MAX;
 
 		if (delta_jiffies > 1)
 			cpumask_set_cpu(cpu, nohz_cpu_mask);
-- 
cgit v1.2.3


From 97813f2fe77804a4464564c75ba8d8826377feea Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Tue, 18 Aug 2009 12:45:11 -0500
Subject: nohz: Allow 32-bit machines to sleep for more than 2.15 seconds

In the dynamic tick code, "max_delta_ns" (member of the
"clock_event_device" structure) represents the maximum sleep time
that can occur between timer events in nanoseconds.

The variable, "max_delta_ns", is defined as an unsigned long
which is a 32-bit integer for 32-bit machines and a 64-bit
integer for 64-bit machines (if -m64 option is used for gcc).
The value of max_delta_ns is set by calling the function
"clockevent_delta2ns()" which returns a maximum value of LONG_MAX.
For a 32-bit machine LONG_MAX is equal to 0x7fffffff and in
nanoseconds this equates to ~2.15 seconds. Hence, the maximum
sleep time for a 32-bit machine is ~2.15 seconds, where as for
a 64-bit machine it will be many years.

This patch changes the type of max_delta_ns to be "u64" instead of
"unsigned long" so that this variable is a 64-bit type for both 32-bit
and 64-bit machines. It also changes the maximum value returned by
clockevent_delta2ns() to KTIME_MAX.  Hence this allows a 32-bit
machine to sleep for longer than ~2.15 seconds. Please note that this
patch also changes "min_delta_ns" to be "u64" too and although this is
unnecessary, it makes the patch simpler as it avoids to fixup all
callers of clockevent_delta2ns().

[ tglx: changed "unsigned long long" to u64 as we use this data type
  	through out the time code ]

Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1250617512-23567-3-git-send-email-jon-hunter@ti.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h |  8 ++++----
 kernel/hrtimer.c           |  3 ++-
 kernel/time/clockevents.c  | 11 +++++------
 kernel/time/tick-oneshot.c |  4 ++--
 kernel/time/timer_list.c   |  6 ++++--
 5 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 4d438b0bc10a..0cf725bdd2a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -77,8 +77,8 @@ enum clock_event_nofitiers {
 struct clock_event_device {
 	const char		*name;
 	unsigned int		features;
-	unsigned long		max_delta_ns;
-	unsigned long		min_delta_ns;
+	u64			max_delta_ns;
+	u64			min_delta_ns;
 	u32			mult;
 	u32			shift;
 	int			rating;
@@ -116,8 +116,8 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
 }
 
 /* Clock event layer functions */
-extern unsigned long clockevent_delta2ns(unsigned long latch,
-					 struct clock_event_device *evt);
+extern u64 clockevent_delta2ns(unsigned long latch,
+			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 
 extern void clockevents_exchange_device(struct clock_event_device *old,
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6d7020490f94..c215b74cd953 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1240,7 +1240,8 @@ hrtimer_interrupt_hanging(struct clock_event_device *dev,
 	force_clock_reprogram = 1;
 	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
 	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-		"forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+	       "forcing clock min delta to %llu ns\n",
+	       (unsigned long long) dev->min_delta_ns);
 }
 /*
  * High resolution timer interrupt
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..05e8aeedcdf3 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -37,10 +37,9 @@ static DEFINE_SPINLOCK(clockevents_lock);
  *
  * Math helper, returns latch value converted to nanoseconds (bound checked)
  */
-unsigned long clockevent_delta2ns(unsigned long latch,
-				  struct clock_event_device *evt)
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 {
-	u64 clc = ((u64) latch << evt->shift);
+	u64 clc = (u64) latch << evt->shift;
 
 	if (unlikely(!evt->mult)) {
 		evt->mult = 1;
@@ -50,10 +49,10 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 	do_div(clc, evt->mult);
 	if (clc < 1000)
 		clc = 1000;
-	if (clc > LONG_MAX)
-		clc = LONG_MAX;
+	if (clc > KTIME_MAX)
+		clc = KTIME_MAX;
 
-	return (unsigned long) clc;
+	return clc;
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index a96c0e2b89cf..0a8a213016f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
 				dev->min_delta_ns += dev->min_delta_ns >> 1;
 
 			printk(KERN_WARNING
-			       "CE: %s increasing min_delta_ns to %lu nsec\n",
+			       "CE: %s increasing min_delta_ns to %llu nsec\n",
 			       dev->name ? dev->name : "?",
-			       dev->min_delta_ns << 1);
+			       (unsigned long long) dev->min_delta_ns << 1);
 
 			i = 0;
 		}
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index fa00da108a14..665c76edbf17 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -204,8 +204,10 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 		return;
 	}
 	SEQ_printf(m, "%s\n", dev->name);
-	SEQ_printf(m, " max_delta_ns:   %lu\n", dev->max_delta_ns);
-	SEQ_printf(m, " min_delta_ns:   %lu\n", dev->min_delta_ns);
+	SEQ_printf(m, " max_delta_ns:   %llu\n",
+		   (unsigned long long) dev->max_delta_ns);
+	SEQ_printf(m, " min_delta_ns:   %llu\n",
+		   (unsigned long long) dev->min_delta_ns);
 	SEQ_printf(m, " mult:           %u\n", dev->mult);
 	SEQ_printf(m, " shift:          %u\n", dev->shift);
 	SEQ_printf(m, " mode:           %d\n", dev->mode);
-- 
cgit v1.2.3


From 6beb000923882f6204ea2cfcd932e568e900803f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Nov 2009 15:21:34 +0000
Subject: locking: Make inlining decision Kconfig based

commit 892a7c67 (locking: Allow arch-inlined spinlocks) implements the
selection of which lock functions are inlined based on defines in
arch/.../spinlock.h: #define __always_inline__LOCK_FUNCTION

Despite of the name __always_inline__* the lock functions can be built
out of line depending on config options. Also if the arch does not set
some inline defines the generic code might set them; again depending on
config options.

This makes it unnecessary hard to figure out when and which lock
functions are inlined. Aside of that it makes it way harder and
messier for -rt to manipulate the lock functions.

Convert the inlining decision to CONFIG switches. Each lock function
is inlined depending on CONFIG_INLINE_*. The configs implement the
existing dependencies. The architecture code can select ARCH_INLINE_*
to signal that it wants the corresponding lock function inlined.
ARCH_INLINE_* is necessary as Kconfig ignores "depends on"
restrictions when a config element is selected.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20091109151428.504477141@linutronix.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/s390/Kconfig                |  28 ++++++
 arch/s390/include/asm/spinlock.h |  29 ------
 include/linux/spinlock_api_smp.h |  75 ++++++---------
 init/Kconfig                     |   1 +
 kernel/Kconfig.locks             | 199 +++++++++++++++++++++++++++++++++++++++
 kernel/spinlock.c                |  56 +++++------
 6 files changed, 284 insertions(+), 104 deletions(-)
 create mode 100644 kernel/Kconfig.locks

(limited to 'include/linux')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 43c0acad7160..16c673096a22 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,6 +95,34 @@ config S390
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
 	select HAVE_PERF_EVENTS
+	select ARCH_INLINE_SPIN_TRYLOCK
+	select ARCH_INLINE_SPIN_TRYLOCK_BH
+	select ARCH_INLINE_SPIN_LOCK
+	select ARCH_INLINE_SPIN_LOCK_BH
+	select ARCH_INLINE_SPIN_LOCK_IRQ
+	select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	select ARCH_INLINE_SPIN_UNLOCK
+	select ARCH_INLINE_SPIN_UNLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK_IRQ
+	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_READ_TRYLOCK
+	select ARCH_INLINE_READ_LOCK
+	select ARCH_INLINE_READ_LOCK_BH
+	select ARCH_INLINE_READ_LOCK_IRQ
+	select ARCH_INLINE_READ_LOCK_IRQSAVE
+	select ARCH_INLINE_READ_UNLOCK
+	select ARCH_INLINE_READ_UNLOCK_BH
+	select ARCH_INLINE_READ_UNLOCK_IRQ
+	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_WRITE_TRYLOCK
+	select ARCH_INLINE_WRITE_LOCK
+	select ARCH_INLINE_WRITE_LOCK_BH
+	select ARCH_INLINE_WRITE_LOCK_IRQ
+	select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	select ARCH_INLINE_WRITE_UNLOCK
+	select ARCH_INLINE_WRITE_UNLOCK_BH
+	select ARCH_INLINE_WRITE_UNLOCK_IRQ
+	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 
 config SCHED_OMIT_FRAME_POINTER
 	bool
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 41ce6861174e..c9af0d19c7ab 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
-#define __always_inline__spin_lock
-#define __always_inline__read_lock
-#define __always_inline__write_lock
-#define __always_inline__spin_lock_bh
-#define __always_inline__read_lock_bh
-#define __always_inline__write_lock_bh
-#define __always_inline__spin_lock_irq
-#define __always_inline__read_lock_irq
-#define __always_inline__write_lock_irq
-#define __always_inline__spin_lock_irqsave
-#define __always_inline__read_lock_irqsave
-#define __always_inline__write_lock_irqsave
-#define __always_inline__spin_trylock
-#define __always_inline__read_trylock
-#define __always_inline__write_trylock
-#define __always_inline__spin_trylock_bh
-#define __always_inline__spin_unlock
-#define __always_inline__read_unlock
-#define __always_inline__write_unlock
-#define __always_inline__spin_unlock_bh
-#define __always_inline__read_unlock_bh
-#define __always_inline__write_unlock_bh
-#define __always_inline__spin_unlock_irq
-#define __always_inline__read_unlock_irq
-#define __always_inline__write_unlock_irq
-#define __always_inline__spin_unlock_irqrestore
-#define __always_inline__read_unlock_irqrestore
-#define __always_inline__write_unlock_irqrestore
-
 #endif /* __ASM_SPINLOCK_H */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 7a7e18fc2415..8264a7f459bc 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
-/*
- * We inline the unlock functions in the nondebug case:
- */
-#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT)
-#define __always_inline__spin_unlock
-#define __always_inline__read_unlock
-#define __always_inline__write_unlock
-#define __always_inline__spin_unlock_irq
-#define __always_inline__read_unlock_irq
-#define __always_inline__write_unlock_irq
-#endif
-
-#ifndef CONFIG_DEBUG_SPINLOCK
-#ifndef CONFIG_GENERIC_LOCKBREAK
-
-#ifdef __always_inline__spin_lock
+#ifdef CONFIG_INLINE_SPIN_LOCK
 #define _spin_lock(lock) __spin_lock(lock)
 #endif
 
-#ifdef __always_inline__read_lock
+#ifdef CONFIG_INLINE_READ_LOCK
 #define _read_lock(lock) __read_lock(lock)
 #endif
 
-#ifdef __always_inline__write_lock
+#ifdef CONFIG_INLINE_WRITE_LOCK
 #define _write_lock(lock) __write_lock(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_bh
+#ifdef CONFIG_INLINE_SPIN_LOCK_BH
 #define _spin_lock_bh(lock) __spin_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__read_lock_bh
+#ifdef CONFIG_INLINE_READ_LOCK_BH
 #define _read_lock_bh(lock) __read_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__write_lock_bh
+#ifdef CONFIG_INLINE_WRITE_LOCK_BH
 #define _write_lock_bh(lock) __write_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_irq
+#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ
 #define _spin_lock_irq(lock) __spin_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__read_lock_irq
+#ifdef CONFIG_INLINE_READ_LOCK_IRQ
 #define _read_lock_irq(lock) __read_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__write_lock_irq
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ
 #define _write_lock_irq(lock) __write_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_irqsave
+#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
 #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
 #endif
 
-#ifdef __always_inline__read_lock_irqsave
+#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE
 #define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
 #endif
 
-#ifdef __always_inline__write_lock_irqsave
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
 #define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
 #endif
 
-#endif /* !CONFIG_GENERIC_LOCKBREAK */
-
-#ifdef __always_inline__spin_trylock
+#ifdef CONFIG_INLINE_SPIN_TRYLOCK
 #define _spin_trylock(lock) __spin_trylock(lock)
 #endif
 
-#ifdef __always_inline__read_trylock
+#ifdef CONFIG_INLINE_READ_TRYLOCK
 #define _read_trylock(lock) __read_trylock(lock)
 #endif
 
-#ifdef __always_inline__write_trylock
+#ifdef CONFIG_INLINE_WRITE_TRYLOCK
 #define _write_trylock(lock) __write_trylock(lock)
 #endif
 
-#ifdef __always_inline__spin_trylock_bh
+#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH
 #define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock
+#ifdef CONFIG_INLINE_SPIN_UNLOCK
 #define _spin_unlock(lock) __spin_unlock(lock)
 #endif
 
-#ifdef __always_inline__read_unlock
+#ifdef CONFIG_INLINE_READ_UNLOCK
 #define _read_unlock(lock) __read_unlock(lock)
 #endif
 
-#ifdef __always_inline__write_unlock
+#ifdef CONFIG_INLINE_WRITE_UNLOCK
 #define _write_unlock(lock) __write_unlock(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_bh
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH
 #define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__read_unlock_bh
+#ifdef CONFIG_INLINE_READ_UNLOCK_BH
 #define _read_unlock_bh(lock) __read_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__write_unlock_bh
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH
 #define _write_unlock_bh(lock) __write_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_irq
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
 #define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__read_unlock_irq
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ
 #define _read_unlock_irq(lock) __read_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__write_unlock_irq
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ
 #define _write_unlock_irq(lock) __write_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_irqrestore
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
 #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
 #endif
 
-#ifdef __always_inline__read_unlock_irqrestore
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
 #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
 #endif
 
-#ifdef __always_inline__write_unlock_irqrestore
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
 #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
 #endif
 
-#endif /* CONFIG_DEBUG_SPINLOCK */
-
 static inline int __spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
diff --git a/init/Kconfig b/init/Kconfig
index 9e03ef8b311e..06863dd7bf49 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1210,3 +1210,4 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
+source "kernel/Kconfig.locks"
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
new file mode 100644
index 000000000000..d1f86db71451
--- /dev/null
+++ b/kernel/Kconfig.locks
@@ -0,0 +1,199 @@
+#
+# The ARCH_INLINE foo is necessary because select ignores "depends on"
+#
+config ARCH_INLINE_SPIN_TRYLOCK
+	bool
+
+config ARCH_INLINE_SPIN_TRYLOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_LOCK
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	bool
+
+
+config ARCH_INLINE_READ_TRYLOCK
+	bool
+
+config ARCH_INLINE_READ_LOCK
+	bool
+
+config ARCH_INLINE_READ_LOCK_BH
+	bool
+
+config ARCH_INLINE_READ_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_READ_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_READ_UNLOCK
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	bool
+
+
+config ARCH_INLINE_WRITE_TRYLOCK
+	bool
+
+config ARCH_INLINE_WRITE_LOCK
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_BH
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	bool
+
+#
+# lock_* functions are inlined when:
+#   - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y
+#
+# trylock_* functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#
+# unlock and unlock_irq functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#  or
+#   - DEBUG_SPINLOCK=n and PREEMPT=n
+#
+# unlock_bh and unlock_irqrestore functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#
+
+config INLINE_SPIN_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK
+
+config INLINE_SPIN_TRYLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH
+
+config INLINE_SPIN_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK
+
+config INLINE_SPIN_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_BH
+
+config INLINE_SPIN_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_IRQ
+
+config INLINE_SPIN_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_IRQSAVE
+
+config INLINE_SPIN_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+
+config INLINE_SPIN_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
+
+config INLINE_SPIN_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH)
+
+config INLINE_SPIN_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+
+
+config INLINE_READ_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK
+
+config INLINE_READ_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK
+
+config INLINE_READ_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_BH
+
+config INLINE_READ_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_IRQ
+
+config INLINE_READ_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_IRQSAVE
+
+config INLINE_READ_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK)
+
+config INLINE_READ_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH
+
+config INLINE_READ_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH)
+
+config INLINE_READ_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+
+
+config INLINE_WRITE_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK
+
+config INLINE_WRITE_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK
+
+config INLINE_WRITE_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_BH
+
+config INLINE_WRITE_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_IRQ
+
+config INLINE_WRITE_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_IRQSAVE
+
+config INLINE_WRITE_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK)
+
+config INLINE_WRITE_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH
+
+config INLINE_WRITE_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH)
+
+config INLINE_WRITE_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 5ddab730cb2f..235a9579a875 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -21,7 +21,7 @@
 #include <linux/debug_locks.h>
 #include <linux/module.h>
 
-#ifndef _spin_trylock
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	return __spin_trylock(lock);
@@ -29,7 +29,7 @@ int __lockfunc _spin_trylock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_trylock);
 #endif
 
-#ifndef _read_trylock
+#ifndef CONFIG_INLINE_READ_TRYLOCK
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	return __read_trylock(lock);
@@ -37,7 +37,7 @@ int __lockfunc _read_trylock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_trylock);
 #endif
 
-#ifndef _write_trylock
+#ifndef CONFIG_INLINE_WRITE_TRYLOCK
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	return __write_trylock(lock);
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(_write_trylock);
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-#ifndef _read_lock
+#ifndef CONFIG_INLINE_READ_LOCK
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	__read_lock(lock);
@@ -60,7 +60,7 @@ void __lockfunc _read_lock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock);
 #endif
 
-#ifndef _spin_lock_irqsave
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	return __spin_lock_irqsave(lock);
@@ -68,7 +68,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_irqsave);
 #endif
 
-#ifndef _spin_lock_irq
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
 void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	__spin_lock_irq(lock);
@@ -76,7 +76,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_irq);
 #endif
 
-#ifndef _spin_lock_bh
+#ifndef CONFIG_INLINE_SPIN_LOCK_BH
 void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	__spin_lock_bh(lock);
@@ -84,7 +84,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_bh);
 #endif
 
-#ifndef _read_lock_irqsave
+#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 {
 	return __read_lock_irqsave(lock);
@@ -92,7 +92,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_irqsave);
 #endif
 
-#ifndef _read_lock_irq
+#ifndef CONFIG_INLINE_READ_LOCK_IRQ
 void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	__read_lock_irq(lock);
@@ -100,7 +100,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_irq);
 #endif
 
-#ifndef _read_lock_bh
+#ifndef CONFIG_INLINE_READ_LOCK_BH
 void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	__read_lock_bh(lock);
@@ -108,7 +108,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_bh);
 #endif
 
-#ifndef _write_lock_irqsave
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 {
 	return __write_lock_irqsave(lock);
@@ -116,7 +116,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_irqsave);
 #endif
 
-#ifndef _write_lock_irq
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
 void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	__write_lock_irq(lock);
@@ -124,7 +124,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_irq);
 #endif
 
-#ifndef _write_lock_bh
+#ifndef CONFIG_INLINE_WRITE_LOCK_BH
 void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	__write_lock_bh(lock);
@@ -132,7 +132,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_bh);
 #endif
 
-#ifndef _spin_lock
+#ifndef CONFIG_INLINE_SPIN_LOCK
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	__spin_lock(lock);
@@ -140,7 +140,7 @@ void __lockfunc _spin_lock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock);
 #endif
 
-#ifndef _write_lock
+#ifndef CONFIG_INLINE_WRITE_LOCK
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	__write_lock(lock);
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
 
 #endif
 
-#ifndef _spin_unlock
+#ifndef CONFIG_INLINE_SPIN_UNLOCK
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
 	__spin_unlock(lock);
@@ -280,7 +280,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock);
 #endif
 
-#ifndef _write_unlock
+#ifndef CONFIG_INLINE_WRITE_UNLOCK
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
 	__write_unlock(lock);
@@ -288,7 +288,7 @@ void __lockfunc _write_unlock(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock);
 #endif
 
-#ifndef _read_unlock
+#ifndef CONFIG_INLINE_READ_UNLOCK
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
 	__read_unlock(lock);
@@ -296,7 +296,7 @@ void __lockfunc _read_unlock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock);
 #endif
 
-#ifndef _spin_unlock_irqrestore
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	__spin_unlock_irqrestore(lock, flags);
@@ -304,7 +304,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_spin_unlock_irqrestore);
 #endif
 
-#ifndef _spin_unlock_irq
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
 	__spin_unlock_irq(lock);
@@ -312,7 +312,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock_irq);
 #endif
 
-#ifndef _spin_unlock_bh
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
 	__spin_unlock_bh(lock);
@@ -320,7 +320,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock_bh);
 #endif
 
-#ifndef _read_unlock_irqrestore
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__read_unlock_irqrestore(lock, flags);
@@ -328,7 +328,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_read_unlock_irqrestore);
 #endif
 
-#ifndef _read_unlock_irq
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
 	__read_unlock_irq(lock);
@@ -336,7 +336,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock_irq);
 #endif
 
-#ifndef _read_unlock_bh
+#ifndef CONFIG_INLINE_READ_UNLOCK_BH
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
 	__read_unlock_bh(lock);
@@ -344,7 +344,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock_bh);
 #endif
 
-#ifndef _write_unlock_irqrestore
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__write_unlock_irqrestore(lock, flags);
@@ -352,7 +352,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_write_unlock_irqrestore);
 #endif
 
-#ifndef _write_unlock_irq
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
 	__write_unlock_irq(lock);
@@ -360,7 +360,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock_irq);
 #endif
 
-#ifndef _write_unlock_bh
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
 	__write_unlock_bh(lock);
@@ -368,7 +368,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock_bh);
 #endif
 
-#ifndef _spin_trylock_bh
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
 int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	return __spin_trylock_bh(lock);
-- 
cgit v1.2.3


From 572a9d7b6fc7f20f573664063324c086be310c42 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 10 Nov 2009 06:14:14 +0000
Subject: net: allow to propagate errors through ->ndo_hard_start_xmit()

Currently the ->ndo_hard_start_xmit() callbacks are only permitted to return
one of the NETDEV_TX codes. This prevents any kind of error propagation for
virtual devices, like queue congestion of the underlying device in case of
layered devices, or unreachability in case of tunnels.

This patches changes the NET_XMIT codes to avoid clashes with the NETDEV_TX
codes and changes the two callers of dev_hard_start_xmit() to expect either
errno codes, NET_XMIT codes or NETDEV_TX codes as return value.

In case of qdisc_restart(), all non NETDEV_TX codes are mapped to NETDEV_TX_OK
since no error propagation is possible when using qdiscs. In case of
dev_queue_xmit(), the error is propagated upwards.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 43 ++++++++++++++++++++++++++++++++-----------
 net/core/dev.c            | 32 ++++++++++++++++++++++++++------
 net/sched/sch_generic.c   |  9 ++++++++-
 3 files changed, 66 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 083b5989cecb..8b266390b9e2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -63,27 +63,48 @@ struct wireless_dev;
 #define HAVE_FREE_NETDEV		/* free_netdev() */
 #define HAVE_NETDEV_PRIV		/* netdev_priv() */
 
-#define NET_XMIT_SUCCESS	0
-#define NET_XMIT_DROP		1	/* skb dropped			*/
-#define NET_XMIT_CN		2	/* congestion notification	*/
-#define NET_XMIT_POLICED	3	/* skb is shot by police	*/
-#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
+/*
+ * Transmit return codes: transmit return codes originate from three different
+ * namespaces:
+ *
+ * - qdisc return codes
+ * - driver transmit return codes
+ * - errno values
+ *
+ * Drivers are allowed to return any one of those in their hard_start_xmit()
+ * function. Real network devices commonly used with qdiscs should only return
+ * the driver transmit return codes though - when qdiscs are used, the actual
+ * transmission happens asynchronously, so the value is not propagated to
+ * higher layers. Virtual network devices transmit synchronously, in this case
+ * the driver transmit return codes are consumed by dev_queue_xmit(), all
+ * others are propagated to higher layers.
+ */
+
+/* qdisc ->enqueue() return codes. */
+#define NET_XMIT_SUCCESS	0x00
+#define NET_XMIT_DROP		0x10	/* skb dropped			*/
+#define NET_XMIT_CN		0x20	/* congestion notification	*/
+#define NET_XMIT_POLICED	0x30	/* skb is shot by police	*/
+#define NET_XMIT_MASK		0xf0	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
-#define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
-#define NET_RX_DROP		1  /* packet dropped */
+#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
+#define NET_RX_DROP		1	/* packet dropped */
 
 /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
  * indicates that the device will soon be dropping packets, or already drops
  * some packets of the same priority; prompting us to send less aggressively. */
-#define net_xmit_eval(e)	((e) == NET_XMIT_CN? 0 : (e))
+#define net_xmit_eval(e)	((e) == NET_XMIT_CN ? 0 : (e))
 #define net_xmit_errno(e)	((e) != NET_XMIT_CN ? -ENOBUFS : 0)
 
 /* Driver transmit return codes */
+#define NETDEV_TX_MASK		0xf
+
 enum netdev_tx {
-	NETDEV_TX_OK = 0,	/* driver took care of packet */
-	NETDEV_TX_BUSY,		/* driver tx path was busy*/
-	NETDEV_TX_LOCKED = -1,	/* driver tx lock was already taken */
+	__NETDEV_TX_MIN	 = INT_MIN,	/* make sure enum is signed */
+	NETDEV_TX_OK	 = 0,		/* driver took care of packet */
+	NETDEV_TX_BUSY	 = 1,		/* driver tx path was busy*/
+	NETDEV_TX_LOCKED = 2,		/* driver tx lock was already taken */
 };
 typedef enum netdev_tx netdev_tx_t;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ad8e320ceba7..548340b57296 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1757,7 +1757,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int rc;
+	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
@@ -1805,6 +1805,8 @@ gso:
 		nskb->next = NULL;
 		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc != NETDEV_TX_OK)) {
+			if (rc & ~NETDEV_TX_MASK)
+				goto out_kfree_gso_skb;
 			nskb->next = skb->next;
 			skb->next = nskb;
 			return rc;
@@ -1814,11 +1816,12 @@ gso:
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
-	skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+	if (likely(skb->next == NULL))
+		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
-	return NETDEV_TX_OK;
+	return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1906,6 +1909,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+static inline bool dev_xmit_complete(int rc)
+{
+	/* successful transmission */
+	if (rc == NETDEV_TX_OK)
+		return true;
+
+	/* error while transmitting, driver consumed skb */
+	if (rc < 0)
+		return true;
+
+	/* error while queueing to a different device, driver consumed skb */
+	if (rc & NET_XMIT_MASK)
+		return true;
+
+	return false;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -2003,8 +2023,8 @@ gso:
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_tx_queue_stopped(txq)) {
-				rc = NET_XMIT_SUCCESS;
-				if (!dev_hard_start_xmit(skb, dev, txq)) {
+				rc = dev_hard_start_xmit(skb, dev, txq);
+				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
 				}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4ae6aa562f2b..b13821ad2fb6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -120,8 +120,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
-	    !netif_tx_queue_frozen(txq))
+	    !netif_tx_queue_frozen(txq)) {
 		ret = dev_hard_start_xmit(skb, dev, txq);
+
+		/* an error implies that the skb was consumed */
+		if (ret < 0)
+			ret = NETDEV_TX_OK;
+		/* all NET_XMIT codes map to NETDEV_TX_OK */
+		ret &= ~NET_XMIT_MASK;
+	}
 	HARD_TX_UNLOCK(dev, txq);
 
 	spin_lock(root_lock);
-- 
cgit v1.2.3


From d9b263528e01bfbaf716b51f38606b3dfe5ac1e9 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 13 Nov 2009 14:57:00 -0500
Subject: x86, setup: Store the boot cursor state

Add a field to store the boot cursor state and implement this for VGA on
x86. This can then be used to set the default policy for the boot console.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
LKML-Reference: <1258142222-16092-1-git-send-email-mjg@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/video.c       | 6 ++++++
 include/linux/screen_info.h | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index d42da3802499..f767164cd5df 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -27,6 +27,12 @@ static void store_cursor_position(void)
 
 	boot_params.screen_info.orig_x = oreg.dl;
 	boot_params.screen_info.orig_y = oreg.dh;
+
+	if (oreg.ch & 0x20)
+		boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+
+	if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
+		boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
 }
 
 static void store_video_mode(void)
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 1ee2c05142f6..899fbb487c94 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -14,7 +14,8 @@ struct screen_info {
 	__u16 orig_video_page;	/* 0x04 */
 	__u8  orig_video_mode;	/* 0x06 */
 	__u8  orig_video_cols;	/* 0x07 */
-	__u16 unused2;		/* 0x08 */
+	__u8  flags;		/* 0x08 */
+	__u8  unused2;		/* 0x09 */
 	__u16 orig_video_ega_bx;/* 0x0a */
 	__u16 unused3;		/* 0x0c */
 	__u8  orig_video_lines;	/* 0x0e */
@@ -65,6 +66,8 @@ struct screen_info {
 
 #define VIDEO_TYPE_EFI		0x70	/* EFI graphic mode		*/
 
+#define VIDEO_FLAGS_NOCURSOR	(1 << 0) /* The video mode has no cursor set */
+
 #ifdef __KERNEL__
 extern struct screen_info screen_info;
 
-- 
cgit v1.2.3


From 90a5e16992fa6105f7ebf3f29f5cf5feb1bbf7dc Mon Sep 17 00:00:00 2001
From: Rui Paulo <rpaulo@gmail.com>
Date: Wed, 11 Nov 2009 00:01:31 +0000
Subject: mac80211: implement RANN processing and forwarding

Process the RANN (Root Annoucement) Frame and try to find the HWMP
root station by sending a PREQ.

Signed-off-by: Rui Paulo <rpaulo@gmail.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Andrey Yurovsky <andrey@cozybit.com>
Tested-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 15 +++++++++
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mesh_hwmp.c   | 79 +++++++++++++++++++++++++++++++++++++++++-----
 net/mac80211/util.c        |  4 +++
 4 files changed, 91 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 50c684db33c7..49b1abd2fe97 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -554,6 +554,20 @@ struct ieee80211_tim_ie {
 	u8 virtual_map[1];
 } __attribute__ ((packed));
 
+/**
+ * struct ieee80211_rann_ie
+ *
+ * This structure refers to "Root Announcement information element"
+ */
+struct ieee80211_rann_ie {
+	u8 rann_flags;
+	u8 rann_hopcount;
+	u8 rann_ttl;
+	u8 rann_addr[6];
+	u32 rann_seq;
+	u32 rann_metric;
+} __attribute__ ((packed));
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
@@ -1070,6 +1084,7 @@ enum ieee80211_eid {
 	WLAN_EID_PREQ = 68,
 	WLAN_EID_PREP = 69,
 	WLAN_EID_PERR = 70,
+	WLAN_EID_RANN = 49,	/* compatible with FreeBSD */
 	/* 802.11h */
 	WLAN_EID_PWR_CONSTRAINT = 32,
 	WLAN_EID_PWR_CAPABILITY = 33,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 19b0c128d940..2a7d3d4067e1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -804,6 +804,7 @@ struct ieee802_11_elems {
 	u8 *preq;
 	u8 *prep;
 	u8 *perr;
+	struct ieee80211_rann_ie *rann;
 	u8 *ch_switch_elem;
 	u8 *country_elem;
 	u8 *pwr_constr_elem;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index db1a33098a88..7b9dd87cf9f2 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -28,6 +28,8 @@
 /* Reply and forward */
 #define MP_F_RF	0x2
 
+static void mesh_queue_preq(struct mesh_path *, u8);
+
 static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
@@ -81,7 +83,8 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
 enum mpath_frame_type {
 	MPATH_PREQ = 0,
 	MPATH_PREP,
-	MPATH_PERR
+	MPATH_PERR,
+	MPATH_RANN
 };
 
 static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
@@ -109,7 +112,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	/* BSSID is left zeroed, wildcard value */
+	/* BSSID == SA */
+	memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 
@@ -126,6 +130,12 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 		pos = skb_put(skb, 2 + ie_len);
 		*pos++ = WLAN_EID_PREP;
 		break;
+	case MPATH_RANN:
+		mhwmp_dbg("sending RANN from %pM\n", orig_addr);
+		ie_len = sizeof(struct ieee80211_rann_ie);
+		pos = skb_put(skb, 2 + ie_len);
+		*pos++ = WLAN_EID_RANN;
+		break;
 	default:
 		kfree_skb(skb);
 		return -ENOTSUPP;
@@ -143,8 +153,10 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	pos += ETH_ALEN;
 	memcpy(pos, &orig_dsn, 4);
 	pos += 4;
-	memcpy(pos, &lifetime, 4);
-	pos += 4;
+	if (action != MPATH_RANN) {
+		memcpy(pos, &lifetime, 4);
+		pos += 4;
+	}
 	memcpy(pos, &metric, 4);
 	pos += 4;
 	if (action == MPATH_PREQ) {
@@ -152,9 +164,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 		*pos++ = 1;
 		*pos++ = dst_flags;
 	}
-	memcpy(pos, dst, ETH_ALEN);
-	pos += ETH_ALEN;
-	memcpy(pos, &dst_dsn, 4);
+	if (action != MPATH_RANN) {
+		memcpy(pos, dst, ETH_ALEN);
+		pos += ETH_ALEN;
+		memcpy(pos, &dst_dsn, 4);
+	}
 
 	ieee80211_tx_skb(sdata, skb, 1);
 	return 0;
@@ -610,6 +624,54 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 }
 
+static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
+				struct ieee80211_mgmt *mgmt,
+				struct ieee80211_rann_ie *rann)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct mesh_path *mpath;
+	u8 *ta;
+	u8 ttl, flags, hopcount;
+	u8 *orig_addr;
+	u32 orig_dsn, metric;
+
+	ta = mgmt->sa;
+	ttl = rann->rann_ttl;
+	if (ttl <= 1) {
+		ifmsh->mshstats.dropped_frames_ttl++;
+		return;
+	}
+	ttl--;
+	flags = rann->rann_flags;
+	orig_addr = rann->rann_addr;
+	orig_dsn = rann->rann_seq;
+	hopcount = rann->rann_hopcount;
+	metric = rann->rann_metric;
+	mhwmp_dbg("received RANN from %pM\n", orig_addr);
+
+	rcu_read_lock();
+	mpath = mesh_path_lookup(orig_addr, sdata);
+	if (!mpath) {
+		mesh_path_add(orig_addr, sdata);
+		mpath = mesh_path_lookup(orig_addr, sdata);
+		if (!mpath) {
+			rcu_read_unlock();
+			sdata->u.mesh.mshstats.dropped_frames_no_route++;
+			return;
+		}
+		mesh_queue_preq(mpath,
+				PREQ_Q_F_START | PREQ_Q_F_REFRESH);
+	}
+	if (mpath->dsn < orig_dsn) {
+		mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
+				       cpu_to_le32(orig_dsn),
+				       0, NULL, 0, sdata->dev->broadcast,
+				       hopcount, ttl, 0, cpu_to_le32(metric),
+				       0, sdata);
+		mpath->dsn = orig_dsn;
+	}
+	rcu_read_unlock();
+}
 
 
 void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
@@ -654,7 +716,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 			return;
 		hwmp_perr_frame_process(sdata, mgmt, elems.perr);
 	}
-
+	if (elems.rann)
+		hwmp_rann_frame_process(sdata, mgmt, elems.rann);
 }
 
 /**
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index aedbaaa067e6..da86e1592f8c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -685,6 +685,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 			elems->perr = pos;
 			elems->perr_len = elen;
 			break;
+		case WLAN_EID_RANN:
+			if (elen >= sizeof(struct ieee80211_rann_ie))
+				elems->rann = (void *)pos;
+			break;
 		case WLAN_EID_CHANNEL_SWITCH:
 			elems->ch_switch_elem = pos;
 			elems->ch_switch_elem_len = elen;
-- 
cgit v1.2.3


From d19b3bf6384e66ac6e11a61ee31ed2cfe149f4d8 Mon Sep 17 00:00:00 2001
From: Rui Paulo <rpaulo@gmail.com>
Date: Mon, 9 Nov 2009 23:46:55 +0000
Subject: mac80211: replace "destination" with "target" to follow the spec

Resulting object files have the same MD5 as before.

Signed-off-by: Rui Paulo <rpaulo@gmail.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Andrey Yurovsky <andrey@cozybit.com>
Tested-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h     |   8 +-
 include/net/cfg80211.h      |   8 +-
 net/mac80211/cfg.c          |   8 +-
 net/mac80211/ieee80211_i.h  |  10 +-
 net/mac80211/mesh.c         |   2 +-
 net/mac80211/mesh.h         |  10 +-
 net/mac80211/mesh_hwmp.c    | 221 ++++++++++++++++++++++----------------------
 net/mac80211/mesh_pathtbl.c |  12 +--
 net/wireless/nl80211.c      |   6 +-
 9 files changed, 144 insertions(+), 141 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 203adef972cf..7a0bd6ea6f63 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -899,14 +899,14 @@ enum nl80211_sta_info {
  *
  * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active
  * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running
- * @NL80211_MPATH_FLAG_DSN_VALID: the mesh path contains a valid DSN
+ * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN
  * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set
  * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded
  */
 enum nl80211_mpath_flags {
 	NL80211_MPATH_FLAG_ACTIVE =	1<<0,
 	NL80211_MPATH_FLAG_RESOLVING =	1<<1,
-	NL80211_MPATH_FLAG_DSN_VALID =	1<<2,
+	NL80211_MPATH_FLAG_SN_VALID =	1<<2,
 	NL80211_MPATH_FLAG_FIXED =	1<<3,
 	NL80211_MPATH_FLAG_RESOLVED =	1<<4,
 };
@@ -919,7 +919,7 @@ enum nl80211_mpath_flags {
  *
  * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved
  * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination
- * @NL80211_ATTR_MPATH_DSN: destination sequence number
+ * @NL80211_ATTR_MPATH_SN: destination sequence number
  * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path
  * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now
  * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in
@@ -930,7 +930,7 @@ enum nl80211_mpath_flags {
 enum nl80211_mpath_info {
 	__NL80211_MPATH_INFO_INVALID,
 	NL80211_MPATH_INFO_FRAME_QLEN,
-	NL80211_MPATH_INFO_DSN,
+	NL80211_MPATH_INFO_SN,
 	NL80211_MPATH_INFO_METRIC,
 	NL80211_MPATH_INFO_EXPTIME,
 	NL80211_MPATH_INFO_FLAGS,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1ee41e4a92e2..7d0ae9c18286 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -420,7 +420,7 @@ enum monitor_flags {
  * in during get_station() or dump_station().
  *
  * MPATH_INFO_FRAME_QLEN: @frame_qlen filled
- * MPATH_INFO_DSN: @dsn filled
+ * MPATH_INFO_SN: @sn filled
  * MPATH_INFO_METRIC: @metric filled
  * MPATH_INFO_EXPTIME: @exptime filled
  * MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled
@@ -429,7 +429,7 @@ enum monitor_flags {
  */
 enum mpath_info_flags {
 	MPATH_INFO_FRAME_QLEN		= BIT(0),
-	MPATH_INFO_DSN			= BIT(1),
+	MPATH_INFO_SN			= BIT(1),
 	MPATH_INFO_METRIC		= BIT(2),
 	MPATH_INFO_EXPTIME		= BIT(3),
 	MPATH_INFO_DISCOVERY_TIMEOUT	= BIT(4),
@@ -444,7 +444,7 @@ enum mpath_info_flags {
  *
  * @filled: bitfield of flags from &enum mpath_info_flags
  * @frame_qlen: number of queued frames for this destination
- * @dsn: destination sequence number
+ * @sn: target sequence number
  * @metric: metric (cost) of this mesh path
  * @exptime: expiration time for the mesh path from now, in msecs
  * @flags: mesh path flags
@@ -458,7 +458,7 @@ enum mpath_info_flags {
 struct mpath_info {
 	u32 filled;
 	u32 frame_qlen;
-	u32 dsn;
+	u32 sn;
 	u32 metric;
 	u32 exptime;
 	u32 discovery_timeout;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 576b86f81d1b..81053587e72b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -935,7 +935,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 	pinfo->generation = mesh_paths_generation;
 
 	pinfo->filled = MPATH_INFO_FRAME_QLEN |
-			MPATH_INFO_DSN |
+			MPATH_INFO_SN |
 			MPATH_INFO_METRIC |
 			MPATH_INFO_EXPTIME |
 			MPATH_INFO_DISCOVERY_TIMEOUT |
@@ -943,7 +943,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 			MPATH_INFO_FLAGS;
 
 	pinfo->frame_qlen = mpath->frame_queue.qlen;
-	pinfo->dsn = mpath->dsn;
+	pinfo->sn = mpath->sn;
 	pinfo->metric = mpath->metric;
 	if (time_before(jiffies, mpath->exp_time))
 		pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies);
@@ -955,8 +955,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 		pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE;
 	if (mpath->flags & MESH_PATH_RESOLVING)
 		pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING;
-	if (mpath->flags & MESH_PATH_DSN_VALID)
-		pinfo->flags |= NL80211_MPATH_FLAG_DSN_VALID;
+	if (mpath->flags & MESH_PATH_SN_VALID)
+		pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID;
 	if (mpath->flags & MESH_PATH_FIXED)
 		pinfo->flags |= NL80211_MPATH_FLAG_FIXED;
 	if (mpath->flags & MESH_PATH_RESOLVING)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2a7d3d4067e1..fb54ddb9b27d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -373,14 +373,14 @@ struct ieee80211_if_mesh {
 	u8 mesh_sp_id;
 	/* Authentication Protocol Identifier */
 	u8 mesh_auth_id;
-	/* Local mesh Destination Sequence Number */
-	u32 dsn;
+	/* Local mesh Sequence Number */
+	u32 sn;
 	/* Last used PREQ ID */
 	u32 preq_id;
 	atomic_t mpaths;
-	/* Timestamp of last DSN update */
-	unsigned long last_dsn_update;
-	/* Timestamp of last DSN sent */
+	/* Timestamp of last SN update */
+	unsigned long last_sn_update;
+	/* Timestamp of last SN sent */
 	unsigned long last_preq;
 	struct mesh_rmc *rmc;
 	spinlock_t mesh_preq_queue_lock;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 222a31338390..0f3e1147ec4f 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -672,7 +672,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		MESH_MIN_DISCOVERY_TIMEOUT;
 	ifmsh->accepting_plinks = true;
 	ifmsh->preq_id = 0;
-	ifmsh->dsn = 0;
+	ifmsh->sn = 0;
 	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
 	ifmsh->last_preq = jiffies;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 1d534c702406..ee687add3927 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -26,7 +26,7 @@
  *
  * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding
  * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path
- * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence
+ * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence
  * 	number
  * @MESH_PATH_FIXED: the mesh path has been manually set and should not be
  * 	modified
@@ -38,7 +38,7 @@
 enum mesh_path_flags {
 	MESH_PATH_ACTIVE =	BIT(0),
 	MESH_PATH_RESOLVING =	BIT(1),
-	MESH_PATH_DSN_VALID =	BIT(2),
+	MESH_PATH_SN_VALID =	BIT(2),
 	MESH_PATH_FIXED	=	BIT(3),
 	MESH_PATH_RESOLVED =	BIT(4),
 };
@@ -70,7 +70,7 @@ enum mesh_deferred_task_flags {
  * @timer: mesh path discovery timer
  * @frame_queue: pending queue for frames sent to this destination while the
  * 	path is unresolved
- * @dsn: destination sequence number of the destination
+ * @sn: target sequence number
  * @metric: current metric to this destination
  * @hop_count: hops to destination
  * @exp_time: in jiffies, when the path will expire or when it expired
@@ -94,7 +94,7 @@ struct mesh_path {
 	struct timer_list timer;
 	struct sk_buff_head frame_queue;
 	struct rcu_head rcu;
-	u32 dsn;
+	u32 sn;
 	u32 metric;
 	u8 hop_count;
 	unsigned long exp_time;
@@ -280,7 +280,7 @@ void mesh_mpp_table_grow(void);
 u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
 		struct mesh_table *tbl);
 /* Mesh paths */
-int mesh_path_error_tx(u8 ttl, u8 *dest, __le32 dest_dsn, __le16 dest_rcode,
+int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode,
 		u8 *ra, struct ieee80211_sub_if_data *sdata);
 void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
 void mesh_path_flush_pending(struct mesh_path *mpath);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 7d36f3a741a5..f03a27dfd616 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -56,33 +56,33 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae)
 #define PREQ_IE_TTL(x)		(*(x + 2))
 #define PREQ_IE_PREQ_ID(x)	u32_field_get(x, 3, 0)
 #define PREQ_IE_ORIG_ADDR(x)	(x + 7)
-#define PREQ_IE_ORIG_DSN(x)	u32_field_get(x, 13, 0);
+#define PREQ_IE_ORIG_SN(x)	u32_field_get(x, 13, 0);
 #define PREQ_IE_LIFETIME(x)	u32_field_get(x, 17, AE_F_SET(x));
 #define PREQ_IE_METRIC(x) 	u32_field_get(x, 21, AE_F_SET(x));
-#define PREQ_IE_DST_F(x)	(*(AE_F_SET(x) ? x + 32 : x + 26))
-#define PREQ_IE_DST_ADDR(x) 	(AE_F_SET(x) ? x + 33 : x + 27)
-#define PREQ_IE_DST_DSN(x) 	u32_field_get(x, 33, AE_F_SET(x));
+#define PREQ_IE_TARGET_F(x)	(*(AE_F_SET(x) ? x + 32 : x + 26))
+#define PREQ_IE_TARGET_ADDR(x) 	(AE_F_SET(x) ? x + 33 : x + 27)
+#define PREQ_IE_TARGET_SN(x) 	u32_field_get(x, 33, AE_F_SET(x));
 
 
 #define PREP_IE_FLAGS(x)	PREQ_IE_FLAGS(x)
 #define PREP_IE_HOPCOUNT(x)	PREQ_IE_HOPCOUNT(x)
 #define PREP_IE_TTL(x)		PREQ_IE_TTL(x)
 #define PREP_IE_ORIG_ADDR(x)	(x + 3)
-#define PREP_IE_ORIG_DSN(x)	u32_field_get(x, 9, 0);
+#define PREP_IE_ORIG_SN(x)	u32_field_get(x, 9, 0);
 #define PREP_IE_LIFETIME(x)	u32_field_get(x, 13, AE_F_SET(x));
 #define PREP_IE_METRIC(x)	u32_field_get(x, 17, AE_F_SET(x));
-#define PREP_IE_DST_ADDR(x)	(AE_F_SET(x) ? x + 27 : x + 21)
-#define PREP_IE_DST_DSN(x)	u32_field_get(x, 27, AE_F_SET(x));
+#define PREP_IE_TARGET_ADDR(x)	(AE_F_SET(x) ? x + 27 : x + 21)
+#define PREP_IE_TARGET_SN(x)	u32_field_get(x, 27, AE_F_SET(x));
 
 #define PERR_IE_TTL(x)		(*(x))
-#define PERR_IE_DST_FLAGS(x)	(*(x + 2))
-#define PERR_IE_DST_ADDR(x)	(x + 3)
-#define PERR_IE_DST_DSN(x)	u32_field_get(x, 9, 0);
-#define PERR_IE_DST_RCODE(x)	u16_field_get(x, 13, 0);
+#define PERR_IE_TARGET_FLAGS(x)	(*(x + 2))
+#define PERR_IE_TARGET_ADDR(x)	(x + 3)
+#define PERR_IE_TARGET_SN(x)	u32_field_get(x, 9, 0);
+#define PERR_IE_TARGET_RCODE(x)	u16_field_get(x, 13, 0);
 
 #define MSEC_TO_TU(x) (x*1000/1024)
-#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0)
-#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0)
+#define SN_GT(x, y) ((long) (y) - (long) (x) < 0)
+#define SN_LT(x, y) ((long) (x) - (long) (y) < 0)
 
 #define net_traversal_jiffies(s) \
 	msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime)
@@ -102,9 +102,10 @@ enum mpath_frame_type {
 };
 
 static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
-		u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst,
-		__le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime,
-		__le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata)
+		u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target,
+		__le32 target_sn, u8 *da, u8 hop_count, u8 ttl,__le32 lifetime,
+		__le32 metric, __le32 preq_id,
+		struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
@@ -133,13 +134,13 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 
 	switch (action) {
 	case MPATH_PREQ:
-		mhwmp_dbg("sending PREQ to %pM\n", dst);
+		mhwmp_dbg("sending PREQ to %pM\n", target);
 		ie_len = 37;
 		pos = skb_put(skb, 2 + ie_len);
 		*pos++ = WLAN_EID_PREQ;
 		break;
 	case MPATH_PREP:
-		mhwmp_dbg("sending PREP to %pM\n", dst);
+		mhwmp_dbg("sending PREP to %pM\n", target);
 		ie_len = 31;
 		pos = skb_put(skb, 2 + ie_len);
 		*pos++ = WLAN_EID_PREP;
@@ -165,7 +166,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	}
 	memcpy(pos, orig_addr, ETH_ALEN);
 	pos += ETH_ALEN;
-	memcpy(pos, &orig_dsn, 4);
+	memcpy(pos, &orig_sn, 4);
 	pos += 4;
 	if (action != MPATH_RANN) {
 		memcpy(pos, &lifetime, 4);
@@ -176,12 +177,12 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	if (action == MPATH_PREQ) {
 		/* destination count */
 		*pos++ = 1;
-		*pos++ = dst_flags;
+		*pos++ = target_flags;
 	}
 	if (action != MPATH_RANN) {
-		memcpy(pos, dst, ETH_ALEN);
+		memcpy(pos, target, ETH_ALEN);
 		pos += ETH_ALEN;
-		memcpy(pos, &dst_dsn, 4);
+		memcpy(pos, &target_sn, 4);
 	}
 
 	ieee80211_tx_skb(sdata, skb, 1);
@@ -191,12 +192,14 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 /**
  * mesh_send_path error - Sends a PERR mesh management frame
  *
- * @dst: broken destination
- * @dst_dsn: dsn of the broken destination
+ * @target: broken destination
+ * @target_sn: SN of the broken destination
+ * @target_rcode: reason code for this PERR
  * @ra: node this frame is addressed to
  */
-int mesh_path_error_tx(u8 ttl, u8 *dst, __le32 dst_dsn, __le16 dst_rcode,
-		u8 *ra, struct ieee80211_sub_if_data *sdata)
+int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
+		__le16 target_rcode, u8 *ra,
+		struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
@@ -234,16 +237,16 @@ int mesh_path_error_tx(u8 ttl, u8 *dst, __le32 dst_dsn, __le16 dst_rcode,
 	 * bit 2 is set if we have a reason code
 	 */
 	*pos = 0;
-	if (!dst_dsn)
+	if (!target_sn)
 		*pos |= MP_F_USN;
-	if (dst_rcode)
+	if (target_rcode)
 		*pos |= MP_F_RCODE;
 	pos++;
-	memcpy(pos, dst, ETH_ALEN);
+	memcpy(pos, target, ETH_ALEN);
 	pos += ETH_ALEN;
-	memcpy(pos, &dst_dsn, 4);
+	memcpy(pos, &target_sn, 4);
 	pos += 4;
-	memcpy(pos, &dst_rcode, 2);
+	memcpy(pos, &target_rcode, 2);
 
 	ieee80211_tx_skb(sdata, skb, 1);
 	return 0;
@@ -324,7 +327,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 	bool fresh_info;
 	u8 *orig_addr, *ta;
-	u32 orig_dsn, orig_metric;
+	u32 orig_sn, orig_metric;
 	unsigned long orig_lifetime, exp_time;
 	u32 last_hop_metric, new_metric;
 	bool process = true;
@@ -343,7 +346,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 	switch (action) {
 	case MPATH_PREQ:
 		orig_addr = PREQ_IE_ORIG_ADDR(hwmp_ie);
-		orig_dsn = PREQ_IE_ORIG_DSN(hwmp_ie);
+		orig_sn = PREQ_IE_ORIG_SN(hwmp_ie);
 		orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREQ_IE_METRIC(hwmp_ie);
 		break;
@@ -356,7 +359,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 		 * information from both PREQ and PREP frames.
 		 */
 		orig_addr = PREP_IE_ORIG_ADDR(hwmp_ie);
-		orig_dsn = PREP_IE_ORIG_DSN(hwmp_ie);
+		orig_sn = PREP_IE_ORIG_SN(hwmp_ie);
 		orig_lifetime = PREP_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREP_IE_METRIC(hwmp_ie);
 		break;
@@ -382,9 +385,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 			if (mpath->flags & MESH_PATH_FIXED)
 				fresh_info = false;
 			else if ((mpath->flags & MESH_PATH_ACTIVE) &&
-			    (mpath->flags & MESH_PATH_DSN_VALID)) {
-				if (DSN_GT(mpath->dsn, orig_dsn) ||
-				    (mpath->dsn == orig_dsn &&
+			    (mpath->flags & MESH_PATH_SN_VALID)) {
+				if (SN_GT(mpath->sn, orig_sn) ||
+				    (mpath->sn == orig_sn &&
 				     action == MPATH_PREQ &&
 				     new_metric > mpath->metric)) {
 					process = false;
@@ -403,9 +406,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 
 		if (fresh_info) {
 			mesh_path_assign_nexthop(mpath, sta);
-			mpath->flags |= MESH_PATH_DSN_VALID;
+			mpath->flags |= MESH_PATH_SN_VALID;
 			mpath->metric = new_metric;
-			mpath->dsn = orig_dsn;
+			mpath->sn = orig_sn;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
 			mesh_path_activate(mpath);
@@ -444,7 +447,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 
 		if (fresh_info) {
 			mesh_path_assign_nexthop(mpath, sta);
-			mpath->flags &= ~MESH_PATH_DSN_VALID;
+			mpath->flags &= ~MESH_PATH_SN_VALID;
 			mpath->metric = last_hop_metric;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
@@ -466,47 +469,47 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
-	u8 *dst_addr, *orig_addr;
-	u8 dst_flags, ttl;
-	u32 orig_dsn, dst_dsn, lifetime;
+	u8 *target_addr, *orig_addr;
+	u8 target_flags, ttl;
+	u32 orig_sn, target_sn, lifetime;
 	bool reply = false;
 	bool forward = true;
 
-	/* Update destination DSN, if present */
-	dst_addr = PREQ_IE_DST_ADDR(preq_elem);
+	/* Update target SN, if present */
+	target_addr = PREQ_IE_TARGET_ADDR(preq_elem);
 	orig_addr = PREQ_IE_ORIG_ADDR(preq_elem);
-	dst_dsn = PREQ_IE_DST_DSN(preq_elem);
-	orig_dsn = PREQ_IE_ORIG_DSN(preq_elem);
-	dst_flags = PREQ_IE_DST_F(preq_elem);
+	target_sn = PREQ_IE_TARGET_SN(preq_elem);
+	orig_sn = PREQ_IE_ORIG_SN(preq_elem);
+	target_flags = PREQ_IE_TARGET_F(preq_elem);
 
 	mhwmp_dbg("received PREQ from %pM\n", orig_addr);
 
-	if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) {
+	if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) {
 		mhwmp_dbg("PREQ is for us\n");
 		forward = false;
 		reply = true;
 		metric = 0;
-		if (time_after(jiffies, ifmsh->last_dsn_update +
+		if (time_after(jiffies, ifmsh->last_sn_update +
 					net_traversal_jiffies(sdata)) ||
-		    time_before(jiffies, ifmsh->last_dsn_update)) {
-			dst_dsn = ++ifmsh->dsn;
-			ifmsh->last_dsn_update = jiffies;
+		    time_before(jiffies, ifmsh->last_sn_update)) {
+			target_sn = ++ifmsh->sn;
+			ifmsh->last_sn_update = jiffies;
 		}
 	} else {
 		rcu_read_lock();
-		mpath = mesh_path_lookup(dst_addr, sdata);
+		mpath = mesh_path_lookup(target_addr, sdata);
 		if (mpath) {
-			if ((!(mpath->flags & MESH_PATH_DSN_VALID)) ||
-					DSN_LT(mpath->dsn, dst_dsn)) {
-				mpath->dsn = dst_dsn;
-				mpath->flags |= MESH_PATH_DSN_VALID;
-			} else if ((!(dst_flags & MP_F_DO)) &&
+			if ((!(mpath->flags & MESH_PATH_SN_VALID)) ||
+					SN_LT(mpath->sn, target_sn)) {
+				mpath->sn = target_sn;
+				mpath->flags |= MESH_PATH_SN_VALID;
+			} else if ((!(target_flags & MP_F_DO)) &&
 					(mpath->flags & MESH_PATH_ACTIVE)) {
 				reply = true;
 				metric = mpath->metric;
-				dst_dsn = mpath->dsn;
-				if (dst_flags & MP_F_RF)
-					dst_flags |= MP_F_DO;
+				target_sn = mpath->sn;
+				if (target_flags & MP_F_RF)
+					target_flags |= MP_F_DO;
 				else
 					forward = false;
 			}
@@ -519,9 +522,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		ttl = ifmsh->mshcfg.dot11MeshTTL;
 		if (ttl != 0) {
 			mhwmp_dbg("replying to the PREQ\n");
-			mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr,
-				cpu_to_le32(dst_dsn), 0, orig_addr,
-				cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl,
+			mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr,
+				cpu_to_le32(target_sn), 0, orig_addr,
+				cpu_to_le32(orig_sn), mgmt->sa, 0, ttl,
 				cpu_to_le32(lifetime), cpu_to_le32(metric),
 				0, sdata);
 		} else
@@ -544,8 +547,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		preq_id = PREQ_IE_PREQ_ID(preq_elem);
 		hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1;
 		mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr,
-				cpu_to_le32(orig_dsn), dst_flags, dst_addr,
-				cpu_to_le32(dst_dsn), sdata->dev->broadcast,
+				cpu_to_le32(orig_sn), target_flags, target_addr,
+				cpu_to_le32(target_sn), sdata->dev->broadcast,
 				hopcount, ttl, cpu_to_le32(lifetime),
 				cpu_to_le32(metric), cpu_to_le32(preq_id),
 				sdata);
@@ -560,10 +563,10 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 				    u8 *prep_elem, u32 metric)
 {
 	struct mesh_path *mpath;
-	u8 *dst_addr, *orig_addr;
+	u8 *target_addr, *orig_addr;
 	u8 ttl, hopcount, flags;
 	u8 next_hop[ETH_ALEN];
-	u32 dst_dsn, orig_dsn, lifetime;
+	u32 target_sn, orig_sn, lifetime;
 
 	mhwmp_dbg("received PREP from %pM\n", PREP_IE_ORIG_ADDR(prep_elem));
 
@@ -573,8 +576,8 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 	 * which corresponds with the originator of the PREQ which this PREP
 	 * replies
 	 */
-	dst_addr = PREP_IE_DST_ADDR(prep_elem);
-	if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0)
+	target_addr = PREP_IE_TARGET_ADDR(prep_elem);
+	if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0)
 		/* destination, no forwarding required */
 		return;
 
@@ -585,7 +588,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 	}
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, sdata);
+	mpath = mesh_path_lookup(target_addr, sdata);
 	if (mpath)
 		spin_lock_bh(&mpath->state_lock);
 	else
@@ -601,13 +604,13 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 	lifetime = PREP_IE_LIFETIME(prep_elem);
 	hopcount = PREP_IE_HOPCOUNT(prep_elem) + 1;
 	orig_addr = PREP_IE_ORIG_ADDR(prep_elem);
-	dst_dsn = PREP_IE_DST_DSN(prep_elem);
-	orig_dsn = PREP_IE_ORIG_DSN(prep_elem);
+	target_sn = PREP_IE_TARGET_SN(prep_elem);
+	orig_sn = PREP_IE_ORIG_SN(prep_elem);
 
 	mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr,
-		cpu_to_le32(orig_dsn), 0, dst_addr,
-		cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl,
-		cpu_to_le32(lifetime), cpu_to_le32(metric),
+		cpu_to_le32(orig_sn), 0, target_addr,
+		cpu_to_le32(target_sn), mpath->next_hop->sta.addr, hopcount,
+		ttl, cpu_to_le32(lifetime), cpu_to_le32(metric),
 		0, sdata);
 	rcu_read_unlock();
 
@@ -627,10 +630,10 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
 	u8 ttl;
-	u8 *ta, *dst_addr;
-	u8 dst_flags;
-	u32 dst_dsn;
-	u16 dst_rcode;
+	u8 *ta, *target_addr;
+	u8 target_flags;
+	u32 target_sn;
+	u16 target_rcode;
 
 	ta = mgmt->sa;
 	ttl = PERR_IE_TTL(perr_elem);
@@ -639,24 +642,24 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 	ttl--;
-	dst_flags = PERR_IE_DST_FLAGS(perr_elem);
-	dst_addr = PERR_IE_DST_ADDR(perr_elem);
-	dst_dsn = PERR_IE_DST_DSN(perr_elem);
-	dst_rcode = PERR_IE_DST_RCODE(perr_elem);
+	target_flags = PERR_IE_TARGET_FLAGS(perr_elem);
+	target_addr = PERR_IE_TARGET_ADDR(perr_elem);
+	target_sn = PERR_IE_TARGET_SN(perr_elem);
+	target_rcode = PERR_IE_TARGET_RCODE(perr_elem);
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, sdata);
+	mpath = mesh_path_lookup(target_addr, sdata);
 	if (mpath) {
 		spin_lock_bh(&mpath->state_lock);
 		if (mpath->flags & MESH_PATH_ACTIVE &&
 		    memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 &&
-		    (!(mpath->flags & MESH_PATH_DSN_VALID) ||
-		    DSN_GT(dst_dsn, mpath->dsn))) {
+		    (!(mpath->flags & MESH_PATH_SN_VALID) ||
+		    SN_GT(target_sn, mpath->sn))) {
 			mpath->flags &= ~MESH_PATH_ACTIVE;
-			mpath->dsn = dst_dsn;
+			mpath->sn = target_sn;
 			spin_unlock_bh(&mpath->state_lock);
-			mesh_path_error_tx(ttl, dst_addr, cpu_to_le32(dst_dsn),
-					   cpu_to_le16(dst_rcode),
+			mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn),
+					   cpu_to_le16(target_rcode),
 					   sdata->dev->broadcast, sdata);
 		} else
 			spin_unlock_bh(&mpath->state_lock);
@@ -673,7 +676,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	u8 *ta;
 	u8 ttl, flags, hopcount;
 	u8 *orig_addr;
-	u32 orig_dsn, metric;
+	u32 orig_sn, metric;
 
 	ta = mgmt->sa;
 	ttl = rann->rann_ttl;
@@ -684,7 +687,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 	ttl--;
 	flags = rann->rann_flags;
 	orig_addr = rann->rann_addr;
-	orig_dsn = rann->rann_seq;
+	orig_sn = rann->rann_seq;
 	hopcount = rann->rann_hopcount;
 	hopcount++;
 	metric = rann->rann_metric;
@@ -703,14 +706,14 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
 		mesh_queue_preq(mpath,
 				PREQ_Q_F_START | PREQ_Q_F_REFRESH);
 	}
-	if (mpath->dsn < orig_dsn) {
+	if (mpath->sn < orig_sn) {
 		mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
-				       cpu_to_le32(orig_dsn),
+				       cpu_to_le32(orig_sn),
 				       0, NULL, 0, sdata->dev->broadcast,
 				       hopcount, ttl, 0,
 				       cpu_to_le32(metric + mpath->metric),
 				       0, sdata);
-		mpath->dsn = orig_dsn;
+		mpath->sn = orig_sn;
 	}
 	rcu_read_unlock();
 }
@@ -823,7 +826,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_preq_queue *preq_node;
 	struct mesh_path *mpath;
-	u8 ttl, dst_flags;
+	u8 ttl, target_flags;
 	u32 lifetime;
 
 	spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
@@ -865,11 +868,11 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 
 	ifmsh->last_preq = jiffies;
 
-	if (time_after(jiffies, ifmsh->last_dsn_update +
+	if (time_after(jiffies, ifmsh->last_sn_update +
 				net_traversal_jiffies(sdata)) ||
-	    time_before(jiffies, ifmsh->last_dsn_update)) {
-		++ifmsh->dsn;
-		sdata->u.mesh.last_dsn_update = jiffies;
+	    time_before(jiffies, ifmsh->last_sn_update)) {
+		++ifmsh->sn;
+		sdata->u.mesh.last_sn_update = jiffies;
 	}
 	lifetime = default_lifetime(sdata);
 	ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
@@ -880,14 +883,14 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 	}
 
 	if (preq_node->flags & PREQ_Q_F_REFRESH)
-		dst_flags = MP_F_DO;
+		target_flags = MP_F_DO;
 	else
-		dst_flags = MP_F_RF;
+		target_flags = MP_F_RF;
 
 	spin_unlock_bh(&mpath->state_lock);
 	mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr,
-			cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst,
-			cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0,
+			cpu_to_le32(ifmsh->sn), target_flags, mpath->dst,
+			cpu_to_le32(mpath->sn), sdata->dev->broadcast, 0,
 			ttl, cpu_to_le32(lifetime), 0,
 			cpu_to_le32(ifmsh->preq_id++), sdata);
 	mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
@@ -914,15 +917,15 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 	struct sk_buff *skb_to_free = NULL;
 	struct mesh_path *mpath;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u8 *dst_addr = hdr->addr3;
+	u8 *target_addr = hdr->addr3;
 	int err = 0;
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, sdata);
+	mpath = mesh_path_lookup(target_addr, sdata);
 
 	if (!mpath) {
-		mesh_path_add(dst_addr, sdata);
-		mpath = mesh_path_lookup(dst_addr, sdata);
+		mesh_path_add(target_addr, sdata);
+		mpath = mesh_path_lookup(target_addr, sdata);
 		if (!mpath) {
 			sdata->u.mesh.mshstats.dropped_frames_no_route++;
 			err = -ENOSPC;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 5d541235ca2f..2cebdf52b7bc 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -463,10 +463,10 @@ void mesh_plink_broken(struct sta_info *sta)
 		    mpath->flags & MESH_PATH_ACTIVE &&
 		    !(mpath->flags & MESH_PATH_FIXED)) {
 			mpath->flags &= ~MESH_PATH_ACTIVE;
-			++mpath->dsn;
+			++mpath->sn;
 			spin_unlock_bh(&mpath->state_lock);
 			mesh_path_error_tx(MESH_TTL, mpath->dst,
-					cpu_to_le32(mpath->dsn),
+					cpu_to_le32(mpath->sn),
 					PERR_RCODE_DEST_UNREACH,
 					sdata->dev->broadcast, sdata);
 		} else
@@ -602,7 +602,7 @@ void mesh_path_discard_frame(struct sk_buff *skb,
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct mesh_path *mpath;
-	u32 dsn = 0;
+	u32 sn = 0;
 
 	if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) {
 		u8 *ra, *da;
@@ -611,8 +611,8 @@ void mesh_path_discard_frame(struct sk_buff *skb,
 		ra = hdr->addr1;
 		mpath = mesh_path_lookup(da, sdata);
 		if (mpath)
-			dsn = ++mpath->dsn;
-		mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(dsn),
+			sn = ++mpath->sn;
+		mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn),
 				   PERR_RCODE_NO_ROUTE, ra, sdata);
 	}
 
@@ -648,7 +648,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
 {
 	spin_lock_bh(&mpath->state_lock);
 	mesh_path_assign_nexthop(mpath, next_hop);
-	mpath->dsn = 0xffff;
+	mpath->sn = 0xffff;
 	mpath->metric = 0;
 	mpath->hop_count = 0;
 	mpath->exp_time = 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8c8e4eae6a17..7c1999872503 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2116,9 +2116,9 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
 	if (pinfo->filled & MPATH_INFO_FRAME_QLEN)
 		NLA_PUT_U32(msg, NL80211_MPATH_INFO_FRAME_QLEN,
 			    pinfo->frame_qlen);
-	if (pinfo->filled & MPATH_INFO_DSN)
-		NLA_PUT_U32(msg, NL80211_MPATH_INFO_DSN,
-			    pinfo->dsn);
+	if (pinfo->filled & MPATH_INFO_SN)
+		NLA_PUT_U32(msg, NL80211_MPATH_INFO_SN,
+			    pinfo->sn);
 	if (pinfo->filled & MPATH_INFO_METRIC)
 		NLA_PUT_U32(msg, NL80211_MPATH_INFO_METRIC,
 			    pinfo->metric);
-- 
cgit v1.2.3


From 63c5723bc3af8d4e86984dd4ff0c78218de418d0 Mon Sep 17 00:00:00 2001
From: Rui Paulo <rpaulo@gmail.com>
Date: Mon, 9 Nov 2009 23:46:57 +0000
Subject: mac80211: add nl80211/cfg80211 handling of the new mesh root mode
 option.

Signed-off-by: Rui Paulo <rpaulo@gmail.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Andrey Yurovsky <andrey@cozybit.com>
Tested-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h       |  3 +++
 include/net/cfg80211.h        |  1 +
 net/mac80211/cfg.c            |  7 +++++++
 net/mac80211/debugfs_netdev.c |  2 ++
 net/mac80211/mesh.c           | 13 +++++++++++++
 net/mac80211/mesh.h           |  1 +
 net/wireless/nl80211.c        |  6 ++++++
 7 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 7a0bd6ea6f63..d2f276de9abe 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1200,6 +1200,8 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs)
  * that it takes for an HWMP information element to propagate across the mesh
  *
+ * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not
+ *
  * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
@@ -1219,6 +1221,7 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
 	NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 	NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+	NL80211_MESHCONF_HWMP_ROOTMODE,
 
 	/* keep last */
 	__NL80211_MESHCONF_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7d0ae9c18286..0e4c51fc63e5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -508,6 +508,7 @@ struct mesh_config {
 	u32 dot11MeshHWMPactivePathTimeout;
 	u16 dot11MeshHWMPpreqMinInterval;
 	u16 dot11MeshHWMPnetDiameterTraversalTime;
+	u8  dot11MeshHWMPRootMode;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 81053587e72b..7f18c8fa1880 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1029,7 +1029,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 {
 	struct mesh_config *conf;
 	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_mesh *ifmsh;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	ifmsh = &sdata->u.mesh;
 
 	/* Set the config options which we are interested in setting */
 	conf = &(sdata->u.mesh.mshcfg);
@@ -1064,6 +1067,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 			   mask))
 		conf->dot11MeshHWMPnetDiameterTraversalTime =
 			nconf->dot11MeshHWMPnetDiameterTraversalTime;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) {
+		conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode;
+		ieee80211_mesh_root_setup(ifmsh);
+	}
 	return 0;
 }
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 8782264f49e7..472b2039906c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -149,6 +149,8 @@ IEEE80211_IF_FILE(path_refresh_time,
 		u.mesh.mshcfg.path_refresh_time, DEC);
 IEEE80211_IF_FILE(min_discovery_timeout,
 		u.mesh.mshcfg.min_discovery_timeout, DEC);
+IEEE80211_IF_FILE(dot11MeshHWMPRootMode,
+		u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC);
 #endif
 
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 88dcfe3030b1..05955dc6b3d3 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -373,6 +373,17 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)
 	ieee80211_queue_work(&local->hw, &ifmsh->work);
 }
 
+void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
+{
+	if (ifmsh->mshcfg.dot11MeshHWMPRootMode)
+		set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
+	else {
+		clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
+		/* stop running timer */
+		del_timer_sync(&ifmsh->mesh_path_root_timer);
+	}
+}
+
 /**
  * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame
  * @hdr:    	802.11 frame header
@@ -503,6 +514,7 @@ void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
 		add_timer(&ifmsh->mesh_path_timer);
 	if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running))
 		add_timer(&ifmsh->mesh_path_root_timer);
+	ieee80211_mesh_root_setup(ifmsh);
 }
 #endif
 
@@ -512,6 +524,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_local *local = sdata->local;
 
 	set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
+	ieee80211_mesh_root_setup(ifmsh);
 	ieee80211_queue_work(&local->hw, &ifmsh->work);
 	sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL;
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON |
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 00ee84258196..5ad6975bf28c 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -242,6 +242,7 @@ ieee80211_rx_result
 ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb);
 void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata);
+void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh);
 
 /* Mesh paths */
 int mesh_nexthop_lookup(struct sk_buff *skb,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7c1999872503..17bcad69428d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2616,6 +2616,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 			cur_params.dot11MeshHWMPpreqMinInterval);
 	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			cur_params.dot11MeshHWMPnetDiameterTraversalTime);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE,
+			cur_params.dot11MeshHWMPRootMode);
 	nla_nest_end(msg, pinfoattr);
 	genlmsg_end(msg, hdr);
 	err = genlmsg_reply(msg, info);
@@ -2726,6 +2728,10 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
 			dot11MeshHWMPnetDiameterTraversalTime,
 			mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
+			dot11MeshHWMPRootMode, mask,
+			NL80211_MESHCONF_HWMP_ROOTMODE,
+			nla_get_u8);
 
 	/* Apply changes */
 	err = rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
-- 
cgit v1.2.3


From 61fa713c751683da915fa0c1aa502be85822c357 Mon Sep 17 00:00:00 2001
From: Holger Schurig <holgerschurig@gmail.com>
Date: Wed, 11 Nov 2009 12:25:40 +0100
Subject: cfg80211: return channel noise via survey API

This patch implements the NL80211_CMD_GET_SURVEY command and an get_survey()
ops that a driver can implement. The goal of this command is to allow a
drivers to report channel survey data (e.g. channel noise, channel
occupation).

For now, only the mechanism to report back channel noise has been
implemented.

In future, there will either be a survey-trigger command --- or the existing
scan-trigger command will be enhanced. This will allow user-space to
request survey for arbitrary channels.

Note: any driver that cannot report channel noise should not report
any value at all, e.g. made-up -92 dBm.

Signed-off-by: Holger Schurig <holgerschurig@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  34 ++++++++++++++++
 include/net/cfg80211.h  |  34 ++++++++++++++++
 net/wireless/nl80211.c  | 105 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 173 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index d2f276de9abe..45db17f81aa3 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -160,6 +160,11 @@
  * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons,
  *	partial scan results may be available
  *
+ * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation
+ *      or noise level
+ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to
+ *	NL80211_CMD_GET_SURVEY and on the "scan" multicast group)
+ *
  * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain
  * 	has been changed and provides details of the request information
  * 	that caused the change such as who initiated the regulatory request
@@ -341,6 +346,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_WIPHY_NETNS,
 
+	NL80211_CMD_GET_SURVEY,
+	NL80211_CMD_NEW_SURVEY_RESULTS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -586,6 +594,10 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface
  *
+ * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of
+ *      the survey response for %NL80211_CMD_GET_SURVEY, nested attribute
+ *      containing info as possible, see &enum survey_info.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -718,6 +730,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_4ADDR,
 
+	NL80211_ATTR_SURVEY_INFO,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1120,6 +1134,26 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_IBSS		= 1<<8,
 };
 
+/**
+ * enum nl80211_survey_info - survey information
+ *
+ * These attribute types are used with %NL80211_ATTR_SURVEY_INFO
+ * when getting information about a survey.
+ *
+ * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel
+ * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm)
+ */
+enum nl80211_survey_info {
+	__NL80211_SURVEY_INFO_INVALID,
+	NL80211_SURVEY_INFO_FREQUENCY,
+	NL80211_SURVEY_INFO_NOISE,
+
+	/* keep last */
+	__NL80211_SURVEY_INFO_AFTER_LAST,
+	NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_mntr_flags - monitor configuration flags
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0e4c51fc63e5..21710fc17eaf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -234,6 +234,35 @@ struct key_params {
 	u32 cipher;
 };
 
+/**
+ * enum survey_info_flags - survey information flags
+ *
+ * Used by the driver to indicate which info in &struct survey_info
+ * it has filled in during the get_survey().
+ */
+enum survey_info_flags {
+	SURVEY_INFO_NOISE_DBM = 1<<0,
+};
+
+/**
+ * struct survey_info - channel survey response
+ *
+ * Used by dump_survey() to report back per-channel survey information.
+ *
+ * @channel: the channel this survey record reports, mandatory
+ * @filled: bitflag of flags from &enum survey_info_flags
+ * @noise: channel noise in dBm. This and all following fields are
+ *     optional
+ *
+ * This structure can later be expanded with things like
+ * channel duty cycle etc.
+ */
+struct survey_info {
+	struct ieee80211_channel *channel;
+	u32 filled;
+	s8 noise;
+};
+
 /**
  * struct beacon_parameters - beacon parameters
  *
@@ -944,6 +973,8 @@ struct cfg80211_bitrate_mask {
  * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting
  *	functions to adjust rfkill hw state
  *
+ * @dump_survey: get site survey information.
+ *
  * @testmode_cmd: run a test mode command
  */
 struct cfg80211_ops {
@@ -1063,6 +1094,9 @@ struct cfg80211_ops {
 				    const u8 *peer,
 				    const struct cfg80211_bitrate_mask *mask);
 
+	int	(*dump_survey)(struct wiphy *wiphy, struct net_device *netdev,
+			int idx, struct survey_info *info);
+
 	/* some temporary stuff to finish wext */
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4dc139cdba5c..5c8b3bfada4b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3245,6 +3245,106 @@ static int nl80211_dump_scan(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
+				int flags, struct net_device *dev,
+				struct survey_info *survey)
+{
+	void *hdr;
+	struct nlattr *infoattr;
+
+	/* Survey without a channel doesn't make sense */
+	if (!survey->channel)
+		return -EINVAL;
+
+	hdr = nl80211hdr_put(msg, pid, seq, flags,
+			     NL80211_CMD_NEW_SURVEY_RESULTS);
+	if (!hdr)
+		return -ENOMEM;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+
+	infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO);
+	if (!infoattr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(msg, NL80211_SURVEY_INFO_FREQUENCY,
+		    survey->channel->center_freq);
+	if (survey->filled & SURVEY_INFO_NOISE_DBM)
+		NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE,
+			    survey->noise);
+
+	nla_nest_end(msg, infoattr);
+
+	return genlmsg_end(msg, hdr);
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int nl80211_dump_survey(struct sk_buff *skb,
+			struct netlink_callback *cb)
+{
+	struct survey_info survey;
+	struct cfg80211_registered_device *dev;
+	struct net_device *netdev;
+	int ifidx = cb->args[0];
+	int survey_idx = cb->args[1];
+	int res;
+
+	if (!ifidx)
+		ifidx = nl80211_get_ifidx(cb);
+	if (ifidx < 0)
+		return ifidx;
+	cb->args[0] = ifidx;
+
+	rtnl_lock();
+
+	netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
+	if (!netdev) {
+		res = -ENODEV;
+		goto out_rtnl;
+	}
+
+	dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
+	if (IS_ERR(dev)) {
+		res = PTR_ERR(dev);
+		goto out_rtnl;
+	}
+
+	if (!dev->ops->dump_survey) {
+		res = -EOPNOTSUPP;
+		goto out_err;
+	}
+
+	while (1) {
+		res = dev->ops->dump_survey(&dev->wiphy, netdev, survey_idx,
+					    &survey);
+		if (res == -ENOENT)
+			break;
+		if (res)
+			goto out_err;
+
+		if (nl80211_send_survey(skb,
+				NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				netdev,
+				&survey) < 0)
+			goto out;
+		survey_idx++;
+	}
+
+ out:
+	cb->args[1] = survey_idx;
+	res = skb->len;
+ out_err:
+	cfg80211_unlock_rdev(dev);
+ out_rtnl:
+	rtnl_unlock();
+
+	return res;
+}
+
 static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type)
 {
 	return auth_type <= NL80211_AUTHTYPE_MAX;
@@ -4322,6 +4422,11 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_GET_SURVEY,
+		.policy = nl80211_policy,
+		.dumpit = nl80211_dump_survey,
+	},
 };
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
 	.name = "mlme",
-- 
cgit v1.2.3


From 687b16fb617bd446439425a368ad7c7bbd202c73 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 13 Nov 2009 13:16:15 +0100
Subject: hw-breakpoints: Provide an off-case for counter_arch_bp()

If an arch doesn't support the hw breakpoints, counter_arch_bp()
has no off case to cover the missing breakpoint info structure
from the perf event. The result is a build error in non-x86
configs.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/hw_breakpoint.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7eba9b92e5f3..18710e0c84bd 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,11 +16,6 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
-static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
-{
-	return &bp->hw.info;
-}
-
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -83,6 +78,11 @@ extern void release_bp_slot(struct perf_event *bp);
 
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
 static inline struct perf_event *
@@ -126,6 +126,11 @@ static inline void release_bp_slot(struct perf_event *bp) 		{ }
 
 static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
 
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 #endif /* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3


From f6c06b6807ff9281295989ebad72523865325a4f Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 13 Nov 2009 15:14:11 -0500
Subject: vc: Add support for hiding the cursor when creating VTs

Add support for setting a global default for whether or not a visible
cursor should be enabled when creating VCs. The default will be to do so,
unless overridden by the user at boot time or by a driver.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
LKML-Reference: <1258143251-5818-1-git-send-email-mjg@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 Documentation/kernel-parameters.txt |  9 +++++++++
 drivers/char/vt.c                   | 11 ++++++++++-
 drivers/video/console/vgacon.c      |  2 ++
 include/linux/vt_kern.h             |  3 +++
 4 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9107b387e91f..dd42eb65fd27 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2729,6 +2729,15 @@ and is between 256 and 4096 characters. It is defined in the file
 			Default is 1, i.e. UTF-8 mode is enabled for all
 			newly opened terminals.
 
+	vt.global_cursor_default=
+			[VT]
+			Format=<-1|0|1>
+			Set system-wide default for whether a cursor
+			is shown on new VTs. Default is -1,
+			i.e. cursors will be created by default unless
+			overridden by individual drivers. 0 will hide
+			cursors, 1 will display them.
+
 	waveartist=	[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>
 
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 0c80c68cd047..1e3d728dbf7e 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -161,6 +161,8 @@ static void set_palette(struct vc_data *vc);
 static int printable;		/* Is console ready for printing? */
 int default_utf8 = true;
 module_param(default_utf8, int, S_IRUGO | S_IWUSR);
+int global_cursor_default = -1;
+module_param(global_cursor_default, int, S_IRUGO | S_IWUSR);
 
 /*
  * ignore_poke: don't unblank the screen when things are typed.  This is
@@ -775,6 +777,12 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 		vc_cons[currcons].d = NULL;
 		return -ENOMEM;
 	    }
+
+	    /* If no drivers have overridden us and the user didn't pass a
+	       boot option, default to displaying the cursor */
+	    if (global_cursor_default == -1)
+		    global_cursor_default = 1;
+
 	    vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
 	    vcs_make_sysfs(currcons);
 	    atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
@@ -1616,7 +1624,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear)
 	vc->vc_decscnm		= 0;
 	vc->vc_decom		= 0;
 	vc->vc_decawm		= 1;
-	vc->vc_deccm		= 1;
+	vc->vc_deccm		= global_cursor_default;
 	vc->vc_decim		= 0;
 
 	set_kbd(vc, decarm);
@@ -4078,6 +4086,7 @@ EXPORT_SYMBOL(fg_console);
 EXPORT_SYMBOL(console_blank_hook);
 EXPORT_SYMBOL(console_blanked);
 EXPORT_SYMBOL(vc_cons);
+EXPORT_SYMBOL(global_cursor_default);
 #ifndef VT_SINGLE_DRIVER
 EXPORT_SYMBOL(take_over_console);
 EXPORT_SYMBOL(give_up_console);
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index da55ccaf4d55..564643edb3c1 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -585,6 +585,8 @@ static void vgacon_init(struct vc_data *c, int init)
 	vgacon_uni_pagedir[1]++;
 	if (!vgacon_uni_pagedir[0] && p)
 		con_set_default_unimap(c);
+
+	hide_boot_cursor(screen_info.flags & VIDEO_FLAGS_NOCURSOR);
 }
 
 static void vgacon_deinit(struct vc_data *c)
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index c0c4e1103a73..7f56db4a79f0 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -110,6 +110,7 @@ extern char con_buf[CON_BUF_SIZE];
 extern struct mutex con_buf_mtx;
 extern char vt_dont_switch;
 extern int default_utf8;
+extern int global_cursor_default;
 
 struct vt_spawn_console {
 	spinlock_t lock;
@@ -130,4 +131,6 @@ struct vt_notifier_param {
 extern int register_vt_notifier(struct notifier_block *nb);
 extern int unregister_vt_notifier(struct notifier_block *nb);
 
+extern void hide_boot_cursor(bool hide);
+
 #endif /* _VT_KERN_H */
-- 
cgit v1.2.3


From 688bcaff291cf2fe2734e43f2793d4d05b850518 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 14 Nov 2009 01:12:47 +0100
Subject: hw-breakpoints: Fix build on !perf architectures

the arch/alpha build fails with:

 In file included from tip/kernel/exit.c:52:
 tip/include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr':
 tip/include/linux/hw_breakpoint.h:21: error: 'struct perf_event' has no member named 'attr'
 [...]

Move these helper inlines inside the CONFIG_HAVE_HW_BREAKPOINT ifdef.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 18710e0c84bd..0b98cbf76da7 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,6 +16,8 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -31,7 +33,6 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 	return bp->attr.bp_len;
 }
 
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 extern struct perf_event *
 register_user_hw_breakpoint(unsigned long addr,
 			    int len,
-- 
cgit v1.2.3


From bee7ca9ec03a26676ea2b1c28dc4039348eff3e1 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Tue, 10 Nov 2009 09:51:18 +0000
Subject: net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED

Define two symbols needed in both kernel and user space.

Remove old (somewhat incorrect) kernel variant that wasn't used in
most cases.  Default should apply to both RMSS and SMSS (RFC2581).

Replace numeric constants with defined symbols.

Stand-alone patch, originally developed for TCPCT.

Signed-off-by: William.Allen.Simpson@gmail.com
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 6 ++++++
 include/net/tcp.h        | 3 ---
 net/ipv4/tcp_input.c     | 4 ++--
 net/ipv4/tcp_ipv4.c      | 6 +++---
 net/ipv4/tcp_minisocks.c | 2 +-
 net/ipv6/tcp_ipv6.c      | 2 +-
 6 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eeecb8547a2a..32d7d77b4a01 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -81,6 +81,12 @@ enum {
 	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
 }; 
 
+/*
+ * TCP general constants
+ */
+#define TCP_MSS_DEFAULT		 536U	/* IPv4 (RFC1122, RFC2581) */
+#define TCP_MSS_DESIRED		1220U	/* IPv6 (tunneled), EDNS0 (RFC3226) */
+
 /* TCP socket options */
 #define TCP_NODELAY		1	/* Turn off Nagle's algorithm. */
 #define TCP_MAXSEG		2	/* Limit MSS */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bf20f88fd033..325bfcf5c934 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -62,9 +62,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
 
-/* Minimal RCV_MSS. */
-#define TCP_MIN_RCVMSS		536U
-
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be0c5bf7bfca..cc306ac6eb51 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 		 * "len" is invariant segment length, including TCP header.
 		 */
 		len += skb->data - skb_transport_header(skb);
-		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
+		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
 		    /* If PSH is not set, packet should be
 		     * full sized, provided peer TCP is not badly broken.
 		     * This observation (if it is correct 8)) allows
@@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
-	hint = min(hint, TCP_MIN_RCVMSS);
+	hint = min(hint, TCP_MSS_DEFAULT);
 	hint = max(hint, TCP_MIN_MSS);
 
 	inet_csk(sk)->icsk_ack.rcv_mss = hint;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 657ae334f125..cf7f2086e6e0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -217,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (inet->opt)
 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 
-	tp->rx_opt.mss_clamp = 536;
+	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 
 	/* Socket identity is still unknown (sport may be zero).
 	 * However we set state to SYN-SENT and not releasing socket
@@ -1268,7 +1268,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	tcp_clear_options(&tmp_opt);
-	tmp_opt.mss_clamp = 536;
+	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_parse_options(skb, &tmp_opt, 0, dst);
@@ -1815,7 +1815,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache = 536;
+	tp->mss_cache = TCP_MSS_DEFAULT;
 
 	tp->reordering = sysctl_tcp_reordering;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a9d34e224cb6..4be22280e6b3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -476,7 +476,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		if (newtp->af_specific->md5_lookup(sk, newsk))
 			newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
+		if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 696a22f034e8..de709091b26d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1851,7 +1851,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache = 536;
+	tp->mss_cache = TCP_MSS_DEFAULT;
 
 	tp->reordering = sysctl_tcp_reordering;
 
-- 
cgit v1.2.3


From ce81b76a39835a721cd168e0c0bcfe7132f1f66b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 11 Nov 2009 17:34:30 +0000
Subject: ipv6: use RCU to walk list of network devices

No longer need read_lock(&dev_base_lock), use RCU instead.
We also can avoid taking references on inet6_dev structs.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 net/ipv6/anycast.c        | 29 +++++++++++++--------------
 net/ipv6/mcast.c          | 51 +++++++++++++++++++++--------------------------
 3 files changed, 47 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8b266390b9e2..61425d0c6123 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1114,6 +1114,16 @@ static inline struct net_device *next_net_device(struct net_device *dev)
 	return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
 }
 
+static inline struct net_device *next_net_device_rcu(struct net_device *dev)
+{
+	struct list_head *lh;
+	struct net *net;
+
+	net = dev_net(dev);
+	lh = rcu_dereference(dev->dev_list.next);
+	return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
+}
+
 static inline struct net_device *first_net_device(struct net *net)
 {
 	return list_empty(&net->dev_base_head) ? NULL :
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 2f00ca83f049..f1c74c8ef9de 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	state->idev = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 			break;
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return im;
 }
@@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
 
 	im = im->aca_next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = next_net_device(state->dev);
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return ac6_get_idx(seq, *pos);
 }
 
 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifacaddr6 *im;
-	im = ac6_get_next(seq, v);
+	struct ifacaddr6 *im = ac6_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void ac6_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct ac6_iter_state *state = ac6_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
+		state->idev = NULL;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int ac6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f9fcf690bd5d..1f9c44442e65 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	state->idev = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 			break;
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return im;
 }
@@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 
 	im = im->next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = next_net_device(state->dev);
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return igmp6_mc_get_idx(seq, *pos);
 }
 
 static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifmcaddr6 *im;
-	im = igmp6_mc_get_next(seq, v);
+	struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
@@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(net, state->dev) {
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 			spin_unlock_bh(&im->mca_lock);
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return psf;
 }
@@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 		spin_unlock_bh(&state->im->mca_lock);
 		state->im = state->im->next;
 		while (!state->im) {
-			if (likely(state->idev != NULL)) {
+			if (likely(state->idev != NULL))
 				read_unlock_bh(&state->idev->lock);
-				in6_dev_put(state->idev);
-			}
-			state->dev = next_net_device(state->dev);
+
+			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
 				goto out;
 			}
-			state->idev = in6_dev_get(state->dev);
+			state->idev = __in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
 			read_lock_bh(&state->idev->lock);
@@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 	if (likely(state->im != NULL)) {
@@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 	}
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
-- 
cgit v1.2.3


From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 13 Nov 2009 21:47:33 -0800
Subject: perf_event: Fix invalid type in ioctl definition

u64 is invalid in userspace headers, including ioctl
definitions; use __u64 instead

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: <stable@kernel.org>
LKML-Reference: <20091113214733.7cd76be9@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45b56faf5cdc..ec3768a81058 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -219,7 +219,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
-- 
cgit v1.2.3


From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Sat, 14 Nov 2009 20:46:38 +0900
Subject: swiotlb: Remove duplicate swiotlb_force extern declarations

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: tony.luck@intel.com
LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/include/asm/swiotlb.h | 2 --
 arch/x86/include/asm/swiotlb.h  | 4 ----
 include/linux/swiotlb.h         | 2 ++
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
index dcbaea7ce128..f0acde68aaea 100644
--- a/arch/ia64/include/asm/swiotlb.h
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -4,8 +4,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/swiotlb.h>
 
-extern int swiotlb_force;
-
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
 extern void pci_swiotlb_init(void);
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 940f13a213f8..87ffcb12a1b8 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -3,10 +3,6 @@
 
 #include <linux/swiotlb.h>
 
-/* SWIOTLB interface */
-
-extern int swiotlb_force;
-
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
 extern int pci_swiotlb_init(void);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index eb9bdb4d4854..febedcf67c7e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,8 @@ struct device;
 struct dma_attrs;
 struct scatterlist;
 
+extern int swiotlb_force;
+
 /*
  * Maximum allowable number of contiguous slabs to map,
  * must be a power of 2.  What is the appropriate value ?
-- 
cgit v1.2.3


From 1a7af63108f07637504300671a72432c34e10021 Mon Sep 17 00:00:00 2001
From: Jiro SEKIBA <jir@unicus.jp>
Date: Sun, 15 Nov 2009 13:49:44 +0900
Subject: nilfs2: deleted struct nilfs_dat_group_desc

struct nilfs_dat_group_desc is not used both in kernel and user spaces.
struct nilfs_palloc_group_desc is used instead.

Signed-off-by: Jiro SEKIBA <jir@unicus.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 79fec6af3f9f..ce520402e840 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -424,15 +424,6 @@ struct nilfs_dat_entry {
 	__le64 de_rsv;
 };
 
-/**
- * struct nilfs_dat_group_desc - block group descriptor
- * @dg_nfrees: number of free virtual block numbers in block group
- */
-struct nilfs_dat_group_desc {
-	__le32 dg_nfrees;
-};
-
-
 /**
  * struct nilfs_snapshot_list - snapshot list
  * @ssl_next: next checkpoint number on snapshot list
-- 
cgit v1.2.3


From dc186ad741c12ae9ecac8b89e317ef706fdaf8f6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 01:09:48 +0900
Subject: workqueue: Add debugobjects support

Add debugobject support to track the life time of work_structs.

While at it, remove duplicate definition of
INIT_DELAYED_WORK_ON_STACK().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/smpboot.c |   4 +-
 include/linux/workqueue.h |  38 ++++++++++----
 kernel/workqueue.c        | 131 ++++++++++++++++++++++++++++++++++++++++++++--
 lib/Kconfig.debug         |   8 +++
 4 files changed, 166 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 565ebc65920e..ba43dfed353d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -687,7 +687,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
 
-	INIT_WORK(&c_idle.work, do_fork_idle);
+	INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
 
 	alternatives_smp_switch(1);
 
@@ -713,6 +713,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 
 	if (IS_ERR(c_idle.idle)) {
 		printk("failed fork for CPU %d\n", cpu);
+		destroy_work_on_stack(&c_idle.work);
 		return PTR_ERR(c_idle.idle);
 	}
 
@@ -831,6 +832,7 @@ do_rest:
 		smpboot_restore_warm_reset_vector();
 	}
 
+	destroy_work_on_stack(&c_idle.work);
 	return boot_error;
 }
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index cf24c20de9e4..9466e860d8c2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -25,6 +25,7 @@ typedef void (*work_func_t)(struct work_struct *work);
 struct work_struct {
 	atomic_long_t data;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
+#define WORK_STRUCT_STATIC  1		/* static initializer (debugobjects) */
 #define WORK_STRUCT_FLAG_MASK (3UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
@@ -35,6 +36,7 @@ struct work_struct {
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
+#define WORK_DATA_STATIC_INIT()	ATOMIC_LONG_INIT(2)
 
 struct delayed_work {
 	struct work_struct work;
@@ -63,7 +65,7 @@ struct execute_work {
 #endif
 
 #define __WORK_INITIALIZER(n, f) {				\
-	.data = WORK_DATA_INIT(),				\
+	.data = WORK_DATA_STATIC_INIT(),			\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
@@ -91,6 +93,14 @@ struct execute_work {
 #define PREPARE_DELAYED_WORK(_work, _func)			\
 	PREPARE_WORK(&(_work)->work, (_func))
 
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+extern void __init_work(struct work_struct *work, int onstack);
+extern void destroy_work_on_stack(struct work_struct *work);
+#else
+static inline void __init_work(struct work_struct *work, int onstack) { }
+static inline void destroy_work_on_stack(struct work_struct *work) { }
+#endif
+
 /*
  * initialize all of a work item in one go
  *
@@ -99,24 +109,36 @@ struct execute_work {
  * to generate better code.
  */
 #ifdef CONFIG_LOCKDEP
-#define INIT_WORK(_work, _func)						\
+#define __INIT_WORK(_work, _func, _onstack)				\
 	do {								\
 		static struct lock_class_key __key;			\
 									\
+		__init_work((_work), _onstack);				\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
 		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
 #else
-#define INIT_WORK(_work, _func)						\
+#define __INIT_WORK(_work, _func, _onstack)				\
 	do {								\
+		__init_work((_work), _onstack);				\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
 #endif
 
+#define INIT_WORK(_work, _func)					\
+	do {							\
+		__INIT_WORK((_work), (_func), 0);		\
+	} while (0)
+
+#define INIT_WORK_ON_STACK(_work, _func)			\
+	do {							\
+		__INIT_WORK((_work), (_func), 1);		\
+	} while (0)
+
 #define INIT_DELAYED_WORK(_work, _func)				\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
@@ -125,22 +147,16 @@ struct execute_work {
 
 #define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
 	do {							\
-		INIT_WORK(&(_work)->work, (_func));		\
+		INIT_WORK_ON_STACK(&(_work)->work, (_func));	\
 		init_timer_on_stack(&(_work)->timer);		\
 	} while (0)
 
-#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)			\
+#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)		\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer_deferrable(&(_work)->timer);		\
 	} while (0)
 
-#define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
-	do {							\
-		INIT_WORK(&(_work)->work, (_func));		\
-		init_timer_on_stack(&(_work)->timer);		\
-	} while (0)
-
 /**
  * work_pending - Find out whether a work item is currently pending
  * @work: The work item in question
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 12328147132c..ddad63fbb61b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,116 @@ struct workqueue_struct {
 #endif
 };
 
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+
+static struct debug_obj_descr work_debug_descr;
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ */
+static int work_fixup_init(void *addr, enum debug_obj_state state)
+{
+	struct work_struct *work = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		cancel_work_sync(work);
+		debug_object_init(work, &work_debug_descr);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ */
+static int work_fixup_activate(void *addr, enum debug_obj_state state)
+{
+	struct work_struct *work = addr;
+
+	switch (state) {
+
+	case ODEBUG_STATE_NOTAVAILABLE:
+		/*
+		 * This is not really a fixup. The work struct was
+		 * statically initialized. We just make sure that it
+		 * is tracked in the object tracker.
+		 */
+		if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) {
+			debug_object_init(work, &work_debug_descr);
+			debug_object_activate(work, &work_debug_descr);
+			return 0;
+		}
+		WARN_ON_ONCE(1);
+		return 0;
+
+	case ODEBUG_STATE_ACTIVE:
+		WARN_ON(1);
+
+	default:
+		return 0;
+	}
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ */
+static int work_fixup_free(void *addr, enum debug_obj_state state)
+{
+	struct work_struct *work = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		cancel_work_sync(work);
+		debug_object_free(work, &work_debug_descr);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static struct debug_obj_descr work_debug_descr = {
+	.name		= "work_struct",
+	.fixup_init	= work_fixup_init,
+	.fixup_activate	= work_fixup_activate,
+	.fixup_free	= work_fixup_free,
+};
+
+static inline void debug_work_activate(struct work_struct *work)
+{
+	debug_object_activate(work, &work_debug_descr);
+}
+
+static inline void debug_work_deactivate(struct work_struct *work)
+{
+	debug_object_deactivate(work, &work_debug_descr);
+}
+
+void __init_work(struct work_struct *work, int onstack)
+{
+	if (onstack)
+		debug_object_init_on_stack(work, &work_debug_descr);
+	else
+		debug_object_init(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(__init_work);
+
+void destroy_work_on_stack(struct work_struct *work)
+{
+	debug_object_free(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+
+#else
+static inline void debug_work_activate(struct work_struct *work) { }
+static inline void debug_work_deactivate(struct work_struct *work) { }
+#endif
+
 /* Serializes the accesses to the list of workqueues. */
 static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
@@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 {
 	unsigned long flags;
 
+	debug_work_activate(work);
 	spin_lock_irqsave(&cwq->lock, flags);
 	insert_work(cwq, work, &cwq->worklist);
 	spin_unlock_irqrestore(&cwq->lock, flags);
@@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
 		trace_workqueue_execution(cwq->thread, work);
+		debug_work_deactivate(work);
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irq(&cwq->lock);
@@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work)
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 			struct wq_barrier *barr, struct list_head *head)
 {
-	INIT_WORK(&barr->work, wq_barrier_func);
+	/*
+	 * debugobject calls are safe here even with cwq->lock locked
+	 * as we know for sure that this will not trigger any of the
+	 * checks and call back into the fixup functions where we
+	 * might deadlock.
+	 */
+	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
 
 	init_completion(&barr->done);
 
+	debug_work_activate(&barr->work);
 	insert_work(cwq, &barr->work, head);
 }
 
@@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 	}
 	spin_unlock_irq(&cwq->lock);
 
-	if (active)
+	if (active) {
 		wait_for_completion(&barr.done);
+		destroy_work_on_stack(&barr.work);
+	}
 
 	return active;
 }
@@ -451,6 +572,7 @@ out:
 		return 0;
 
 	wait_for_completion(&barr.done);
+	destroy_work_on_stack(&barr.work);
 	return 1;
 }
 EXPORT_SYMBOL_GPL(flush_work);
@@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work)
 		 */
 		smp_rmb();
 		if (cwq == get_wq_data(work)) {
+			debug_work_deactivate(work);
 			list_del_init(&work->entry);
 			ret = 1;
 		}
@@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 	}
 	spin_unlock_irq(&cwq->lock);
 
-	if (unlikely(running))
+	if (unlikely(running)) {
 		wait_for_completion(&barr.done);
+		destroy_work_on_stack(&barr.work);
+	}
 }
 
 static void wait_on_work(struct work_struct *work)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 234ceb10861f..c91f0519d493 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -298,6 +298,14 @@ config DEBUG_OBJECTS_TIMERS
 	  timer routines to track the life time of timer objects and
 	  validate the timer operations.
 
+config DEBUG_OBJECTS_WORK
+	bool "Debug work objects"
+	depends on DEBUG_OBJECTS
+	help
+	  If you say Y here, additional code will be inserted into the
+	  work queue routines to track the life time of work objects and
+	  validate the work operations.
+
 config DEBUG_OBJECTS_ENABLE_DEFAULT
 	int "debug_objects bootup default value (0-1)"
         range 0 1
-- 
cgit v1.2.3


From 31b4ff06e01a9a98a8e6ae6e8c42213648eec1d1 Mon Sep 17 00:00:00 2001
From: Balaji Rao <balajirrao@openmoko.org>
Date: Thu, 5 Nov 2009 00:24:55 +0300
Subject: pcf50633: introduces battery charging current control

Implement a new sysfs attribute to allow changing MBC charging limit on
the fly independently of usb current limit. It also gets set
automatically every time usb current limit is changed.

Limiting charging current also prevents violating USB specification in
the case when the whole device is shut down and usb current limit is
reset to the factory default by the pcf50633 state transition.

Signed-off-by: Balaji Rao <balajirrao@openmoko.org>
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/pcf50633-charger.c  | 78 ++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/pcf50633/core.h |  7 ++++
 2 files changed, 80 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c
index 48e92c020f4b..21b6e64e7805 100644
--- a/drivers/power/pcf50633-charger.c
+++ b/drivers/power/pcf50633-charger.c
@@ -48,16 +48,21 @@ int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
 	u8 bits;
 	int charging_start = 1;
 	u8 mbcs2, chgmod;
+	unsigned int mbcc5;
 
-	if (ma >= 1000)
+	if (ma >= 1000) {
 		bits = PCF50633_MBCC7_USB_1000mA;
-	else if (ma >= 500)
+		ma = 1000;
+	} else if (ma >= 500) {
 		bits = PCF50633_MBCC7_USB_500mA;
-	else if (ma >= 100)
+		ma = 500;
+	} else if (ma >= 100) {
 		bits = PCF50633_MBCC7_USB_100mA;
-	else {
+		ma = 100;
+	} else {
 		bits = PCF50633_MBCC7_USB_SUSPEND;
 		charging_start = 0;
+		ma = 0;
 	}
 
 	ret = pcf50633_reg_set_bit_mask(pcf, PCF50633_REG_MBCC7,
@@ -67,7 +72,24 @@ int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
 	else
 		dev_info(pcf->dev, "usb curlim to %d mA\n", ma);
 
-	/* Manual charging start */
+	/*
+	 * We limit the charging current to be the USB current limit.
+	 * The reason is that on pcf50633, when it enters PMU Standby mode,
+	 * which it does when the device goes "off", the USB current limit
+	 * reverts to the variant default.  In at least one common case, that
+	 * default is 500mA.  By setting the charging current to be the same
+	 * as the USB limit we set here before PMU standby, we enforce it only
+	 * using the correct amount of current even when the USB current limit
+	 * gets reset to the wrong thing
+	 */
+
+	if (mbc->pcf->pdata->charger_reference_current_ma) {
+		mbcc5 = (ma << 8) / mbc->pcf->pdata->charger_reference_current_ma;
+		if (mbcc5 > 255)
+			mbcc5 = 255;
+		pcf50633_reg_write(mbc->pcf, PCF50633_REG_MBCC5, mbcc5);
+	}
+
 	mbcs2 = pcf50633_reg_read(pcf, PCF50633_REG_MBCS2);
 	chgmod = (mbcs2 & PCF50633_MBCS2_MBC_MASK);
 
@@ -157,9 +179,55 @@ static ssize_t set_usblim(struct device *dev,
 
 static DEVICE_ATTR(usb_curlim, S_IRUGO | S_IWUSR, show_usblim, set_usblim);
 
+static ssize_t
+show_chglim(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pcf50633_mbc *mbc = dev_get_drvdata(dev);
+	u8 mbcc5 = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCC5);
+	unsigned int ma;
+
+	if (!mbc->pcf->pdata->charger_reference_current_ma)
+		return -ENODEV;
+
+	ma = (mbc->pcf->pdata->charger_reference_current_ma *  mbcc5) >> 8;
+
+	return sprintf(buf, "%u\n", ma);
+}
+
+static ssize_t set_chglim(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct pcf50633_mbc *mbc = dev_get_drvdata(dev);
+	unsigned long ma;
+	unsigned int mbcc5;
+	int ret;
+
+	if (!mbc->pcf->pdata->charger_reference_current_ma)
+		return -ENODEV;
+
+	ret = strict_strtoul(buf, 10, &ma);
+	if (ret)
+		return -EINVAL;
+
+	mbcc5 = (ma << 8) / mbc->pcf->pdata->charger_reference_current_ma;
+	if (mbcc5 > 255)
+		mbcc5 = 255;
+	pcf50633_reg_write(mbc->pcf, PCF50633_REG_MBCC5, mbcc5);
+
+	return count;
+}
+
+/*
+ * This attribute allows to change MBC charging limit on the fly
+ * independently of usb current limit. It also gets set automatically every
+ * time usb current limit is changed.
+ */
+static DEVICE_ATTR(chg_curlim, S_IRUGO | S_IWUSR, show_chglim, set_chglim);
+
 static struct attribute *pcf50633_mbc_sysfs_entries[] = {
 	&dev_attr_chgmode.attr,
 	&dev_attr_usb_curlim.attr,
+	&dev_attr_chg_curlim.attr,
 	NULL,
 };
 
diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h
index 9aba7b779fbc..09af8fdfbb5d 100644
--- a/include/linux/mfd/pcf50633/core.h
+++ b/include/linux/mfd/pcf50633/core.h
@@ -31,6 +31,13 @@ struct pcf50633_platform_data {
 
 	int charging_restart_interval;
 
+	/*
+	 * Should be set accordingly to the reference resistor used, see
+	 * I_{ch(ref)} charger reference current in the pcf50633 User
+	 * Manual.
+	 */
+	int charger_reference_current_ma;
+
 	/* Callbacks */
 	void (*probe_done)(struct pcf50633 *);
 	void (*mbc_event_callback)(struct pcf50633 *, int);
-- 
cgit v1.2.3


From e98c73a24f33d6f54402f5cef2e7bf282d1d1fcc Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav@gmail.com>
Date: Thu, 5 Nov 2009 00:24:57 +0300
Subject: pcf50633: Get rid of charging restart software auto-triggering

After reaching Battery Full condition MBC state machine switches back
into charging mode when the battery voltage falls below 96% of a
battery float voltage. The voltage drop in Li-Ion batteries is
marginal (1-2%) till about 80% of its capacity - which means, after a
BATFULL, charging won't be restarted until 75-80%.

That is a desired behaviour recommended by battery manufacturers,
don't mess with it.

Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/pcf50633-charger.c  | 46 ---------------------------------------
 include/linux/mfd/pcf50633/core.h |  2 --
 2 files changed, 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c
index 21b6e64e7805..338311996eea 100644
--- a/drivers/power/pcf50633-charger.c
+++ b/drivers/power/pcf50633-charger.c
@@ -37,8 +37,6 @@ struct pcf50633_mbc {
 	struct power_supply usb;
 	struct power_supply adapter;
 	struct power_supply ac;
-
-	struct delayed_work charging_restart_work;
 };
 
 int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
@@ -236,44 +234,10 @@ static struct attribute_group mbc_attr_group = {
 	.attrs	= pcf50633_mbc_sysfs_entries,
 };
 
-/* MBC state machine switches into charging mode when the battery voltage
- * falls below 96% of a battery float voltage. But the voltage drop in Li-ion
- * batteries is marginal(1~2 %) till about 80% of its capacity - which means,
- * after a BATFULL, charging won't be restarted until 80%.
- *
- * This work_struct function restarts charging at regular intervals to make
- * sure we don't discharge too much
- */
-
-static void pcf50633_mbc_charging_restart(struct work_struct *work)
-{
-	struct pcf50633_mbc *mbc;
-	u8 mbcs2, chgmod;
-
-	mbc = container_of(work, struct pcf50633_mbc,
-				charging_restart_work.work);
-
-	mbcs2 = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCS2);
-	chgmod = (mbcs2 & PCF50633_MBCS2_MBC_MASK);
-
-	if (chgmod != PCF50633_MBCS2_MBC_BAT_FULL)
-		return;
-
-	/* Restart charging */
-	pcf50633_reg_set_bit_mask(mbc->pcf, PCF50633_REG_MBCC1,
-				PCF50633_MBCC1_RESUME, PCF50633_MBCC1_RESUME);
-	mbc->usb_active = 1;
-	power_supply_changed(&mbc->usb);
-
-	dev_info(mbc->pcf->dev, "Charging restarted\n");
-}
-
 static void
 pcf50633_mbc_irq_handler(int irq, void *data)
 {
 	struct pcf50633_mbc *mbc = data;
-	int chg_restart_interval =
-			mbc->pcf->pdata->charging_restart_interval;
 
 	/* USB */
 	if (irq == PCF50633_IRQ_USBINS) {
@@ -282,7 +246,6 @@ pcf50633_mbc_irq_handler(int irq, void *data)
 		mbc->usb_online = 0;
 		mbc->usb_active = 0;
 		pcf50633_mbc_usb_curlim_set(mbc->pcf, 0);
-		cancel_delayed_work_sync(&mbc->charging_restart_work);
 	}
 
 	/* Adapter */
@@ -297,10 +260,6 @@ pcf50633_mbc_irq_handler(int irq, void *data)
 	if (irq == PCF50633_IRQ_BATFULL) {
 		mbc->usb_active = 0;
 		mbc->adapter_active = 0;
-
-		if (chg_restart_interval > 0)
-			schedule_delayed_work(&mbc->charging_restart_work,
-							chg_restart_interval);
 	} else if (irq == PCF50633_IRQ_USBLIMON)
 		mbc->usb_active = 0;
 	else if (irq == PCF50633_IRQ_USBLIMOFF)
@@ -463,9 +422,6 @@ static int __devinit pcf50633_mbc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	INIT_DELAYED_WORK(&mbc->charging_restart_work,
-				pcf50633_mbc_charging_restart);
-
 	ret = sysfs_create_group(&pdev->dev.kobj, &mbc_attr_group);
 	if (ret)
 		dev_err(mbc->pcf->dev, "failed to create sysfs entries\n");
@@ -492,8 +448,6 @@ static int __devexit pcf50633_mbc_remove(struct platform_device *pdev)
 	power_supply_unregister(&mbc->adapter);
 	power_supply_unregister(&mbc->ac);
 
-	cancel_delayed_work_sync(&mbc->charging_restart_work);
-
 	kfree(mbc);
 
 	return 0;
diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h
index 09af8fdfbb5d..46df7f053c29 100644
--- a/include/linux/mfd/pcf50633/core.h
+++ b/include/linux/mfd/pcf50633/core.h
@@ -29,8 +29,6 @@ struct pcf50633_platform_data {
 	char **batteries;
 	int num_batteries;
 
-	int charging_restart_interval;
-
 	/*
 	 * Should be set accordingly to the reference resistor used, see
 	 * I_{ch(ref)} charger reference current in the pcf50633 User
-- 
cgit v1.2.3


From c329795052aa339850a45fab649ab97a36905136 Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav@gmail.com>
Date: Thu, 5 Nov 2009 00:24:59 +0300
Subject: pcf50633: Query charger status directly

Current scheme is fragile and is likely to go off sync, especially on
batfull->adapter charging automatic MBC transition.

Query the status bit every time we need it instead.

We need to export another function to query for USB presence because
we can't read anything from PCF50633 (via I2C) inside irq context and
that is needed by usb gadgets.

Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/pcf50633-charger.c | 50 +++++++++++++++++++++++-----------------
 include/linux/mfd/pcf50633/mbc.h |  1 +
 2 files changed, 30 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c
index 47187337b96c..b915a008f587 100644
--- a/drivers/power/pcf50633-charger.c
+++ b/drivers/power/pcf50633-charger.c
@@ -29,9 +29,7 @@
 struct pcf50633_mbc {
 	struct pcf50633 *pcf;
 
-	int adapter_active;
 	int adapter_online;
-	int usb_active;
 	int usb_online;
 
 	struct power_supply usb;
@@ -88,7 +86,7 @@ int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
 		pcf50633_reg_write(mbc->pcf, PCF50633_REG_MBCC5, mbcc5);
 	}
 
-	mbcs2 = pcf50633_reg_read(pcf, PCF50633_REG_MBCS2);
+	mbcs2 = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCS2);
 	chgmod = (mbcs2 & PCF50633_MBCS2_MBC_MASK);
 
 	/* If chgmod == BATFULL, setting chgena has no effect.
@@ -105,8 +103,6 @@ int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
 				PCF50633_MBCC1_CHGENA, PCF50633_MBCC1_CHGENA);
 	}
 
-	mbc->usb_active = charging_start;
-
 	power_supply_changed(&mbc->usb);
 
 	return ret;
@@ -117,20 +113,44 @@ int pcf50633_mbc_get_status(struct pcf50633 *pcf)
 {
 	struct pcf50633_mbc *mbc  = platform_get_drvdata(pcf->mbc_pdev);
 	int status = 0;
+	u8 chgmod;
+
+	if (!mbc)
+		return 0;
+
+	chgmod = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCS2)
+		& PCF50633_MBCS2_MBC_MASK;
 
 	if (mbc->usb_online)
 		status |= PCF50633_MBC_USB_ONLINE;
-	if (mbc->usb_active)
+	if (chgmod == PCF50633_MBCS2_MBC_USB_PRE ||
+	    chgmod == PCF50633_MBCS2_MBC_USB_PRE_WAIT ||
+	    chgmod == PCF50633_MBCS2_MBC_USB_FAST ||
+	    chgmod == PCF50633_MBCS2_MBC_USB_FAST_WAIT)
 		status |= PCF50633_MBC_USB_ACTIVE;
 	if (mbc->adapter_online)
 		status |= PCF50633_MBC_ADAPTER_ONLINE;
-	if (mbc->adapter_active)
+	if (chgmod == PCF50633_MBCS2_MBC_ADP_PRE ||
+	    chgmod == PCF50633_MBCS2_MBC_ADP_PRE_WAIT ||
+	    chgmod == PCF50633_MBCS2_MBC_ADP_FAST ||
+	    chgmod == PCF50633_MBCS2_MBC_ADP_FAST_WAIT)
 		status |= PCF50633_MBC_ADAPTER_ACTIVE;
 
 	return status;
 }
 EXPORT_SYMBOL_GPL(pcf50633_mbc_get_status);
 
+int pcf50633_mbc_get_usb_online_status(struct pcf50633 *pcf)
+{
+	struct pcf50633_mbc *mbc  = platform_get_drvdata(pcf->mbc_pdev);
+
+	if (!mbc)
+		return 0;
+
+	return mbc->usb_online;
+}
+EXPORT_SYMBOL_GPL(pcf50633_mbc_get_usb_online_status);
+
 static ssize_t
 show_chgmode(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -248,26 +268,14 @@ pcf50633_mbc_irq_handler(int irq, void *data)
 		mbc->usb_online = 1;
 	} else if (irq == PCF50633_IRQ_USBREM) {
 		mbc->usb_online = 0;
-		mbc->usb_active = 0;
 		pcf50633_mbc_usb_curlim_set(mbc->pcf, 0);
 	}
 
 	/* Adapter */
-	if (irq == PCF50633_IRQ_ADPINS) {
+	if (irq == PCF50633_IRQ_ADPINS)
 		mbc->adapter_online = 1;
-		mbc->adapter_active = 1;
-	} else if (irq == PCF50633_IRQ_ADPREM) {
+	else if (irq == PCF50633_IRQ_ADPREM)
 		mbc->adapter_online = 0;
-		mbc->adapter_active = 0;
-	}
-
-	if (irq == PCF50633_IRQ_BATFULL) {
-		mbc->usb_active = 0;
-		mbc->adapter_active = 0;
-	} else if (irq == PCF50633_IRQ_USBLIMON)
-		mbc->usb_active = 0;
-	else if (irq == PCF50633_IRQ_USBLIMOFF)
-		mbc->usb_active = 1;
 
 	power_supply_changed(&mbc->ac);
 	power_supply_changed(&mbc->usb);
diff --git a/include/linux/mfd/pcf50633/mbc.h b/include/linux/mfd/pcf50633/mbc.h
index 4119579acf2c..df4f5fa88de3 100644
--- a/include/linux/mfd/pcf50633/mbc.h
+++ b/include/linux/mfd/pcf50633/mbc.h
@@ -128,6 +128,7 @@ enum pcf50633_reg_mbcs3 {
 int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma);
 
 int pcf50633_mbc_get_status(struct pcf50633 *);
+int pcf50633_mbc_get_usb_online_status(struct pcf50633 *);
 
 #endif
 
-- 
cgit v1.2.3


From 9a1654ba0b50402a6bd03c7b0fe9b0200a5ea7b1 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 15 Nov 2009 07:20:12 +0000
Subject: net: Optimize hard_start_xmit() return checking

Recent changes in the TX error propagation require additional checking
and masking of values returned from hard_start_xmit(), mainly to
separate cases where skb was consumed. This aim can be simplified by
changing the order of NETDEV_TX and NET_XMIT codes, because the latter
are treated similarly to negative (ERRNO) values.

After this change much simpler dev_xmit_complete() is also used in
sch_direct_xmit(), so it is moved to netdevice.h.

Additionally NET_RX definitions in netdevice.h are moved up from
between TX codes to avoid confusion while reading the TX comment.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 42 ++++++++++++++++++++++++++++++------------
 net/core/dev.c            | 17 -----------------
 net/sched/sch_generic.c   | 23 +++++------------------
 3 files changed, 35 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 61425d0c6123..7043f85e643d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -63,6 +63,10 @@ struct wireless_dev;
 #define HAVE_FREE_NETDEV		/* free_netdev() */
 #define HAVE_NETDEV_PRIV		/* netdev_priv() */
 
+/* Backlog congestion levels */
+#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
+#define NET_RX_DROP		1	/* packet dropped */
+
 /*
  * Transmit return codes: transmit return codes originate from three different
  * namespaces:
@@ -82,14 +86,10 @@ struct wireless_dev;
 
 /* qdisc ->enqueue() return codes. */
 #define NET_XMIT_SUCCESS	0x00
-#define NET_XMIT_DROP		0x10	/* skb dropped			*/
-#define NET_XMIT_CN		0x20	/* congestion notification	*/
-#define NET_XMIT_POLICED	0x30	/* skb is shot by police	*/
-#define NET_XMIT_MASK		0xf0	/* qdisc flags in net/sch_generic.h */
-
-/* Backlog congestion levels */
-#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
-#define NET_RX_DROP		1	/* packet dropped */
+#define NET_XMIT_DROP		0x01	/* skb dropped			*/
+#define NET_XMIT_CN		0x02	/* congestion notification	*/
+#define NET_XMIT_POLICED	0x03	/* skb is shot by police	*/
+#define NET_XMIT_MASK		0x0f	/* qdisc flags in net/sch_generic.h */
 
 /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
  * indicates that the device will soon be dropping packets, or already drops
@@ -98,16 +98,34 @@ struct wireless_dev;
 #define net_xmit_errno(e)	((e) != NET_XMIT_CN ? -ENOBUFS : 0)
 
 /* Driver transmit return codes */
-#define NETDEV_TX_MASK		0xf
+#define NETDEV_TX_MASK		0xf0
 
 enum netdev_tx {
 	__NETDEV_TX_MIN	 = INT_MIN,	/* make sure enum is signed */
-	NETDEV_TX_OK	 = 0,		/* driver took care of packet */
-	NETDEV_TX_BUSY	 = 1,		/* driver tx path was busy*/
-	NETDEV_TX_LOCKED = 2,		/* driver tx lock was already taken */
+	NETDEV_TX_OK	 = 0x00,	/* driver took care of packet */
+	NETDEV_TX_BUSY	 = 0x10,	/* driver tx path was busy*/
+	NETDEV_TX_LOCKED = 0x20,	/* driver tx lock was already taken */
 };
 typedef enum netdev_tx netdev_tx_t;
 
+/*
+ * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant;
+ * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed.
+ */
+static inline bool dev_xmit_complete(int rc)
+{
+	/*
+	 * Positive cases with an skb consumed by a driver:
+	 * - successful transmission (rc == NETDEV_TX_OK)
+	 * - error while transmitting (rc < 0)
+	 * - error while queueing to a different device (rc & NET_XMIT_MASK)
+	 */
+	if (likely(rc < NET_XMIT_MASK))
+		return true;
+
+	return false;
+}
+
 #endif
 
 #define MAX_ADDR_LEN	32		/* Largest hardware address length */
diff --git a/net/core/dev.c b/net/core/dev.c
index 32045df1da5c..4b24d79414e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1924,23 +1924,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
-static inline bool dev_xmit_complete(int rc)
-{
-	/* successful transmission */
-	if (rc == NETDEV_TX_OK)
-		return true;
-
-	/* error while transmitting, driver consumed skb */
-	if (rc < 0)
-		return true;
-
-	/* error while queueing to a different device, driver consumed skb */
-	if (rc & NET_XMIT_MASK)
-		return true;
-
-	return false;
-}
-
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b13821ad2fb6..5173c1e1b19c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -119,39 +119,26 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_stopped(txq) &&
-	    !netif_tx_queue_frozen(txq)) {
+	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
-		/* an error implies that the skb was consumed */
-		if (ret < 0)
-			ret = NETDEV_TX_OK;
-		/* all NET_XMIT codes map to NETDEV_TX_OK */
-		ret &= ~NET_XMIT_MASK;
-	}
 	HARD_TX_UNLOCK(dev, txq);
 
 	spin_lock(root_lock);
 
-	switch (ret) {
-	case NETDEV_TX_OK:
-		/* Driver sent out skb successfully */
+	if (dev_xmit_complete(ret)) {
+		/* Driver sent out skb successfully or skb was consumed */
 		ret = qdisc_qlen(q);
-		break;
-
-	case NETDEV_TX_LOCKED:
+	} else if (ret == NETDEV_TX_LOCKED) {
 		/* Driver try lock failed */
 		ret = handle_dev_cpu_collision(skb, txq, q);
-		break;
-
-	default:
+	} else {
 		/* Driver returned NETDEV_TX_BUSY - requeue skb */
 		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
 		ret = dev_requeue_skb(skb, q);
-		break;
 	}
 
 	if (ret && (netif_tx_queue_stopped(txq) ||
-- 
cgit v1.2.3


From b9f5d52670c27e71f04c466aee77e3a2eeca8080 Mon Sep 17 00:00:00 2001
From: Marin Mitov <mitov@issp.bas.bg>
Date: Fri, 13 Nov 2009 07:58:41 +0000
Subject: remove deprecated and not used: print_mac()

The function print_mac in net/ethernet/eth.c is marked __deprecated
and not used. Remove it.

Signed-off-by: Marin Mitov <mitov@issp.bas.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 4 ----
 net/ethernet/eth.c       | 7 -------
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 580b6004d00e..005e1525ab86 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -136,10 +136,6 @@ extern struct ctl_table ether_table[];
 
 extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
 
-/*
- *	Display a 6 byte device address (MAC) in a readable format.
- */
-extern char *print_mac(char *buf, const unsigned char *addr) __deprecated;
 #define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
 #define MAC_BUF_SIZE	18
 #define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE]
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5a883affecd3..dd3db88f8f0a 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -393,10 +393,3 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 	return ((ssize_t) l);
 }
 EXPORT_SYMBOL(sysfs_format_mac);
-
-char *print_mac(char *buf, const unsigned char *addr)
-{
-	_format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN);
-	return buf;
-}
-EXPORT_SYMBOL(print_mac);
-- 
cgit v1.2.3


From e29d4363174949a7a4e46f670993d7ff43342c1c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 15 Nov 2009 22:23:47 -0800
Subject: Revert "isdn: isdn_ppp: Use SKB list facilities instead of home-grown
 implementation."

This reverts commit 38783e671399b5405f1fd177d602c400a9577ae6.

It causes kernel bugzilla #14594

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c | 352 ++++++++++++++++++++------------------------
 include/linux/isdn_ppp.h    |   2 +-
 2 files changed, 164 insertions(+), 190 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 2d14b64202a3..642d5aaf53ce 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1535,10 +1535,8 @@ static int isdn_ppp_mp_bundle_array_init(void)
 	int sz = ISDN_MAX_CHANNELS*sizeof(ippp_bundle);
 	if( (isdn_ppp_bundle_arr = kzalloc(sz, GFP_KERNEL)) == NULL )
 		return -ENOMEM;
-	for (i = 0; i < ISDN_MAX_CHANNELS; i++) {
+	for( i = 0; i < ISDN_MAX_CHANNELS; i++ )
 		spin_lock_init(&isdn_ppp_bundle_arr[i].lock);
-		skb_queue_head_init(&isdn_ppp_bundle_arr[i].frags);
-	}
 	return 0;
 }
 
@@ -1571,7 +1569,7 @@ static int isdn_ppp_mp_init( isdn_net_local * lp, ippp_bundle * add_to )
 		if ((lp->netdev->pb = isdn_ppp_mp_bundle_alloc()) == NULL)
 			return -ENOMEM;
 		lp->next = lp->last = lp;	/* nobody else in a queue */
-		skb_queue_head_init(&lp->netdev->pb->frags);
+		lp->netdev->pb->frags = NULL;
 		lp->netdev->pb->frames = 0;
 		lp->netdev->pb->seq = UINT_MAX;
 	}
@@ -1583,29 +1581,28 @@ static int isdn_ppp_mp_init( isdn_net_local * lp, ippp_bundle * add_to )
 
 static u32 isdn_ppp_mp_get_seq( int short_seq, 
 					struct sk_buff * skb, u32 last_seq );
-static void isdn_ppp_mp_discard(ippp_bundle *mp, struct sk_buff *from,
-				struct sk_buff *to);
-static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp,
-				   struct sk_buff *from, struct sk_buff *to,
-				   u32 lastseq);
-static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb);
+static struct sk_buff * isdn_ppp_mp_discard( ippp_bundle * mp,
+			struct sk_buff * from, struct sk_buff * to );
+static void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp,
+				struct sk_buff * from, struct sk_buff * to );
+static void isdn_ppp_mp_free_skb( ippp_bundle * mp, struct sk_buff * skb );
 static void isdn_ppp_mp_print_recv_pkt( int slot, struct sk_buff * skb );
 
 static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp, 
-				struct sk_buff *skb)
+							struct sk_buff *skb)
 {
-	struct sk_buff *newfrag, *frag, *start, *nextf;
-	u32 newseq, minseq, thisseq;
-	isdn_mppp_stats *stats;
 	struct ippp_struct *is;
+	isdn_net_local * lpq;
+	ippp_bundle * mp;
+	isdn_mppp_stats * stats;
+	struct sk_buff * newfrag, * frag, * start, *nextf;
+	u32 newseq, minseq, thisseq;
 	unsigned long flags;
-	isdn_net_local *lpq;
-	ippp_bundle *mp;
 	int slot;
 
 	spin_lock_irqsave(&net_dev->pb->lock, flags);
-	mp = net_dev->pb;
-	stats = &mp->stats;
+    	mp = net_dev->pb;
+        stats = &mp->stats;
 	slot = lp->ppp_slot;
 	if (slot < 0 || slot >= ISDN_MAX_CHANNELS) {
 		printk(KERN_ERR "%s: lp->ppp_slot(%d)\n",
@@ -1616,19 +1613,20 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 		return;
 	}
 	is = ippp_table[slot];
-	if (++mp->frames > stats->max_queue_len)
+    	if( ++mp->frames > stats->max_queue_len )
 		stats->max_queue_len = mp->frames;
-
+	
 	if (is->debug & 0x8)
 		isdn_ppp_mp_print_recv_pkt(lp->ppp_slot, skb);
 
-	newseq = isdn_ppp_mp_get_seq(is->mpppcfg & SC_IN_SHORT_SEQ,
-				     skb, is->last_link_seqno);
+	newseq = isdn_ppp_mp_get_seq(is->mpppcfg & SC_IN_SHORT_SEQ, 
+						skb, is->last_link_seqno);
+
 
 	/* if this packet seq # is less than last already processed one,
 	 * toss it right away, but check for sequence start case first 
 	 */
-	if (mp->seq > MP_LONGSEQ_MAX && (newseq & MP_LONGSEQ_MAXBIT)) {
+	if( mp->seq > MP_LONGSEQ_MAX && (newseq & MP_LONGSEQ_MAXBIT) ) {
 		mp->seq = newseq;	/* the first packet: required for
 					 * rfc1990 non-compliant clients --
 					 * prevents constant packet toss */
@@ -1638,7 +1636,7 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 		spin_unlock_irqrestore(&mp->lock, flags);
 		return;
 	}
-
+	
 	/* find the minimum received sequence number over all links */
 	is->last_link_seqno = minseq = newseq;
 	for (lpq = net_dev->queue;;) {
@@ -1659,31 +1657,22 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 					 * packets */
 	newfrag = skb;
 
-	/* Insert new fragment into the proper sequence slot.  */
-	skb_queue_walk(&mp->frags, frag) {
-		if (MP_SEQ(frag) == newseq) {
-			isdn_ppp_mp_free_skb(mp, newfrag);
-			newfrag = NULL;
-			break;
-		}
-		if (MP_LT(newseq, MP_SEQ(frag))) {
-			__skb_queue_before(&mp->frags, frag, newfrag);
-			newfrag = NULL;
-			break;
-		}
-	}
-	if (newfrag)
-		__skb_queue_tail(&mp->frags, newfrag);
+  	/* if this new fragment is before the first one, then enqueue it now. */
+  	if ((frag = mp->frags) == NULL || MP_LT(newseq, MP_SEQ(frag))) {
+		newfrag->next = frag;
+    		mp->frags = frag = newfrag;
+    		newfrag = NULL;
+  	}
 
-	frag = skb_peek(&mp->frags);
-	start = ((MP_FLAGS(frag) & MP_BEGIN_FRAG) &&
-		 (MP_SEQ(frag) == mp->seq)) ? frag : NULL;
-	if (!start)
-		goto check_overflow;
+  	start = MP_FLAGS(frag) & MP_BEGIN_FRAG &&
+				MP_SEQ(frag) == mp->seq ? frag : NULL;
 
-	/* main fragment traversing loop
+	/* 
+	 * main fragment traversing loop
 	 *
 	 * try to accomplish several tasks:
+	 * - insert new fragment into the proper sequence slot (once that's done
+	 *   newfrag will be set to NULL)
 	 * - reassemble any complete fragment sequence (non-null 'start'
 	 *   indicates there is a continguous sequence present)
 	 * - discard any incomplete sequences that are below minseq -- due
@@ -1692,46 +1681,71 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 	 *   come to complete such sequence and it should be discarded
 	 *
 	 * loop completes when we accomplished the following tasks:
+	 * - new fragment is inserted in the proper sequence ('newfrag' is 
+	 *   set to NULL)
 	 * - we hit a gap in the sequence, so no reassembly/processing is 
 	 *   possible ('start' would be set to NULL)
 	 *
 	 * algorithm for this code is derived from code in the book
 	 * 'PPP Design And Debugging' by James Carlson (Addison-Wesley)
 	 */
-	skb_queue_walk_safe(&mp->frags, frag, nextf) {
-		thisseq = MP_SEQ(frag);
-
-		/* check for misplaced start */
-		if (start != frag && (MP_FLAGS(frag) & MP_BEGIN_FRAG)) {
-			printk(KERN_WARNING"isdn_mppp(seq %d): new "
-			       "BEGIN flag with no prior END", thisseq);
-			stats->seqerrs++;
-			stats->frame_drops++;
-			isdn_ppp_mp_discard(mp, start, frag);
-			start = frag;
-		} else if (MP_LE(thisseq, minseq)) {		
-			if (MP_FLAGS(frag) & MP_BEGIN_FRAG)
+  	while (start != NULL || newfrag != NULL) {
+
+    		thisseq = MP_SEQ(frag);
+    		nextf = frag->next;
+
+    		/* drop any duplicate fragments */
+    		if (newfrag != NULL && thisseq == newseq) {
+      			isdn_ppp_mp_free_skb(mp, newfrag);
+      			newfrag = NULL;
+    		}
+
+    		/* insert new fragment before next element if possible. */
+    		if (newfrag != NULL && (nextf == NULL || 
+						MP_LT(newseq, MP_SEQ(nextf)))) {
+      			newfrag->next = nextf;
+      			frag->next = nextf = newfrag;
+      			newfrag = NULL;
+    		}
+
+    		if (start != NULL) {
+	    		/* check for misplaced start */
+      			if (start != frag && (MP_FLAGS(frag) & MP_BEGIN_FRAG)) {
+				printk(KERN_WARNING"isdn_mppp(seq %d): new "
+				      "BEGIN flag with no prior END", thisseq);
+				stats->seqerrs++;
+				stats->frame_drops++;
+				start = isdn_ppp_mp_discard(mp, start,frag);
+				nextf = frag->next;
+      			}
+    		} else if (MP_LE(thisseq, minseq)) {		
+      			if (MP_FLAGS(frag) & MP_BEGIN_FRAG)
 				start = frag;
-			else {
+      			else {
 				if (MP_FLAGS(frag) & MP_END_FRAG)
-					stats->frame_drops++;
-				__skb_unlink(skb, &mp->frags);
+	  				stats->frame_drops++;
+				if( mp->frags == frag )
+					mp->frags = nextf;	
 				isdn_ppp_mp_free_skb(mp, frag);
+				frag = nextf;
 				continue;
-			}
+      			}
 		}
-
-		/* if we have end fragment, then we have full reassembly
-		 * sequence -- reassemble and process packet now
+		
+		/* if start is non-null and we have end fragment, then
+		 * we have full reassembly sequence -- reassemble 
+		 * and process packet now
 		 */
-		if (MP_FLAGS(frag) & MP_END_FRAG) {
-			minseq = mp->seq = (thisseq+1) & MP_LONGSEQ_MASK;
-			/* Reassemble the packet then dispatch it */
-			isdn_ppp_mp_reassembly(net_dev, lp, start, frag, thisseq);
+    		if (start != NULL && (MP_FLAGS(frag) & MP_END_FRAG)) {
+      			minseq = mp->seq = (thisseq+1) & MP_LONGSEQ_MASK;
+      			/* Reassemble the packet then dispatch it */
+			isdn_ppp_mp_reassembly(net_dev, lp, start, nextf);
+      
+      			start = NULL;
+      			frag = NULL;
 
-			start = NULL;
-			frag = NULL;
-		}
+      			mp->frags = nextf;
+    		}
 
 		/* check if need to update start pointer: if we just
 		 * reassembled the packet and sequence is contiguous
@@ -1742,25 +1756,26 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 		 * below low watermark and set start to the next frag or
 		 * clear start ptr.
 		 */ 
-		if (nextf != (struct sk_buff *)&mp->frags && 
+    		if (nextf != NULL && 
 		    ((thisseq+1) & MP_LONGSEQ_MASK) == MP_SEQ(nextf)) {
-			/* if we just reassembled and the next one is here, 
-			 * then start another reassembly.
-			 */
-			if (frag == NULL) {
+      			/* if we just reassembled and the next one is here, 
+			 * then start another reassembly. */
+
+      			if (frag == NULL) {
 				if (MP_FLAGS(nextf) & MP_BEGIN_FRAG)
-					start = nextf;
-				else {
-					printk(KERN_WARNING"isdn_mppp(seq %d):"
-					       " END flag with no following "
-					       "BEGIN", thisseq);
+	  				start = nextf;
+				else
+				{
+	  				printk(KERN_WARNING"isdn_mppp(seq %d):"
+						" END flag with no following "
+						"BEGIN", thisseq);
 					stats->seqerrs++;
 				}
 			}
-		} else {
-			if (nextf != (struct sk_buff *)&mp->frags &&
-			    frag != NULL &&
-			    MP_LT(thisseq, minseq)) {
+
+    		} else {
+			if ( nextf != NULL && frag != NULL &&
+						MP_LT(thisseq, minseq)) {
 				/* we've got a break in the sequence
 				 * and we not at the end yet
 				 * and we did not just reassembled
@@ -1769,39 +1784,41 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 			 	 * discard all the frames below low watermark 
 				 * and start over */
 				stats->frame_drops++;
-				isdn_ppp_mp_discard(mp, start, nextf);
+				mp->frags = isdn_ppp_mp_discard(mp,start,nextf);
 			}
 			/* break in the sequence, no reassembly */
-			start = NULL;
-		}
-		if (!start)
-			break;
-	}
-
-check_overflow:
+      			start = NULL;
+    		}
+	  			
+    		frag = nextf;
+  	}	/* while -- main loop */
+	
+  	if (mp->frags == NULL)
+    		mp->frags = frag;
+		
 	/* rather straighforward way to deal with (not very) possible 
-	 * queue overflow
-	 */
+	 * queue overflow */
 	if (mp->frames > MP_MAX_QUEUE_LEN) {
 		stats->overflows++;
-		skb_queue_walk_safe(&mp->frags, frag, nextf) {
-			if (mp->frames <= MP_MAX_QUEUE_LEN)
-				break;
-			__skb_unlink(frag, &mp->frags);
-			isdn_ppp_mp_free_skb(mp, frag);
+		while (mp->frames > MP_MAX_QUEUE_LEN) {
+			frag = mp->frags->next;
+			isdn_ppp_mp_free_skb(mp, mp->frags);
+			mp->frags = frag;
 		}
 	}
 	spin_unlock_irqrestore(&mp->lock, flags);
 }
 
-static void isdn_ppp_mp_cleanup(isdn_net_local *lp)
+static void isdn_ppp_mp_cleanup( isdn_net_local * lp )
 {
-	struct sk_buff *skb, *tmp;
-
-	skb_queue_walk_safe(&lp->netdev->pb->frags, skb, tmp) {
-		__skb_unlink(skb, &lp->netdev->pb->frags);
-		isdn_ppp_mp_free_skb(lp->netdev->pb, skb);
-	}
+	struct sk_buff * frag = lp->netdev->pb->frags;
+	struct sk_buff * nextfrag;
+    	while( frag ) {
+		nextfrag = frag->next;
+		isdn_ppp_mp_free_skb(lp->netdev->pb, frag);
+		frag = nextfrag;
+	}
+	lp->netdev->pb->frags = NULL;
 }
 
 static u32 isdn_ppp_mp_get_seq( int short_seq, 
@@ -1838,115 +1855,72 @@ static u32 isdn_ppp_mp_get_seq( int short_seq,
 	return seq;
 }
 
-static void isdn_ppp_mp_discard(ippp_bundle *mp, struct sk_buff *from,
-				struct sk_buff *to)
+struct sk_buff * isdn_ppp_mp_discard( ippp_bundle * mp,
+			struct sk_buff * from, struct sk_buff * to )
 {
-	if (from) {
-		struct sk_buff *skb, *tmp;
-		int freeing = 0;
-
-		skb_queue_walk_safe(&mp->frags, skb, tmp) {
-			if (skb == to)
-				break;
-			if (skb == from)
-				freeing = 1;
-			if (!freeing)
-				continue;
-			__skb_unlink(skb, &mp->frags);
-			isdn_ppp_mp_free_skb(mp, skb);
+	if( from )
+		while (from != to) {
+	  		struct sk_buff * next = from->next;
+			isdn_ppp_mp_free_skb(mp, from);
+	  		from = next;
 		}
-	}
-}
-
-static unsigned int calc_tot_len(struct sk_buff_head *queue,
-				 struct sk_buff *from, struct sk_buff *to)
-{
-	unsigned int tot_len = 0;
-	struct sk_buff *skb;
-	int found_start = 0;
-
-	skb_queue_walk(queue, skb) {
-		if (skb == from)
-			found_start = 1;
-		if (!found_start)
-			continue;
-		tot_len += skb->len - MP_HEADER_LEN;
-		if (skb == to)
-			break;
-	}
-	return tot_len;
+	return from;
 }
 
-/* Reassemble packet using fragments in the reassembly queue from
- * 'from' until 'to', inclusive.
- */
-static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp,
-				   struct sk_buff *from, struct sk_buff *to,
-				   u32 lastseq)
+void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp,
+				struct sk_buff * from, struct sk_buff * to )
 {
-	ippp_bundle *mp = net_dev->pb;
-	unsigned int tot_len;
-	struct sk_buff *skb;
+	ippp_bundle * mp = net_dev->pb;
 	int proto;
+	struct sk_buff * skb;
+	unsigned int tot_len;
 
 	if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) {
 		printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n",
 			__func__, lp->ppp_slot);
 		return;
 	}
-
-	tot_len = calc_tot_len(&mp->frags, from, to);
-
-	if (MP_FLAGS(from) == (MP_BEGIN_FRAG | MP_END_FRAG)) {
-		if (ippp_table[lp->ppp_slot]->debug & 0x40)
+	if( MP_FLAGS(from) == (MP_BEGIN_FRAG | MP_END_FRAG) ) {
+		if( ippp_table[lp->ppp_slot]->debug & 0x40 )
 			printk(KERN_DEBUG "isdn_mppp: reassembly: frame %d, "
-			       "len %d\n", MP_SEQ(from), from->len);
+					"len %d\n", MP_SEQ(from), from->len );
 		skb = from;
 		skb_pull(skb, MP_HEADER_LEN);
-		__skb_unlink(skb, &mp->frags);
 		mp->frames--;	
 	} else {
-		struct sk_buff *walk, *tmp;
-		int found_start = 0;
+		struct sk_buff * frag;
+		int n;
 
-		if (ippp_table[lp->ppp_slot]->debug & 0x40)
-			printk(KERN_DEBUG"isdn_mppp: reassembling frames %d "
-			       "to %d, len %d\n", MP_SEQ(from), lastseq,
-			       tot_len);
+		for(tot_len=n=0, frag=from; frag != to; frag=frag->next, n++)
+			tot_len += frag->len - MP_HEADER_LEN;
 
-		skb = dev_alloc_skb(tot_len);
-		if (!skb)
+		if( ippp_table[lp->ppp_slot]->debug & 0x40 )
+			printk(KERN_DEBUG"isdn_mppp: reassembling frames %d "
+				"to %d, len %d\n", MP_SEQ(from), 
+				(MP_SEQ(from)+n-1) & MP_LONGSEQ_MASK, tot_len );
+		if( (skb = dev_alloc_skb(tot_len)) == NULL ) {
 			printk(KERN_ERR "isdn_mppp: cannot allocate sk buff "
-			       "of size %d\n", tot_len);
-
-		found_start = 0;
-		skb_queue_walk_safe(&mp->frags, walk, tmp) {
-			if (walk == from)
-				found_start = 1;
-			if (!found_start)
-				continue;
+					"of size %d\n", tot_len);
+			isdn_ppp_mp_discard(mp, from, to);
+			return;
+		}
 
-			if (skb) {
-				unsigned int len = walk->len - MP_HEADER_LEN;
-				skb_copy_from_linear_data_offset(walk, MP_HEADER_LEN,
-								 skb_put(skb, len),
-								 len);
-			}
-			__skb_unlink(walk, &mp->frags);
-			isdn_ppp_mp_free_skb(mp, walk);
+		while( from != to ) {
+			unsigned int len = from->len - MP_HEADER_LEN;
 
-			if (walk == to)
-				break;
+			skb_copy_from_linear_data_offset(from, MP_HEADER_LEN,
+							 skb_put(skb,len),
+							 len);
+			frag = from->next;
+			isdn_ppp_mp_free_skb(mp, from);
+			from = frag; 
 		}
 	}
-	if (!skb)
-		return;
-
    	proto = isdn_ppp_strip_proto(skb);
 	isdn_ppp_push_higher(net_dev, lp, skb, proto);
 }
 
-static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb)
+static void isdn_ppp_mp_free_skb(ippp_bundle * mp, struct sk_buff * skb)
 {
 	dev_kfree_skb(skb);
 	mp->frames--;
diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h
index 4c218ee7587a..8687a7dc0632 100644
--- a/include/linux/isdn_ppp.h
+++ b/include/linux/isdn_ppp.h
@@ -157,7 +157,7 @@ typedef struct {
 
 typedef struct {
   int mp_mrru;                        /* unused                             */
-  struct sk_buff_head frags;	/* fragments sl list */
+  struct sk_buff * frags;	/* fragments sl list -- use skb->next */
   long frames;			/* number of frames in the frame list */
   unsigned int seq;		/* last processed packet seq #: any packets
   				 * with smaller seq # will be dropped
-- 
cgit v1.2.3


From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 16 Nov 2009 12:45:14 +0100
Subject: perf_event: Optimize perf_output_lock()

The purpose of perf_output_{un,}lock() is to:

 1) avoid publishing incomplete data
    [ possible when publishing a head that is ahead of an entry
      that is still being written ]

 2) guarantee fwd progress
    [ a simple refcount on pending writers doesn't need to drop to
      0, making it so would end up implementing something like forced
      quiecent states of RCU ]

To satisfy the above without undue complexity it serializes
between CPUs, this means that a pending writer can only be the
same cpu in a nested context, and since (under normal operation)
a cpu always makes progress we're good -- if the head is only
published when the bottom  most writer completes.

Now we don't need to disable IRQs in order to serialize between
CPUs, disabling preemption ought to be sufficient, esp since we
already deal with nesting due to NMIs.

This avoids potentially expensive (and needless) local IRQ
disable/enable ops.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1258373161.26714.254.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  1 -
 kernel/perf_event.c        | 21 +++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index df4e73e33774..7f87563c8485 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -714,7 +714,6 @@ struct perf_output_handle {
 	int				nmi;
 	int				sample;
 	int				locked;
-	unsigned long			flags;
 };
 
 #ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6f4ed3b4cd73..3256e36ad251 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2674,20 +2674,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 static void perf_output_lock(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cpu;
+	int cur, cpu = get_cpu();
 
 	handle->locked = 0;
 
-	local_irq_save(handle->flags);
-	cpu = smp_processor_id();
-
-	if (in_nmi() && atomic_read(&data->lock) == cpu)
-		return;
+	for (;;) {
+		cur = atomic_cmpxchg(&data->lock, -1, cpu);
+		if (cur == -1) {
+			handle->locked = 1;
+			break;
+		}
+		if (cur == cpu)
+			break;
 
-	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
 		cpu_relax();
-
-	handle->locked = 1;
+	}
 }
 
 static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2733,7 +2734,7 @@ again:
 	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
 out:
-	local_irq_restore(handle->flags);
+	put_cpu();
 }
 
 void perf_output_copy(struct perf_output_handle *handle,
-- 
cgit v1.2.3


From 0696b711e4be45fa104c12329f617beb29c03f78 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Tue, 17 Nov 2009 13:49:50 +0800
Subject: timekeeping: Fix clock_gettime vsyscall time warp

Since commit 0a544198 "timekeeping: Move NTP adjusted clock multiplier
to struct timekeeper" the clock multiplier of vsyscall is updated with
the unmodified clock multiplier of the clock source and not with the
NTP adjusted multiplier of the timekeeper.

This causes user space observerable time warps:
new CLOCK-warp maximum: 120 nsecs,  00000025c337c537 -> 00000025c337c4bf

Add a new argument "mult" to update_vsyscall() and hand in the
timekeeping internal NTP adjusted multiplier.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: "Zhang Yanmin" <yanmin_zhang@linux.intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Tony Luck <tony.luck@intel.com>
LKML-Reference: <1258436990.17765.83.camel@minggr.sh.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/time.c       | 4 ++--
 arch/powerpc/kernel/time.c    | 5 +++--
 arch/s390/kernel/time.c       | 3 ++-
 arch/x86/kernel/vsyscall_64.c | 5 +++--
 include/linux/clocksource.h   | 6 ++++--
 kernel/time/timekeeping.c     | 6 +++---
 6 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 4990495d7531..a35c661e5e89 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -473,7 +473,7 @@ void update_vsyscall_tz(void)
 {
 }
 
-void update_vsyscall(struct timespec *wall, struct clocksource *c)
+void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult)
 {
         unsigned long flags;
 
@@ -481,7 +481,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c)
 
         /* copy fsyscall clock data */
         fsyscall_gtod_data.clk_mask = c->mask;
-        fsyscall_gtod_data.clk_mult = c->mult;
+        fsyscall_gtod_data.clk_mult = mult;
         fsyscall_gtod_data.clk_shift = c->shift;
         fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
         fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a136a11c490d..39713312fbc7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -828,7 +828,8 @@ static cycle_t timebase_read(struct clocksource *cs)
 	return (cycle_t)get_tb();
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+		     u32 mult)
 {
 	u64 t2x, stamp_xsec;
 
@@ -841,7 +842,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 
 	/* XXX this assumes clock->shift == 22 */
 	/* 4611686018 ~= 2^(20+64-22) / 1e9 */
-	t2x = (u64) clock->mult * 4611686018ULL;
+	t2x = (u64) mult * 4611686018ULL;
 	stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
 	do_div(stamp_xsec, 1000000000);
 	stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 34162a0b2caa..68e1ecf5ebab 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void)
 	return &clocksource_tod;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+		     u32 mult)
 {
 	if (clock != &clocksource_tod)
 		return;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8cb4974ff599..62f39d79b775 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+		     u32 mult)
 {
 	unsigned long flags;
 
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 	vsyscall_gtod_data.clock.vread = clock->vread;
 	vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
 	vsyscall_gtod_data.clock.mask = clock->mask;
-	vsyscall_gtod_data.clock.mult = clock->mult;
+	vsyscall_gtod_data.clock.mult = mult;
 	vsyscall_gtod_data.clock.shift = clock->shift;
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd81b93..95e4995d9987 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -280,10 +280,12 @@ extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
-extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
+extern void
+update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult);
 extern void update_vsyscall_tz(void);
 #else
-static inline void update_vsyscall(struct timespec *ts, struct clocksource *c)
+static inline void
+update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult)
 {
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c3a4e2907eaa..2a6d3e3e2c3e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -177,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond)
 {
 	xtime.tv_sec += leapsecond;
 	wall_to_monotonic.tv_sec -= leapsecond;
-	update_vsyscall(&xtime, timekeeper.clock);
+	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
 
 #ifdef CONFIG_GENERIC_TIME
@@ -337,7 +337,7 @@ int do_settimeofday(struct timespec *tv)
 	timekeeper.ntp_error = 0;
 	ntp_clear();
 
-	update_vsyscall(&xtime, timekeeper.clock);
+	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -811,7 +811,7 @@ void update_wall_time(void)
 	update_xtime_cache(nsecs);
 
 	/* check to see if there is a new clocksource to use */
-	update_vsyscall(&xtime, timekeeper.clock);
+	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
 
 /**
-- 
cgit v1.2.3


From c85e9d7739fc8d879c4293ea020760926d6f87cd Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 17 Nov 2009 10:16:32 -0500
Subject: znet: fix build failure from i82593.h relocation

znet was including "wireless/i82593.h" (which is a bit wierd), and I
missed that when I relocated i82593.h to drivers/staging/wavelan.  Since
I don't have ISA turned-on in my normal .config, I didn't see the build
failures -- mea culpa!

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/znet.c                     |   3 +-
 drivers/staging/wavelan/i82593.h       | 229 ---------------------------------
 drivers/staging/wavelan/wavelan_cs.p.h |   2 +-
 include/linux/i82593.h                 | 229 +++++++++++++++++++++++++++++++++
 4 files changed, 231 insertions(+), 232 deletions(-)
 delete mode 100644 drivers/staging/wavelan/i82593.h
 create mode 100644 include/linux/i82593.h

(limited to 'include/linux')

diff --git a/drivers/net/znet.c b/drivers/net/znet.c
index b42347333750..443c4eee28c1 100644
--- a/drivers/net/znet.c
+++ b/drivers/net/znet.c
@@ -103,8 +103,7 @@
 #include <asm/io.h>
 #include <asm/dma.h>
 
-/* This include could be elsewhere, since it is not wireless specific */
-#include "wireless/i82593.h"
+#include <linux/i82593.h>
 
 static char version[] __initdata = "znet.c:v1.02 9/23/94 becker@scyld.com\n";
 
diff --git a/drivers/staging/wavelan/i82593.h b/drivers/staging/wavelan/i82593.h
deleted file mode 100644
index afac5c7a323d..000000000000
--- a/drivers/staging/wavelan/i82593.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Definitions for Intel 82593 CSMA/CD Core LAN Controller
- * The definitions are taken from the 1992 users manual with Intel
- * order number 297125-001.
- *
- * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp
- *
- * Copyright 1994, Anders Klemets <klemets@it.kth.se>
- *
- * HISTORY
- * i82593.h,v
- * Revision 1.4  2005/11/4  09:15:00  baroniunas
- * Modified copyright with permission of author as follows:
- *
- *   "If I82539.H is the only file with my copyright statement
- *    that is included in the Source Forge project, then you have
- *    my approval to change the copyright statement to be a GPL
- *    license, in the way you proposed on October 10."
- *
- * Revision 1.1  1996/07/17 15:23:12  root
- * Initial revision
- *
- * Revision 1.3  1995/04/05  15:13:58  adj
- * Initial alpha release
- *
- * Revision 1.2  1994/06/16  23:57:31  klemets
- * Mirrored all the fields in the configuration block.
- *
- * Revision 1.1  1994/06/02  20:25:34  klemets
- * Initial revision
- *
- *
- */
-#ifndef	_I82593_H
-#define	_I82593_H
-
-/* Intel 82593 CSMA/CD Core LAN Controller */
-
-/* Port 0 Command Register definitions */
-
-/* Execution operations */
-#define OP0_NOP			0	/* CHNL = 0 */
-#define OP0_SWIT_TO_PORT_1	0	/* CHNL = 1 */
-#define OP0_IA_SETUP		1
-#define OP0_CONFIGURE		2
-#define OP0_MC_SETUP		3
-#define OP0_TRANSMIT		4
-#define OP0_TDR			5
-#define OP0_DUMP		6
-#define OP0_DIAGNOSE		7
-#define OP0_TRANSMIT_NO_CRC	9
-#define OP0_RETRANSMIT		12
-#define OP0_ABORT		13
-/* Reception operations */
-#define OP0_RCV_ENABLE		8
-#define OP0_RCV_DISABLE		10
-#define OP0_STOP_RCV		11
-/* Status pointer control operations */
-#define OP0_FIX_PTR		15	/* CHNL = 1 */
-#define OP0_RLS_PTR		15	/* CHNL = 0 */
-#define OP0_RESET		14
-
-#define CR0_CHNL		(1 << 4)	/* 0=Channel 0, 1=Channel 1 */
-#define CR0_STATUS_0		0x00
-#define CR0_STATUS_1		0x20
-#define CR0_STATUS_2		0x40
-#define CR0_STATUS_3		0x60
-#define CR0_INT_ACK		(1 << 7)	/* 0=No ack, 1=acknowledge */
-
-/* Port 0 Status Register definitions */
-
-#define SR0_NO_RESULT		0		/* dummy */
-#define SR0_EVENT_MASK		0x0f
-#define SR0_IA_SETUP_DONE	1
-#define SR0_CONFIGURE_DONE	2
-#define SR0_MC_SETUP_DONE	3
-#define SR0_TRANSMIT_DONE	4
-#define SR0_TDR_DONE		5
-#define SR0_DUMP_DONE		6
-#define SR0_DIAGNOSE_PASSED	7
-#define SR0_TRANSMIT_NO_CRC_DONE 9
-#define SR0_RETRANSMIT_DONE	12
-#define SR0_EXECUTION_ABORTED	13
-#define SR0_END_OF_FRAME	8
-#define SR0_RECEPTION_ABORTED	10
-#define SR0_DIAGNOSE_FAILED	15
-#define SR0_STOP_REG_HIT	11
-
-#define SR0_CHNL		(1 << 4)
-#define SR0_EXECUTION		(1 << 5)
-#define SR0_RECEPTION		(1 << 6)
-#define SR0_INTERRUPT		(1 << 7)
-#define SR0_BOTH_RX_TX		(SR0_EXECUTION | SR0_RECEPTION)
-
-#define SR3_EXEC_STATE_MASK	0x03
-#define SR3_EXEC_IDLE		0
-#define SR3_TX_ABORT_IN_PROGRESS 1
-#define SR3_EXEC_ACTIVE		2
-#define SR3_ABORT_IN_PROGRESS	3
-#define SR3_EXEC_CHNL		(1 << 2)
-#define SR3_STP_ON_NO_RSRC	(1 << 3)
-#define SR3_RCVING_NO_RSRC	(1 << 4)
-#define SR3_RCV_STATE_MASK	0x60
-#define SR3_RCV_IDLE		0x00
-#define SR3_RCV_READY		0x20
-#define SR3_RCV_ACTIVE		0x40
-#define SR3_RCV_STOP_IN_PROG	0x60
-#define SR3_RCV_CHNL		(1 << 7)
-
-/* Port 1 Command Register definitions */
-
-#define OP1_NOP			0
-#define OP1_SWIT_TO_PORT_0	1
-#define OP1_INT_DISABLE		2
-#define OP1_INT_ENABLE		3
-#define OP1_SET_TS		5
-#define OP1_RST_TS		7
-#define OP1_POWER_DOWN		8
-#define OP1_RESET_RING_MNGMT	11
-#define OP1_RESET		14
-#define OP1_SEL_RST		15
-
-#define CR1_STATUS_4		0x00
-#define CR1_STATUS_5		0x20
-#define CR1_STATUS_6		0x40
-#define CR1_STOP_REG_UPDATE	(1 << 7)
-
-/* Receive frame status bits */
-
-#define	RX_RCLD			(1 << 0)
-#define RX_IA_MATCH		(1 << 1)
-#define	RX_NO_AD_MATCH		(1 << 2)
-#define RX_NO_SFD		(1 << 3)
-#define RX_SRT_FRM		(1 << 7)
-#define RX_OVRRUN		(1 << 8)
-#define RX_ALG_ERR		(1 << 10)
-#define RX_CRC_ERR		(1 << 11)
-#define RX_LEN_ERR		(1 << 12)
-#define RX_RCV_OK		(1 << 13)
-#define RX_TYP_LEN		(1 << 15)
-
-/* Transmit status bits */
-
-#define TX_NCOL_MASK		0x0f
-#define TX_FRTL			(1 << 4)
-#define TX_MAX_COL		(1 << 5)
-#define TX_HRT_BEAT		(1 << 6)
-#define TX_DEFER		(1 << 7)
-#define TX_UND_RUN		(1 << 8)
-#define TX_LOST_CTS		(1 << 9)
-#define TX_LOST_CRS		(1 << 10)
-#define TX_LTCOL		(1 << 11)
-#define TX_OK			(1 << 13)
-#define TX_COLL			(1 << 15)
-
-struct i82593_conf_block {
-  u_char fifo_limit : 4,
-  	 forgnesi   : 1,
-  	 fifo_32    : 1,
-  	 d6mod      : 1,
-  	 throttle_enb : 1;
-  u_char throttle   : 6,
-	 cntrxint   : 1,
-	 contin	    : 1;
-  u_char addr_len   : 3,
-  	 acloc 	    : 1,
- 	 preamb_len : 2,
-  	 loopback   : 2;
-  u_char lin_prio   : 3,
-	 tbofstop   : 1,
-	 exp_prio   : 3,
-	 bof_met    : 1;
-  u_char	    : 4,
-	 ifrm_spc   : 4;
-  u_char	    : 5,
-	 slottim_low : 3;
-  u_char slottim_hi : 3,
-		    : 1,
-	 max_retr   : 4;
-  u_char prmisc     : 1,
-	 bc_dis     : 1,
-  		    : 1,
-	 crs_1	    : 1,
-	 nocrc_ins  : 1,
-	 crc_1632   : 1,
-  	 	    : 1,
-  	 crs_cdt    : 1;
-  u_char cs_filter  : 3,
-	 crs_src    : 1,
-	 cd_filter  : 3,
-		    : 1;
-  u_char	    : 2,
-  	 min_fr_len : 6;
-  u_char lng_typ    : 1,
-	 lng_fld    : 1,
-	 rxcrc_xf   : 1,
-	 artx	    : 1,
-	 sarec	    : 1,
-	 tx_jabber  : 1,	/* why is this called max_len in the manual? */
-	 hash_1	    : 1,
-  	 lbpkpol    : 1;
-  u_char	    : 6,
-  	 fdx	    : 1,
-  	  	    : 1;
-  u_char dummy_6    : 6,	/* supposed to be ones */
-  	 mult_ia    : 1,
-  	 dis_bof    : 1;
-  u_char dummy_1    : 1,	/* supposed to be one */
-	 tx_ifs_retrig : 2,
-	 mc_all     : 1,
-	 rcv_mon    : 2,
-	 frag_acpt  : 1,
-  	 tstrttrs   : 1;
-  u_char fretx	    : 1,
-	 runt_eop   : 1,
-	 hw_sw_pin  : 1,
-	 big_endn   : 1,
-	 syncrqs    : 1,
-	 sttlen     : 1,
-	 tx_eop     : 1,
-  	 rx_eop	    : 1;
-  u_char rbuf_size  : 5,
-	 rcvstop    : 1,
-  	 	    : 2;
-};
-
-#define I82593_MAX_MULTICAST_ADDRESSES	128	/* Hardware hashed filter */
-
-#endif /* _I82593_H */
diff --git a/drivers/staging/wavelan/wavelan_cs.p.h b/drivers/staging/wavelan/wavelan_cs.p.h
index 81d91531c4f9..8fbfaa8a5a67 100644
--- a/drivers/staging/wavelan/wavelan_cs.p.h
+++ b/drivers/staging/wavelan/wavelan_cs.p.h
@@ -446,7 +446,7 @@
 #include <pcmcia/ds.h>
 
 /* Wavelan declarations */
-#include "i82593.h"	/* Definitions for the Intel chip */
+#include <linux/i82593.h>	/* Definitions for the Intel chip */
 
 #include "wavelan_cs.h"	/* Others bits of the hardware */
 
diff --git a/include/linux/i82593.h b/include/linux/i82593.h
new file mode 100644
index 000000000000..afac5c7a323d
--- /dev/null
+++ b/include/linux/i82593.h
@@ -0,0 +1,229 @@
+/*
+ * Definitions for Intel 82593 CSMA/CD Core LAN Controller
+ * The definitions are taken from the 1992 users manual with Intel
+ * order number 297125-001.
+ *
+ * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp
+ *
+ * Copyright 1994, Anders Klemets <klemets@it.kth.se>
+ *
+ * HISTORY
+ * i82593.h,v
+ * Revision 1.4  2005/11/4  09:15:00  baroniunas
+ * Modified copyright with permission of author as follows:
+ *
+ *   "If I82539.H is the only file with my copyright statement
+ *    that is included in the Source Forge project, then you have
+ *    my approval to change the copyright statement to be a GPL
+ *    license, in the way you proposed on October 10."
+ *
+ * Revision 1.1  1996/07/17 15:23:12  root
+ * Initial revision
+ *
+ * Revision 1.3  1995/04/05  15:13:58  adj
+ * Initial alpha release
+ *
+ * Revision 1.2  1994/06/16  23:57:31  klemets
+ * Mirrored all the fields in the configuration block.
+ *
+ * Revision 1.1  1994/06/02  20:25:34  klemets
+ * Initial revision
+ *
+ *
+ */
+#ifndef	_I82593_H
+#define	_I82593_H
+
+/* Intel 82593 CSMA/CD Core LAN Controller */
+
+/* Port 0 Command Register definitions */
+
+/* Execution operations */
+#define OP0_NOP			0	/* CHNL = 0 */
+#define OP0_SWIT_TO_PORT_1	0	/* CHNL = 1 */
+#define OP0_IA_SETUP		1
+#define OP0_CONFIGURE		2
+#define OP0_MC_SETUP		3
+#define OP0_TRANSMIT		4
+#define OP0_TDR			5
+#define OP0_DUMP		6
+#define OP0_DIAGNOSE		7
+#define OP0_TRANSMIT_NO_CRC	9
+#define OP0_RETRANSMIT		12
+#define OP0_ABORT		13
+/* Reception operations */
+#define OP0_RCV_ENABLE		8
+#define OP0_RCV_DISABLE		10
+#define OP0_STOP_RCV		11
+/* Status pointer control operations */
+#define OP0_FIX_PTR		15	/* CHNL = 1 */
+#define OP0_RLS_PTR		15	/* CHNL = 0 */
+#define OP0_RESET		14
+
+#define CR0_CHNL		(1 << 4)	/* 0=Channel 0, 1=Channel 1 */
+#define CR0_STATUS_0		0x00
+#define CR0_STATUS_1		0x20
+#define CR0_STATUS_2		0x40
+#define CR0_STATUS_3		0x60
+#define CR0_INT_ACK		(1 << 7)	/* 0=No ack, 1=acknowledge */
+
+/* Port 0 Status Register definitions */
+
+#define SR0_NO_RESULT		0		/* dummy */
+#define SR0_EVENT_MASK		0x0f
+#define SR0_IA_SETUP_DONE	1
+#define SR0_CONFIGURE_DONE	2
+#define SR0_MC_SETUP_DONE	3
+#define SR0_TRANSMIT_DONE	4
+#define SR0_TDR_DONE		5
+#define SR0_DUMP_DONE		6
+#define SR0_DIAGNOSE_PASSED	7
+#define SR0_TRANSMIT_NO_CRC_DONE 9
+#define SR0_RETRANSMIT_DONE	12
+#define SR0_EXECUTION_ABORTED	13
+#define SR0_END_OF_FRAME	8
+#define SR0_RECEPTION_ABORTED	10
+#define SR0_DIAGNOSE_FAILED	15
+#define SR0_STOP_REG_HIT	11
+
+#define SR0_CHNL		(1 << 4)
+#define SR0_EXECUTION		(1 << 5)
+#define SR0_RECEPTION		(1 << 6)
+#define SR0_INTERRUPT		(1 << 7)
+#define SR0_BOTH_RX_TX		(SR0_EXECUTION | SR0_RECEPTION)
+
+#define SR3_EXEC_STATE_MASK	0x03
+#define SR3_EXEC_IDLE		0
+#define SR3_TX_ABORT_IN_PROGRESS 1
+#define SR3_EXEC_ACTIVE		2
+#define SR3_ABORT_IN_PROGRESS	3
+#define SR3_EXEC_CHNL		(1 << 2)
+#define SR3_STP_ON_NO_RSRC	(1 << 3)
+#define SR3_RCVING_NO_RSRC	(1 << 4)
+#define SR3_RCV_STATE_MASK	0x60
+#define SR3_RCV_IDLE		0x00
+#define SR3_RCV_READY		0x20
+#define SR3_RCV_ACTIVE		0x40
+#define SR3_RCV_STOP_IN_PROG	0x60
+#define SR3_RCV_CHNL		(1 << 7)
+
+/* Port 1 Command Register definitions */
+
+#define OP1_NOP			0
+#define OP1_SWIT_TO_PORT_0	1
+#define OP1_INT_DISABLE		2
+#define OP1_INT_ENABLE		3
+#define OP1_SET_TS		5
+#define OP1_RST_TS		7
+#define OP1_POWER_DOWN		8
+#define OP1_RESET_RING_MNGMT	11
+#define OP1_RESET		14
+#define OP1_SEL_RST		15
+
+#define CR1_STATUS_4		0x00
+#define CR1_STATUS_5		0x20
+#define CR1_STATUS_6		0x40
+#define CR1_STOP_REG_UPDATE	(1 << 7)
+
+/* Receive frame status bits */
+
+#define	RX_RCLD			(1 << 0)
+#define RX_IA_MATCH		(1 << 1)
+#define	RX_NO_AD_MATCH		(1 << 2)
+#define RX_NO_SFD		(1 << 3)
+#define RX_SRT_FRM		(1 << 7)
+#define RX_OVRRUN		(1 << 8)
+#define RX_ALG_ERR		(1 << 10)
+#define RX_CRC_ERR		(1 << 11)
+#define RX_LEN_ERR		(1 << 12)
+#define RX_RCV_OK		(1 << 13)
+#define RX_TYP_LEN		(1 << 15)
+
+/* Transmit status bits */
+
+#define TX_NCOL_MASK		0x0f
+#define TX_FRTL			(1 << 4)
+#define TX_MAX_COL		(1 << 5)
+#define TX_HRT_BEAT		(1 << 6)
+#define TX_DEFER		(1 << 7)
+#define TX_UND_RUN		(1 << 8)
+#define TX_LOST_CTS		(1 << 9)
+#define TX_LOST_CRS		(1 << 10)
+#define TX_LTCOL		(1 << 11)
+#define TX_OK			(1 << 13)
+#define TX_COLL			(1 << 15)
+
+struct i82593_conf_block {
+  u_char fifo_limit : 4,
+  	 forgnesi   : 1,
+  	 fifo_32    : 1,
+  	 d6mod      : 1,
+  	 throttle_enb : 1;
+  u_char throttle   : 6,
+	 cntrxint   : 1,
+	 contin	    : 1;
+  u_char addr_len   : 3,
+  	 acloc 	    : 1,
+ 	 preamb_len : 2,
+  	 loopback   : 2;
+  u_char lin_prio   : 3,
+	 tbofstop   : 1,
+	 exp_prio   : 3,
+	 bof_met    : 1;
+  u_char	    : 4,
+	 ifrm_spc   : 4;
+  u_char	    : 5,
+	 slottim_low : 3;
+  u_char slottim_hi : 3,
+		    : 1,
+	 max_retr   : 4;
+  u_char prmisc     : 1,
+	 bc_dis     : 1,
+  		    : 1,
+	 crs_1	    : 1,
+	 nocrc_ins  : 1,
+	 crc_1632   : 1,
+  	 	    : 1,
+  	 crs_cdt    : 1;
+  u_char cs_filter  : 3,
+	 crs_src    : 1,
+	 cd_filter  : 3,
+		    : 1;
+  u_char	    : 2,
+  	 min_fr_len : 6;
+  u_char lng_typ    : 1,
+	 lng_fld    : 1,
+	 rxcrc_xf   : 1,
+	 artx	    : 1,
+	 sarec	    : 1,
+	 tx_jabber  : 1,	/* why is this called max_len in the manual? */
+	 hash_1	    : 1,
+  	 lbpkpol    : 1;
+  u_char	    : 6,
+  	 fdx	    : 1,
+  	  	    : 1;
+  u_char dummy_6    : 6,	/* supposed to be ones */
+  	 mult_ia    : 1,
+  	 dis_bof    : 1;
+  u_char dummy_1    : 1,	/* supposed to be one */
+	 tx_ifs_retrig : 2,
+	 mc_all     : 1,
+	 rcv_mon    : 2,
+	 frag_acpt  : 1,
+  	 tstrttrs   : 1;
+  u_char fretx	    : 1,
+	 runt_eop   : 1,
+	 hw_sw_pin  : 1,
+	 big_endn   : 1,
+	 syncrqs    : 1,
+	 sttlen     : 1,
+	 tx_eop     : 1,
+  	 rx_eop	    : 1;
+  u_char rbuf_size  : 5,
+	 rcvstop    : 1,
+  	 	    : 2;
+};
+
+#define I82593_MAX_MULTICAST_ADDRESSES	128	/* Hardware hashed filter */
+
+#endif /* _I82593_H */
-- 
cgit v1.2.3


From 6ad696d2cf535772dff659298ec7e7260e344595 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 17 Nov 2009 14:06:22 -0800
Subject: mm: allow memory hotplug and hibernation in the same kernel

Allow memory hotplug and hibernation in the same kernel

Memory hotplug and hibernation were exclusive in Kconfig.  This is
obviously a problem for distribution kernels who want to support both in
the same image.

After some discussions with Rafael and others the only problem is with
parallel memory hotadd or removal while a hibernation operation is in
process.  It was also working for s390 before.

This patch removes the Kconfig level exclusion, and simply makes the
memory add / remove functions grab the pm_mutex to exclude against
hibernation.

Fixes a regression - old kernels didn't exclude memory hotadd and
hibernation.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/suspend.h | 21 +++++++++++++++++++--
 mm/Kconfig              |  5 +----
 mm/memory_hotplug.c     | 21 +++++++++++++++++----
 3 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index cd15df6c63cd..5e781d824e6d 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -301,6 +301,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 #endif /* !CONFIG_PM_SLEEP */
 
+extern struct mutex pm_mutex;
+
 #ifndef CONFIG_HIBERNATION
 static inline void register_nosave_region(unsigned long b, unsigned long e)
 {
@@ -308,8 +310,23 @@ static inline void register_nosave_region(unsigned long b, unsigned long e)
 static inline void register_nosave_region_late(unsigned long b, unsigned long e)
 {
 }
-#endif
 
-extern struct mutex pm_mutex;
+static inline void lock_system_sleep(void) {}
+static inline void unlock_system_sleep(void) {}
+
+#else
+
+/* Let some subsystems like memory hotadd exclude hibernation */
+
+static inline void lock_system_sleep(void)
+{
+	mutex_lock(&pm_mutex);
+}
+
+static inline void unlock_system_sleep(void)
+{
+	mutex_unlock(&pm_mutex);
+}
+#endif
 
 #endif /* _LINUX_SUSPEND_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index fd3386242cf0..44cf6f0a3a6d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,12 +128,9 @@ config SPARSEMEM_VMEMMAP
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
-	depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG
+	depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
 
-comment "Memory hotplug is currently incompatible with Software Suspend"
-	depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390
-
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y
 	depends on SPARSEMEM && MEMORY_HOTPLUG
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 380aef45c2cf..2047465cd27c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
 #include <linux/pfn.h>
+#include <linux/suspend.h>
 
 #include <asm/tlbflush.h>
 
@@ -485,14 +486,18 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
+	lock_system_sleep();
+
 	res = register_memory_resource(start, size);
+	ret = -EEXIST;
 	if (!res)
-		return -EEXIST;
+		goto out;
 
 	if (!node_online(nid)) {
 		pgdat = hotadd_new_pgdat(nid, start);
+		ret = -ENOMEM;
 		if (!pgdat)
-			return -ENOMEM;
+			goto out;
 		new_pgdat = 1;
 	}
 
@@ -515,7 +520,8 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		BUG_ON(ret);
 	}
 
-	return ret;
+	goto out;
+
 error:
 	/* rollback pgdat allocation and others */
 	if (new_pgdat)
@@ -523,6 +529,8 @@ error:
 	if (res)
 		release_memory_resource(res);
 
+out:
+	unlock_system_sleep();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -759,6 +767,8 @@ int offline_pages(unsigned long start_pfn,
 	if (!test_pages_in_a_zone(start_pfn, end_pfn))
 		return -EINVAL;
 
+	lock_system_sleep();
+
 	zone = page_zone(pfn_to_page(start_pfn));
 	node = zone_to_nid(zone);
 	nr_pages = end_pfn - start_pfn;
@@ -766,7 +776,7 @@ int offline_pages(unsigned long start_pfn,
 	/* set above range as isolated */
 	ret = start_isolate_page_range(start_pfn, end_pfn);
 	if (ret)
-		return ret;
+		goto out;
 
 	arg.start_pfn = start_pfn;
 	arg.nr_pages = nr_pages;
@@ -844,6 +854,7 @@ repeat:
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
+	unlock_system_sleep();
 	return 0;
 
 failed_removal:
@@ -853,6 +864,8 @@ failed_removal:
 	/* pushback to free area */
 	undo_isolate_page_range(start_pfn, end_pfn);
 
+out:
+	unlock_system_sleep();
 	return ret;
 }
 
-- 
cgit v1.2.3


From d83345adf96bc13a5e360f4649a2e68ef968dec0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 16 Nov 2009 03:36:51 +0000
Subject: net: add dev_txq_stats_fold() helper

Some drivers ndo_get_stats() method need to perform txqueue stats folding.

Move folding from dev_get_stats() to a new dev_txq_stats_fold() function

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 48 ++++++++++++++++++++++++++++-------------------
 2 files changed, 30 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7043f85e643d..c8fa4627de00 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1941,6 +1941,7 @@ extern void		netdev_features_change(struct net_device *dev);
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
 extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
+extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
 extern int		weight_p;
diff --git a/net/core/dev.c b/net/core/dev.c
index d867522290b9..c3e0578d29d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5168,6 +5168,32 @@ void netdev_run_todo(void)
 	}
 }
 
+/**
+ *	dev_txq_stats_fold - fold tx_queues stats
+ *	@dev: device to get statistics from
+ *	@stats: struct net_device_stats to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+			struct net_device_stats *stats)
+{
+	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	unsigned int i;
+	struct netdev_queue *txq;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		txq = netdev_get_tx_queue(dev, i);
+		tx_bytes   += txq->tx_bytes;
+		tx_packets += txq->tx_packets;
+		tx_dropped += txq->tx_dropped;
+	}
+	if (tx_bytes || tx_packets || tx_dropped) {
+		stats->tx_bytes   = tx_bytes;
+		stats->tx_packets = tx_packets;
+		stats->tx_dropped = tx_dropped;
+	}
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
 /**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
@@ -5182,25 +5208,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
 
 	if (ops->ndo_get_stats)
 		return ops->ndo_get_stats(dev);
-	else {
-		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-		struct net_device_stats *stats = &dev->stats;
-		unsigned int i;
-		struct netdev_queue *txq;
-
-		for (i = 0; i < dev->num_tx_queues; i++) {
-			txq = netdev_get_tx_queue(dev, i);
-			tx_bytes   += txq->tx_bytes;
-			tx_packets += txq->tx_packets;
-			tx_dropped += txq->tx_dropped;
-		}
-		if (tx_bytes || tx_packets || tx_dropped) {
-			stats->tx_bytes   = tx_bytes;
-			stats->tx_packets = tx_packets;
-			stats->tx_dropped = tx_dropped;
-		}
-		return stats;
-	}
+
+	dev_txq_stats_fold(dev, &dev->stats);
+	return &dev->stats;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-- 
cgit v1.2.3


From 395264d509aec45149745843d9a737140a1ece16 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Mon, 16 Nov 2009 13:49:35 +0000
Subject: net: introduce NETDEV_UNREGISTER_PERNET

This new event is called once for each unique net namespace in batched
unregister operations (with the argument set to a random device from
that namespace) and once per device in non-batched unregister
operations.

It allows us to factorize some device unregister work such as clearing the
routing cache.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h |  1 +
 net/core/dev.c           | 29 +++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 29714b8441b1..b0c3671d463c 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -202,6 +202,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_BONDING_OLDTYPE  0x000E
 #define NETDEV_BONDING_NEWTYPE  0x000F
 #define NETDEV_POST_INIT	0x0010
+#define NETDEV_UNREGISTER_PERNET 0x0011
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index c3e0578d29d1..e25fe5d9343b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1344,6 +1344,7 @@ rollback:
 				nb->notifier_call(nb, NETDEV_DOWN, dev);
 			}
 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_PERNET, dev);
 		}
 	}
 
@@ -4721,7 +4722,8 @@ static void net_set_todo(struct net_device *dev)
 
 static void rollback_registered_many(struct list_head *head)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux, *fdev;
+	LIST_HEAD(pernet_list);
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -4779,8 +4781,24 @@ static void rollback_registered_many(struct list_head *head)
 
 	synchronize_net();
 
-	list_for_each_entry(dev, head, unreg_list)
+	list_for_each_entry_safe(dev, aux, head, unreg_list) {
+		int new_net = 1;
+		list_for_each_entry(fdev, &pernet_list, unreg_list) {
+			if (dev_net(dev) == dev_net(fdev)) {
+				new_net = 0;
+				dev_put(dev);
+				break;
+			}
+		}
+		if (new_net)
+			list_move(&dev->unreg_list, &pernet_list);
+	}
+
+	list_for_each_entry_safe(dev, aux, &pernet_list, unreg_list) {
+		call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
+		list_move(&dev->unreg_list, head);
 		dev_put(dev);
+	}
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -5074,6 +5092,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 			/* Rebroadcast unregister notification */
 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+			/* don't resend NETDEV_UNREGISTER_PERNET, _PERNET users
+			 * should have already handle it the first time */
 
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
 				     &dev->state)) {
@@ -5385,6 +5405,10 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
  *	unregister_netdevice_many - unregister many devices
  *	@head: list of devices
  *
+ *	WARNING: Calling this modifies the given list
+ *	(in rollback_registered_many). It may change the order of the elements
+ *	in the list. However, you can assume it does not add or delete elements
+ *	to/from the list.
  */
 void unregister_netdevice_many(struct list_head *head)
 {
@@ -5504,6 +5528,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	   this device. They should clean all the things.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
 
 	/*
 	 *	Flush the unicast and multicast chains
-- 
cgit v1.2.3


From e014debecd3ee3832e6476b3a9c948edfcfd1250 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 17 Nov 2009 05:59:21 +0000
Subject: linkwatch: linkwatch_forget_dev() to speedup device dismantle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Herbert Xu a écrit :
> On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote:
>> Really, the link watch stuff is just due for a redesign.  I don't
>> think a simple hack is going to cut it this time, sorry Eric :-)
>
> I have no objections against any redesigns, but since the only
> caller of linkwatch_forget_dev runs in process context with the
> RTNL, it could also legally emit those events.

Thanks guys, here an updated version then, before linkwatch surgery ?

In this version, I force the event to be sent synchronously.

[PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle

time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105

real	0m0.266s
user	0m0.000s
sys	0m0.001s

real	0m0.770s
user	0m0.000s
sys	0m0.000s

real	0m1.022s
user	0m0.000s
sys	0m0.000s

One problem of current schem in vlan dismantle phase is the
holding of device done by following chain :

vlan_dev_stop() ->
	netif_carrier_off(dev) ->
		linkwatch_fire_event(dev) ->
			dev_hold() ...

And __linkwatch_run_queue() runs up to one second later...

A generic fix to this problem is to add a linkwatch_forget_dev() method
to unlink the device from the list of watched devices.

dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory),
to be able to unlink device in O(1).

After patch :
time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105

real    0m0.024s
user    0m0.000s
sys     0m0.000s

real    0m0.032s
user    0m0.000s
sys     0m0.001s

real    0m0.033s
user    0m0.000s
sys     0m0.000s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +-
 net/core/dev.c            |  3 ++
 net/core/link_watch.c     | 94 ++++++++++++++++++++++++++++-------------------
 3 files changed, 62 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c8fa4627de00..97873e31661c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -896,7 +896,7 @@ struct net_device {
 	/* device index hash chain */
 	struct hlist_node	index_hlist;
 
-	struct net_device	*link_watch_next;
+	struct list_head	link_watch_list;
 
 	/* register/unregister state machine */
 	enum { NETREG_UNINITIALIZED=0,
@@ -1600,6 +1600,7 @@ static inline void dev_hold(struct net_device *dev)
  */
 
 extern void linkwatch_fire_event(struct net_device *dev);
+extern void linkwatch_forget_dev(struct net_device *dev);
 
 /**
  *	netif_carrier_ok - test if carrier present
diff --git a/net/core/dev.c b/net/core/dev.c
index e25fe5d9343b..c128af708ebf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5085,6 +5085,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
 
+	linkwatch_forget_dev(dev);
+
 	rebroadcast_time = warning_time = jiffies;
 	while (atomic_read(&dev->refcnt) != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -5311,6 +5313,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af699d7..5910b555a54a 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -35,7 +35,7 @@ static unsigned long linkwatch_nextevent;
 static void linkwatch_event(struct work_struct *dummy);
 static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
 
-static struct net_device *lweventlist;
+static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
 
 static unsigned char default_operstate(const struct net_device *dev)
@@ -89,8 +89,10 @@ static void linkwatch_add_event(struct net_device *dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&lweventlist_lock, flags);
-	dev->link_watch_next = lweventlist;
-	lweventlist = dev;
+	if (list_empty(&dev->link_watch_list)) {
+		list_add_tail(&dev->link_watch_list, &lweventlist);
+		dev_hold(dev);
+	}
 	spin_unlock_irqrestore(&lweventlist_lock, flags);
 }
 
@@ -133,9 +135,35 @@ static void linkwatch_schedule_work(int urgent)
 }
 
 
+static void linkwatch_do_dev(struct net_device *dev)
+{
+	/*
+	 * Make sure the above read is complete since it can be
+	 * rewritten as soon as we clear the bit below.
+	 */
+	smp_mb__before_clear_bit();
+
+	/* We are about to handle this device,
+	 * so new events can be accepted
+	 */
+	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+	rfc2863_policy(dev);
+	if (dev->flags & IFF_UP) {
+		if (netif_carrier_ok(dev))
+			dev_activate(dev);
+		else
+			dev_deactivate(dev);
+
+		netdev_state_change(dev);
+	}
+	dev_put(dev);
+}
+
 static void __linkwatch_run_queue(int urgent_only)
 {
-	struct net_device *next;
+	struct net_device *dev;
+	LIST_HEAD(wrk);
 
 	/*
 	 * Limit the number of linkwatch events to one
@@ -153,46 +181,40 @@ static void __linkwatch_run_queue(int urgent_only)
 	clear_bit(LW_URGENT, &linkwatch_flags);
 
 	spin_lock_irq(&lweventlist_lock);
-	next = lweventlist;
-	lweventlist = NULL;
-	spin_unlock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &wrk);
 
-	while (next) {
-		struct net_device *dev = next;
+	while (!list_empty(&wrk)) {
 
-		next = dev->link_watch_next;
+		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
+		list_del_init(&dev->link_watch_list);
 
 		if (urgent_only && !linkwatch_urgent_event(dev)) {
-			linkwatch_add_event(dev);
+			list_add_tail(&dev->link_watch_list, &lweventlist);
 			continue;
 		}
-
-		/*
-		 * Make sure the above read is complete since it can be
-		 * rewritten as soon as we clear the bit below.
-		 */
-		smp_mb__before_clear_bit();
-
-		/* We are about to handle this device,
-		 * so new events can be accepted
-		 */
-		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
-
-		rfc2863_policy(dev);
-		if (dev->flags & IFF_UP) {
-			if (netif_carrier_ok(dev))
-				dev_activate(dev);
-			else
-				dev_deactivate(dev);
-
-			netdev_state_change(dev);
-		}
-
-		dev_put(dev);
+		spin_unlock_irq(&lweventlist_lock);
+		linkwatch_do_dev(dev);
+		spin_lock_irq(&lweventlist_lock);
 	}
 
-	if (lweventlist)
+	if (!list_empty(&lweventlist))
 		linkwatch_schedule_work(0);
+	spin_unlock_irq(&lweventlist_lock);
+}
+
+void linkwatch_forget_dev(struct net_device *dev)
+{
+	unsigned long flags;
+	int clean = 0;
+
+	spin_lock_irqsave(&lweventlist_lock, flags);
+	if (!list_empty(&dev->link_watch_list)) {
+		list_del_init(&dev->link_watch_list);
+		clean = 1;
+	}
+	spin_unlock_irqrestore(&lweventlist_lock, flags);
+	if (clean)
+		linkwatch_do_dev(dev);
 }
 
 
@@ -216,8 +238,6 @@ void linkwatch_fire_event(struct net_device *dev)
 	bool urgent = linkwatch_urgent_event(dev);
 
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
-		dev_hold(dev);
-
 		linkwatch_add_event(dev);
 	} else if (!urgent)
 		return;
-- 
cgit v1.2.3


From 2ea6dec4a22a6f66f6633876212fd4d195cf8277 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 17 Nov 2009 14:27:27 -0800
Subject: generic-ipi: Add smp_call_function_any()

Andrew points out that acpi-cpufreq uses cpumask_any, when it really
would prefer to use the same CPU if possible (to avoid an IPI).  In
general, this seems a good idea to offer.

[ tglx: Documented selection preference and Inlined the UP case to
  	avoid the copy of smp_call_function_single() and the extra
  	EXPORT ]

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Zhao Yakui <yakui.zhao@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Galbraith <efault@gmx.de>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/smp.h | 11 ++++++++++-
 kernel/smp.c        | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 39c64bae776d..7a0570e6a596 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask,
 void __smp_call_function_single(int cpuid, struct call_single_data *data,
 				int wait);
 
+int smp_call_function_any(const struct cpumask *mask,
+			  void (*func)(void *info), void *info, int wait);
+
 /*
  * Generic and arch helpers
  */
@@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
-static inline void init_call_single_data(void)
+static inline void init_call_single_data(void) { }
+
+static inline int
+smp_call_function_any(const struct cpumask *mask, void (*func)(void *info),
+		      void *info, int wait)
 {
+	return smp_call_function_single(0, func, info, wait);
 }
+
 #endif /* !SMP */
 
 /*
diff --git a/kernel/smp.c b/kernel/smp.c
index 8bd618f0364d..a8c76069cf50 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -319,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/*
+ * smp_call_function_any - Run a function on any of the given cpus
+ * @mask: The mask of cpus it can run on.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait until function has completed.
+ *
+ * Returns 0 on success, else a negative status code (if no cpus were online).
+ * Note that @wait will be implicitly turned on in case of allocation failures,
+ * since we fall back to on-stack allocation.
+ *
+ * Selection preference:
+ *	1) current cpu if in @mask
+ *	2) any cpu of current node if in @mask
+ *	3) any other online cpu in @mask
+ */
+int smp_call_function_any(const struct cpumask *mask,
+			  void (*func)(void *info), void *info, int wait)
+{
+	unsigned int cpu;
+	const struct cpumask *nodemask;
+	int ret;
+
+	/* Try for same CPU (cheapest) */
+	cpu = get_cpu();
+	if (cpumask_test_cpu(cpu, mask))
+		goto call;
+
+	/* Try for same node. */
+	nodemask = cpumask_of_node(cpu);
+	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
+	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
+		if (cpu_online(cpu))
+			goto call;
+	}
+
+	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
+	cpu = cpumask_any_and(mask, cpu_online_mask);
+call:
+	ret = smp_call_function_single(cpu, func, info, wait);
+	put_cpu();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_function_any);
+
 /**
  * __smp_call_function_single(): Run a function on another CPU
  * @cpu: The CPU to run on.
-- 
cgit v1.2.3


From 60a0a52df149286a25fddf9b2d0cfe77cf0bc516 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 16 Nov 2009 02:30:16 -0800
Subject: sysctl: kill dead ctl_handler definitions.

When removing the sysctl strategy routines I overlooked their definitions
in sysctl.h.    So remove those unnecessary definitions now.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sysctl.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 4e40442777cf..b4f6adc6a02c 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -972,10 +972,6 @@ extern int sysctl_perm(struct ctl_table_root *root,
 
 typedef struct ctl_table ctl_table;
 
-typedef int ctl_handler (struct ctl_table *table,
-			 void __user *oldval, size_t __user *oldlenp,
-			 void __user *newval, size_t newlen);
-
 typedef int proc_handler (struct ctl_table *ctl, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos);
 
@@ -996,13 +992,6 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int,
 extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
 				      void __user *, size_t *, loff_t *);
 
-extern ctl_handler sysctl_data;
-extern ctl_handler sysctl_string;
-extern ctl_handler sysctl_intvec;
-extern ctl_handler sysctl_jiffies;
-extern ctl_handler sysctl_ms_jiffies;
-
-
 /*
  * Register a set of sysctl names by calling register_sysctl_table
  * with an initialised array of struct ctl_table's.  An entry with 
-- 
cgit v1.2.3


From 86926d0096279b9739ceeff40f68d3c33b9119a9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 16 Nov 2009 02:40:01 -0800
Subject: sysctl: Remove CTL_NONE and CTL_UNNUMBERED

Now that the sysctl structures no longer have a ctl_name field
there is no reason to retain the definitions for CTL_NONE and
CTL_UNNUMBERED, or to explain their historic usage.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 Documentation/sysctl/ctl_unnumbered.txt | 22 ----------------------
 include/linux/sysctl.h                  |  9 ---------
 2 files changed, 31 deletions(-)
 delete mode 100644 Documentation/sysctl/ctl_unnumbered.txt

(limited to 'include/linux')

diff --git a/Documentation/sysctl/ctl_unnumbered.txt b/Documentation/sysctl/ctl_unnumbered.txt
deleted file mode 100644
index 23003a8ea3e7..000000000000
--- a/Documentation/sysctl/ctl_unnumbered.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-
-Except for a few extremely rare exceptions user space applications do not use
-the binary sysctl interface.  Instead everyone uses /proc/sys/...  with
-readable ascii names.
-
-Recently the kernel has started supporting setting the binary sysctl value to
-CTL_UNNUMBERED so we no longer need to assign a binary sysctl path to allow
-sysctls to show up in /proc/sys.
-
-Assigning binary sysctl numbers is an endless source of conflicts in sysctl.h,
-breaking of the user space ABI (because of those conflicts), and maintenance
-problems.  A complete pass through all of the sysctl users revealed multiple
-instances where the sysctl binary interface was broken and had gone undetected
-for years.
-
-So please do not add new binary sysctl numbers.  They are unneeded and
-problematic.
-
-If you really need a new binary sysctl number please first merge your sysctl
-into the kernel and then as a separate patch allocate a binary sysctl number.
-
-(ebiederm@xmission.com, June 2007)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index b4f6adc6a02c..c83a86a22381 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -15,9 +15,6 @@
  **  The kernel will then return -ENOTDIR to any application using
  **  the old binary interface.
  **
- **  For new interfaces unless you really need a binary number
- **  please use CTL_UNNUMBERED.
- **
  ****************************************************************
  ****************************************************************
  */
@@ -50,12 +47,6 @@ struct __sysctl_args {
 
 /* Top-level names: */
 
-/* For internal pattern-matching use only: */
-#ifdef __KERNEL__
-#define CTL_NONE	0
-#define CTL_UNNUMBERED	CTL_NONE	/* sysctl without a binary number */
-#endif
-
 enum
 {
 	CTL_KERN=1,		/* General kernel info and control */
-- 
cgit v1.2.3


From c95cf3d09adc9afe7816a13a920b6df36062a3fe Mon Sep 17 00:00:00 2001
From: David-John Willis <John.Willis@Distant-earth.com>
Date: Tue, 17 Nov 2009 18:50:09 +0200
Subject: wl1251: add NVS in EEPROM support

wl1251 supports also that NVS is stored in a separate EEPROM, add support
for that.

kvalo: use platform data instead Kconfig and use kernel style

Signed-off-by: David-John Willis <John.Willis@Distant-earth.com>
Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/wl12xx/wl1251.h      |  1 +
 drivers/net/wireless/wl12xx/wl1251_boot.c | 20 +++++++++++++-------
 drivers/net/wireless/wl12xx/wl1251_reg.h  |  6 ++++++
 drivers/net/wireless/wl12xx/wl1251_spi.c  |  2 ++
 include/linux/spi/wl12xx.h                |  1 +
 5 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h
index a839466664f0..054533f7a124 100644
--- a/drivers/net/wireless/wl12xx/wl1251.h
+++ b/drivers/net/wireless/wl12xx/wl1251.h
@@ -269,6 +269,7 @@ struct wl1251 {
 
 	void (*set_power)(bool enable);
 	int irq;
+	bool use_eeprom;
 
 	enum wl1251_state state;
 	struct mutex mutex;
diff --git a/drivers/net/wireless/wl12xx/wl1251_boot.c b/drivers/net/wireless/wl12xx/wl1251_boot.c
index 5094f24ad034..2e733e7bdfd4 100644
--- a/drivers/net/wireless/wl12xx/wl1251_boot.c
+++ b/drivers/net/wireless/wl12xx/wl1251_boot.c
@@ -494,13 +494,19 @@ int wl1251_boot(struct wl1251 *wl)
 		goto out;
 
 	/* 2. start processing NVS file */
-	ret = wl1251_boot_upload_nvs(wl);
-	if (ret < 0)
-		goto out;
-
-	/* write firmware's last address (ie. it's length) to
-	 * ACX_EEPROMLESS_IND_REG */
-	wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len);
+	if (wl->use_eeprom) {
+		wl1251_reg_write32(wl, ACX_REG_EE_START, START_EEPROM_MGR);
+		msleep(4000);
+		wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, USE_EEPROM);
+	} else {
+		ret = wl1251_boot_upload_nvs(wl);
+		if (ret < 0)
+			goto out;
+
+		/* write firmware's last address (ie. it's length) to
+		 * ACX_EEPROMLESS_IND_REG */
+		wl1251_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len);
+	}
 
 	/* 6. read the EEPROM parameters */
 	tmp = wl1251_reg_read32(wl, SCR_PAD2);
diff --git a/drivers/net/wireless/wl12xx/wl1251_reg.h b/drivers/net/wireless/wl12xx/wl1251_reg.h
index 06e1bd94a739..0ca3b4326056 100644
--- a/drivers/net/wireless/wl12xx/wl1251_reg.h
+++ b/drivers/net/wireless/wl12xx/wl1251_reg.h
@@ -370,6 +370,7 @@ enum wl12xx_acx_int_reg {
  EEPROM location specified in the EE_ADDR register.
  The Wlan hardware hardware clears this bit automatically.
 *===============================================*/
+#define EE_CTL                              (REGISTERS_BASE + 0x2000)
 #define ACX_EE_CTL_REG                      EE_CTL
 #define EE_WRITE                            0x00000001ul
 #define EE_READ                             0x00000002ul
@@ -380,6 +381,7 @@ enum wl12xx_acx_int_reg {
   This register specifies the address
   within the EEPROM from/to which to read/write data.
   ===============================================*/
+#define EE_ADDR                             (REGISTERS_BASE + 0x2008)
 #define ACX_EE_ADDR_REG                     EE_ADDR
 
 /*===============================================
@@ -389,8 +391,12 @@ enum wl12xx_acx_int_reg {
   data from the EEPROM or the write data
   to be written to the EEPROM.
   ===============================================*/
+#define EE_DATA                             (REGISTERS_BASE + 0x2004)
 #define ACX_EE_DATA_REG                     EE_DATA
 
+#define EEPROM_ACCESS_TO                    10000   /* timeout counter */
+#define START_EEPROM_MGR                    0x00000001
+
 /*===============================================
   EEPROM Base Address  - 32bit RW
   ------------------------------------------
diff --git a/drivers/net/wireless/wl12xx/wl1251_spi.c b/drivers/net/wireless/wl12xx/wl1251_spi.c
index 2cf8a2169d43..9cc8c323830f 100644
--- a/drivers/net/wireless/wl12xx/wl1251_spi.c
+++ b/drivers/net/wireless/wl12xx/wl1251_spi.c
@@ -270,6 +270,8 @@ static int __devinit wl1251_spi_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
+	wl->use_eeprom = pdata->use_eeprom;
+
 	ret = request_irq(wl->irq, wl1251_irq, 0, DRIVER_NAME, wl);
 	if (ret < 0) {
 		wl1251_error("request_irq() failed: %d", ret);
diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h
index 11430cab2aad..aed64ed3dc8a 100644
--- a/include/linux/spi/wl12xx.h
+++ b/include/linux/spi/wl12xx.h
@@ -26,6 +26,7 @@
 
 struct wl12xx_platform_data {
 	void (*set_power)(bool enable);
+	bool use_eeprom;
 };
 
 #endif
-- 
cgit v1.2.3


From 0878c3504f92f1bf063d0890a9960d4b9e6c4618 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 18 Nov 2009 16:48:00 +0100
Subject: rfkill: Add missing description for RFKILL_TYPE_GPS

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Janakiram Sistla <janakiram.sistla@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 3392c59d2706..a75e9e566ce5 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -35,6 +35,7 @@
  * @RFKILL_TYPE_UWB: switch is on a ultra wideband device.
  * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device.
  * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
+ * @RFKILL_TYPE_GPS: switch is on a GPS device.
  * @NUM_RFKILL_TYPES: number of defined rfkill types
  */
 enum rfkill_type {
-- 
cgit v1.2.3


From 875405a7793e9c35fab33819e7e5df7a98b6064c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 18 Nov 2009 16:48:01 +0100
Subject: rfkill: Add constant for RFKILL_TYPE_FM radio devices

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Janakiram Sistla <janakiram.sistla@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 2 ++
 net/rfkill/core.c      | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index a75e9e566ce5..97059d08a626 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -36,6 +36,7 @@
  * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device.
  * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
  * @RFKILL_TYPE_GPS: switch is on a GPS device.
+ * @RFKILL_TYPE_FM: switch is on a FM radio device.
  * @NUM_RFKILL_TYPES: number of defined rfkill types
  */
 enum rfkill_type {
@@ -46,6 +47,7 @@ enum rfkill_type {
 	RFKILL_TYPE_WIMAX,
 	RFKILL_TYPE_WWAN,
 	RFKILL_TYPE_GPS,
+	RFKILL_TYPE_FM,
 	NUM_RFKILL_TYPES,
 };
 
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index ba2efb960c60..09f4e161799b 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -592,11 +592,13 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
 		return "wwan";
 	case RFKILL_TYPE_GPS:
 		return "gps";
+	case RFKILL_TYPE_FM:
+		return "fm";
 	default:
 		BUG();
 	}
 
-	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_GPS + 1);
+	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1);
 }
 
 static ssize_t rfkill_type_show(struct device *dev,
-- 
cgit v1.2.3


From 136cfa28615ccce0f9374811480e0b81c4191ea5 Mon Sep 17 00:00:00 2001
From: Rui Paulo <rpaulo@gmail.com>
Date: Wed, 18 Nov 2009 18:40:00 +0000
Subject: mac80211: use a structure to hold the mesh config information element

Signed-off-by: Rui Paulo <rpaulo@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 16 +++++++++++++++-
 net/mac80211/ieee80211_i.h |  3 +--
 net/mac80211/mesh.c        | 23 ++++++++---------------
 net/mac80211/util.c        |  4 ++--
 net/wireless/scan.c        |  9 +++++----
 5 files changed, 31 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 49b1abd2fe97..afa8e0ac27a7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -115,7 +115,6 @@
 #define IEEE80211_MAX_SSID_LEN		32
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
-#define IEEE80211_MESH_CONFIG_LEN	7
 
 #define IEEE80211_QOS_CTL_LEN		2
 #define IEEE80211_QOS_CTL_TID_MASK	0x000F
@@ -554,6 +553,21 @@ struct ieee80211_tim_ie {
 	u8 virtual_map[1];
 } __attribute__ ((packed));
 
+/**
+ * struct ieee80211_meshconf_ie
+ *
+ * This structure refers to "Mesh Configuration information element"
+ */
+struct ieee80211_meshconf_ie {
+	u8 meshconf_psel;
+	u8 meshconf_pmetric;
+	u8 meshconf_congest;
+	u8 meshconf_synch;
+	u8 meshconf_auth;
+	u8 meshconf_form;
+	u8 meshconf_cap;
+} __attribute__ ((packed));
+
 /**
  * struct ieee80211_rann_ie
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 74e79935de0a..87d27f450a05 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -798,7 +798,7 @@ struct ieee802_11_elems {
 	u8 *wmm_param;
 	struct ieee80211_ht_cap *ht_cap_elem;
 	struct ieee80211_ht_info *ht_info_elem;
-	u8 *mesh_config;
+	struct ieee80211_meshconf_ie *mesh_config;
 	u8 *mesh_id;
 	u8 *peer_link;
 	u8 *preq;
@@ -826,7 +826,6 @@ struct ieee802_11_elems {
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
-	u8 mesh_config_len;
 	u8 mesh_id_len;
 	u8 peer_link_len;
 	u8 preq_len;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 3a0683ba357b..51adb1115215 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -16,12 +16,6 @@
 #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ)
 #define IEEE80211_MESH_RANN_INTERVAL	     (1 * HZ)
 
-#define MESHCONF_PP_OFFSET 	0		/* Path Selection Protocol */
-#define MESHCONF_PM_OFFSET	1		/* Path Selection Metric   */
-#define MESHCONF_CC_OFFSET	2		/* Congestion Control Mode */
-#define MESHCONF_SP_OFFSET	3		/* Synchronization Protocol */
-#define MESHCONF_AUTH_OFFSET	4		/* Authentication Protocol */
-#define MESHCONF_CAPAB_OFFSET 	6
 #define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01
 #define MESHCONF_CAPAB_FORWARDING    0x08
 
@@ -87,12 +81,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat
 	 */
 	if (ifmsh->mesh_id_len == ie->mesh_id_len &&
 		memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 &&
-		(ifmsh->mesh_pp_id == *(ie->mesh_config + MESHCONF_PP_OFFSET))&&
-		(ifmsh->mesh_pm_id == *(ie->mesh_config + MESHCONF_PM_OFFSET))&&
-		(ifmsh->mesh_cc_id == *(ie->mesh_config + MESHCONF_CC_OFFSET))&&
-		(ifmsh->mesh_sp_id == *(ie->mesh_config + MESHCONF_SP_OFFSET))&&
-		(ifmsh->mesh_auth_id == *(ie->mesh_config +
-		    MESHCONF_AUTH_OFFSET)))
+		(ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) &&
+		(ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) &&
+		(ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) &&
+		(ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) &&
+		(ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))
 		return true;
 
 	return false;
@@ -105,7 +98,7 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat
  */
 bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
 {
-	return (*(ie->mesh_config + MESHCONF_CAPAB_OFFSET) &
+	return (ie->mesh_config->meshconf_cap &
 	    MESHCONF_CAPAB_ACCEPT_PLINKS) != 0;
 }
 
@@ -262,9 +255,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	if (sdata->u.mesh.mesh_id_len)
 		memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len);
 
-	pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN);
+	pos = skb_put(skb, 2 + sizeof(struct ieee80211_meshconf_ie));
 	*pos++ = WLAN_EID_MESH_CONFIG;
-	*pos++ = IEEE80211_MESH_CONFIG_LEN;
+	*pos++ = sizeof(struct ieee80211_meshconf_ie);
 
 	/* Active path selection protocol ID */
 	*pos++ = sdata->u.mesh.mesh_pp_id;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5ae1bf389849..2fb0432ac830 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -666,8 +666,8 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 			elems->mesh_id_len = elen;
 			break;
 		case WLAN_EID_MESH_CONFIG:
-			elems->mesh_config = pos;
-			elems->mesh_config_len = elen;
+			if (elen >= sizeof(struct ieee80211_meshconf_ie))
+				elems->mesh_config = (void *)pos;
 			break;
 		case WLAN_EID_PEER_LINK:
 			elems->peer_link = pos;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e2d344ff6745..d03447d68cdd 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -217,7 +217,7 @@ static bool is_mesh(struct cfg80211_bss *a,
 		     a->len_information_elements);
 	if (!ie)
 		return false;
-	if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
+	if (ie[1] != sizeof(struct ieee80211_meshconf_ie))
 		return false;
 
 	/*
@@ -225,7 +225,8 @@ static bool is_mesh(struct cfg80211_bss *a,
 	 * comparing since that may differ between stations taking
 	 * part in the same mesh.
 	 */
-	return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0;
+	return memcmp(ie + 2, meshcfg,
+	    sizeof(struct ieee80211_meshconf_ie) - 2) == 0;
 }
 
 static int cmp_bss(struct cfg80211_bss *a,
@@ -399,7 +400,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
 				  res->pub.information_elements,
 				  res->pub.len_information_elements);
 		if (!meshid || !meshcfg ||
-		    meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) {
+		    meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) {
 			/* bogus mesh */
 			kref_put(&res->ref, bss_release);
 			return NULL;
@@ -865,7 +866,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
 			break;
 		case WLAN_EID_MESH_CONFIG:
 			ismesh = true;
-			if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
+			if (ie[1] != sizeof(struct ieee80211_meshconf_ie))
 				break;
 			buf = kmalloc(50, GFP_ATOMIC);
 			if (!buf)
-- 
cgit v1.2.3


From 386e50cc7d82b3799ea6f53267f04f123ae05afe Mon Sep 17 00:00:00 2001
From: Andrew Hendry <andrew.hendry@gmail.com>
Date: Wed, 18 Nov 2009 23:30:41 -0800
Subject: X25: Enable setting of cause and diagnostic fields

Adds SIOCX25SCAUSEDIAG, allowing X.25 programs to set the cause and
diagnostic fields.

Normally used to indicate status upon closing connections.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/x25.h |  1 +
 net/x25/af_x25.c    | 12 ++++++++++++
 net/x25/x25_subr.c  |  6 ++++++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/x25.h b/include/linux/x25.h
index d035e4e87d07..6450a7f12074 100644
--- a/include/linux/x25.h
+++ b/include/linux/x25.h
@@ -25,6 +25,7 @@
 #define SIOCX25SENDCALLACCPT    (SIOCPROTOPRIVATE + 9)
 #define SIOCX25GDTEFACILITIES (SIOCPROTOPRIVATE + 10)
 #define SIOCX25SDTEFACILITIES (SIOCPROTOPRIVATE + 11)
+#define SIOCX25SCAUSEDIAG	(SIOCPROTOPRIVATE + 12)
 
 /*
  *	Values for {get,set}sockopt.
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 39ce03e07d18..ac7dba46fa33 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1471,6 +1471,17 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			break;
 		}
 
+		case SIOCX25SCAUSEDIAG: {
+			struct x25_causediag causediag;
+			rc = -EFAULT;
+			if (copy_from_user(&causediag, argp, sizeof(causediag)))
+				break;
+			x25->causediag = causediag;
+			rc = 0;
+			break;
+
+		}
+
 		case SIOCX25SCUDMATCHLEN: {
 			struct x25_subaddr sub_addr;
 			rc = -EINVAL;
@@ -1639,6 +1650,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 	case SIOCX25GCALLUSERDATA:
 	case SIOCX25SCALLUSERDATA:
 	case SIOCX25GCAUSEDIAG:
+	case SIOCX25SCAUSEDIAG:
 	case SIOCX25SCUDMATCHLEN:
 	case SIOCX25CALLACCPTAPPRV:
 	case SIOCX25SENDCALLACCPT:
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 511a5986af3e..352b32d216fc 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -225,6 +225,12 @@ void x25_write_internal(struct sock *sk, int frametype)
 			break;
 
 		case X25_CLEAR_REQUEST:
+			dptr    = skb_put(skb, 3);
+			*dptr++ = frametype;
+			*dptr++ = x25->causediag.cause;
+			*dptr++ = x25->causediag.diagnostic;
+			break;
+
 		case X25_RESET_REQUEST:
 			dptr    = skb_put(skb, 3);
 			*dptr++ = frametype;
-- 
cgit v1.2.3


From ad4bb6f8883a13bb0f65b194dae36c62a02ac779 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 19 Nov 2009 00:56:30 +0100
Subject: cfg80211: disallow bridging managed/adhoc interfaces

A number of people have tried to add a wireless interface
(in managed mode) to a bridge and then complained that it
doesn't work. It cannot work, however, because in 802.11
networks all packets need to be acknowledged and as such
need to be sent to the right address. Promiscuous doesn't
help here. The wireless address format used for these
links has only space for three addresses, the
 * transmitter, which must be equal to the sender (origin)
 * receiver (on the wireless medium), which is the AP in
   the case of managed mode
 * the recipient (destination), which is on the APs local
   network segment

In an IBSS, it is similar, but the receiver and recipient
must match and the third address is used as the BSSID.

To avoid such mistakes in the future, disallow adding a
wireless interface to a bridge.

Felix has recently added a four-address mode to the AP
and client side that can be used (after negotiating that
it is possible, which must happen out-of-band by setting
up both sides) for bridging, so allow that case.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/if.h     |  1 +
 net/bridge/br_if.c     |  4 ++++
 net/wireless/core.c    |  4 ++++
 net/wireless/nl80211.c | 12 ++++++++----
 net/wireless/util.c    | 31 +++++++++++++++++++++++++++++++
 5 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if.h b/include/linux/if.h
index 3b2a46bf8f8d..3a9f410a296b 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -70,6 +70,7 @@
 #define IFF_XMIT_DST_RELEASE 0x400	/* dev_hard_start_xmit() is allowed to
 					 * release skb->dst
 					 */
+#define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a6f74b2b9571..a2cbe61f6e65 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -390,6 +390,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (dev->br_port != NULL)
 		return -EBUSY;
 
+	/* No bridging devices that dislike that (e.g. wireless) */
+	if (dev->priv_flags & IFF_DONT_BRIDGE)
+		return -EOPNOTSUPP;
+
 	p = new_nbp(br, dev);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e2cc6e7522dd..fc5e9b508607 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -697,6 +697,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 #endif
 		if (!dev->ethtool_ops)
 			dev->ethtool_ops = &cfg80211_ethtool_ops;
+
+		if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+		     wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
+			dev->priv_flags |= IFF_DONT_BRIDGE;
 		break;
 	case NETDEV_GOING_DOWN:
 		switch (wdev->iftype) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b7b0f67b0c61..149539ade15e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -969,10 +969,14 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
 }
 
 static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
-			       u8 use_4addr, enum nl80211_iftype iftype)
+			       struct net_device *netdev, u8 use_4addr,
+			       enum nl80211_iftype iftype)
 {
-	if (!use_4addr)
+	if (!use_4addr) {
+		if (netdev && netdev->br_port)
+			return -EBUSY;
 		return 0;
+	}
 
 	switch (iftype) {
 	case NL80211_IFTYPE_AP_VLAN:
@@ -1033,7 +1037,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
 		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
 		change = true;
-		err = nl80211_valid_4addr(rdev, params.use_4addr, ntype);
+		err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype);
 		if (err)
 			goto unlock;
 	} else {
@@ -1111,7 +1115,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
 		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
-		err = nl80211_valid_4addr(rdev, params.use_4addr, type);
+		err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type);
 		if (err)
 			goto unlock;
 	}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 17a7a4cfc617..59361fdcb5d0 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -658,6 +658,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 	    !(rdev->wiphy.interface_modes & (1 << ntype)))
 		return -EOPNOTSUPP;
 
+	/* if it's part of a bridge, reject changing type to station/ibss */
+	if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC ||
+			     ntype == NL80211_IFTYPE_STATION))
+		return -EBUSY;
+
 	if (ntype != otype) {
 		dev->ieee80211_ptr->use_4addr = false;
 
@@ -687,5 +692,31 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 	if (!err && params && params->use_4addr != -1)
 		dev->ieee80211_ptr->use_4addr = params->use_4addr;
 
+	if (!err) {
+		dev->priv_flags &= ~IFF_DONT_BRIDGE;
+		switch (ntype) {
+		case NL80211_IFTYPE_STATION:
+			if (dev->ieee80211_ptr->use_4addr)
+				break;
+			/* fall through */
+		case NL80211_IFTYPE_ADHOC:
+			dev->priv_flags |= IFF_DONT_BRIDGE;
+			break;
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_AP_VLAN:
+		case NL80211_IFTYPE_WDS:
+		case NL80211_IFTYPE_MESH_POINT:
+			/* bridging OK */
+			break;
+		case NL80211_IFTYPE_MONITOR:
+			/* monitor can't bridge anyway */
+			break;
+		case NL80211_IFTYPE_UNSPECIFIED:
+		case __NL80211_IFTYPE_AFTER_LAST:
+			/* not happening */
+			break;
+		}
+	}
+
 	return err;
 }
-- 
cgit v1.2.3


From 3d7a641e544e428191667e8b1f83f96fa46dbd65 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:10:23 +0000
Subject: SLOW_WORK: Wait for outstanding work items belonging to a module to
 clear

Wait for outstanding slow work items belonging to a module to clear when
unregistering that module as a user of the facility.  This prevents the put_ref
code of a work item from being taken away before it returns.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt |  13 ++++-
 fs/fscache/main.c           |   6 +-
 fs/fscache/object.c         |   1 +
 fs/fscache/operation.c      |   1 +
 fs/gfs2/main.c              |   4 +-
 fs/gfs2/recovery.c          |   1 +
 include/linux/slow-work.h   |   8 ++-
 kernel/slow-work.c          | 132 ++++++++++++++++++++++++++++++++++++++++++--
 8 files changed, 150 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index ebc50f808ea4..f12fda31dcdc 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -64,9 +64,11 @@ USING SLOW WORK ITEMS
 Firstly, a module or subsystem wanting to make use of slow work items must
 register its interest:
 
-	 int ret = slow_work_register_user();
+	 int ret = slow_work_register_user(struct module *module);
 
-This will return 0 if successful, or a -ve error upon failure.
+This will return 0 if successful, or a -ve error upon failure.  The module
+pointer should be the module interested in using this facility (almost
+certainly THIS_MODULE).
 
 
 Slow work items may then be set up by:
@@ -110,7 +112,12 @@ operation.  When all a module's slow work items have been processed, and the
 module has no further interest in the facility, it should unregister its
 interest:
 
-	slow_work_unregister_user();
+	slow_work_unregister_user(struct module *module);
+
+The module pointer is used to wait for all outstanding work items for that
+module before completing the unregistration.  This prevents the put_ref() code
+from being taken away before it completes.  module should almost certainly be
+THIS_MODULE.
 
 
 ===============
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 4de41b597499..add6bdb53f04 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -48,7 +48,7 @@ static int __init fscache_init(void)
 {
 	int ret;
 
-	ret = slow_work_register_user();
+	ret = slow_work_register_user(THIS_MODULE);
 	if (ret < 0)
 		goto error_slow_work;
 
@@ -80,7 +80,7 @@ error_kobj:
 error_cookie_jar:
 	fscache_proc_cleanup();
 error_proc:
-	slow_work_unregister_user();
+	slow_work_unregister_user(THIS_MODULE);
 error_slow_work:
 	return ret;
 }
@@ -97,7 +97,7 @@ static void __exit fscache_exit(void)
 	kobject_put(fscache_root);
 	kmem_cache_destroy(fscache_cookie_jar);
 	fscache_proc_cleanup();
-	slow_work_unregister_user();
+	slow_work_unregister_user(THIS_MODULE);
 	printk(KERN_NOTICE "FS-Cache: Unloaded\n");
 }
 
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 392a41b1b79d..d236eb1d6f37 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -45,6 +45,7 @@ static void fscache_enqueue_dependents(struct fscache_object *);
 static void fscache_dequeue_object(struct fscache_object *);
 
 const struct slow_work_ops fscache_object_slow_work_ops = {
+	.owner		= THIS_MODULE,
 	.get_ref	= fscache_object_slow_work_get_ref,
 	.put_ref	= fscache_object_slow_work_put_ref,
 	.execute	= fscache_object_slow_work_execute,
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index e7f8d53b8b6b..f1a2857b2ff5 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -453,6 +453,7 @@ static void fscache_op_execute(struct slow_work *work)
 }
 
 const struct slow_work_ops fscache_op_slow_work_ops = {
+	.owner		= THIS_MODULE,
 	.get_ref	= fscache_op_get_ref,
 	.put_ref	= fscache_op_put_ref,
 	.execute	= fscache_op_execute,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index eacd78a5d082..5b31f7741a8f 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -114,7 +114,7 @@ static int __init init_gfs2_fs(void)
 	if (error)
 		goto fail_unregister;
 
-	error = slow_work_register_user();
+	error = slow_work_register_user(THIS_MODULE);
 	if (error)
 		goto fail_slow;
 
@@ -163,7 +163,7 @@ static void __exit exit_gfs2_fs(void)
 	gfs2_unregister_debugfs();
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
-	slow_work_unregister_user();
+	slow_work_unregister_user(THIS_MODULE);
 
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 59d2695509d3..b2bb779f09ed 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -593,6 +593,7 @@ fail:
 }
 
 struct slow_work_ops gfs2_recover_ops = {
+	.owner	 = THIS_MODULE,
 	.get_ref = gfs2_recover_get_ref,
 	.put_ref = gfs2_recover_put_ref,
 	.execute = gfs2_recover_work,
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index b65c8881f07a..9adb2b30754f 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -24,6 +24,9 @@ struct slow_work;
  * The operations used to support slow work items
  */
 struct slow_work_ops {
+	/* owner */
+	struct module *owner;
+
 	/* get a ref on a work item
 	 * - return 0 if successful, -ve if not
 	 */
@@ -42,6 +45,7 @@ struct slow_work_ops {
  *   queued
  */
 struct slow_work {
+	struct module		*owner;	/* the owning module */
 	unsigned long		flags;
 #define SLOW_WORK_PENDING	0	/* item pending (further) execution */
 #define SLOW_WORK_EXECUTING	1	/* item currently executing */
@@ -84,8 +88,8 @@ static inline void vslow_work_init(struct slow_work *work,
 }
 
 extern int slow_work_enqueue(struct slow_work *work);
-extern int slow_work_register_user(void);
-extern void slow_work_unregister_user(void);
+extern int slow_work_register_user(struct module *owner);
+extern void slow_work_unregister_user(struct module *owner);
 
 #ifdef CONFIG_SYSCTL
 extern ctl_table slow_work_sysctls[];
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 0d31135efbf4..dd08f376e406 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -22,6 +22,8 @@
 #define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after
 					 * OOM */
 
+#define SLOW_WORK_THREAD_LIMIT	255	/* abs maximum number of slow-work threads */
+
 static void slow_work_cull_timeout(unsigned long);
 static void slow_work_oom_timeout(unsigned long);
 
@@ -46,7 +48,7 @@ static unsigned vslow_work_proportion = 50; /* % of threads that may process
 
 #ifdef CONFIG_SYSCTL
 static const int slow_work_min_min_threads = 2;
-static int slow_work_max_max_threads = 255;
+static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
 static const int slow_work_min_vslow = 1;
 static const int slow_work_max_vslow = 99;
 
@@ -97,6 +99,23 @@ static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
 static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
 static struct slow_work slow_work_new_thread; /* new thread starter */
 
+/*
+ * slow work ID allocation (use slow_work_queue_lock)
+ */
+static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
+
+/*
+ * Unregistration tracking to prevent put_ref() from disappearing during module
+ * unload
+ */
+#ifdef CONFIG_MODULES
+static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
+static struct module *slow_work_unreg_module;
+static struct slow_work *slow_work_unreg_work_item;
+static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
+static DEFINE_MUTEX(slow_work_unreg_sync_lock);
+#endif
+
 /*
  * The queues of work items and the lock governing access to them.  These are
  * shared between all the CPUs.  It doesn't make sense to have per-CPU queues
@@ -149,8 +168,11 @@ static unsigned slow_work_calc_vsmax(void)
  * Attempt to execute stuff queued on a slow thread.  Return true if we managed
  * it, false if there was nothing to do.
  */
-static bool slow_work_execute(void)
+static bool slow_work_execute(int id)
 {
+#ifdef CONFIG_MODULES
+	struct module *module;
+#endif
 	struct slow_work *work = NULL;
 	unsigned vsmax;
 	bool very_slow;
@@ -186,6 +208,12 @@ static bool slow_work_execute(void)
 	} else {
 		very_slow = false; /* avoid the compiler warning */
 	}
+
+#ifdef CONFIG_MODULES
+	if (work)
+		slow_work_thread_processing[id] = work->owner;
+#endif
+
 	spin_unlock_irq(&slow_work_queue_lock);
 
 	if (!work)
@@ -219,7 +247,18 @@ static bool slow_work_execute(void)
 		spin_unlock_irq(&slow_work_queue_lock);
 	}
 
+	/* sort out the race between module unloading and put_ref() */
 	work->ops->put_ref(work);
+
+#ifdef CONFIG_MODULES
+	module = slow_work_thread_processing[id];
+	slow_work_thread_processing[id] = NULL;
+	smp_mb();
+	if (slow_work_unreg_work_item == work ||
+	    slow_work_unreg_module == module)
+		wake_up_all(&slow_work_unreg_wq);
+#endif
+
 	return true;
 
 auto_requeue:
@@ -232,6 +271,7 @@ auto_requeue:
 	else
 		list_add_tail(&work->link, &slow_work_queue);
 	spin_unlock_irq(&slow_work_queue_lock);
+	slow_work_thread_processing[id] = NULL;
 	return true;
 }
 
@@ -368,13 +408,22 @@ static inline bool slow_work_available(int vsmax)
  */
 static int slow_work_thread(void *_data)
 {
-	int vsmax;
+	int vsmax, id;
 
 	DEFINE_WAIT(wait);
 
 	set_freezable();
 	set_user_nice(current, -5);
 
+	/* allocate ourselves an ID */
+	spin_lock_irq(&slow_work_queue_lock);
+	id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
+	BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
+	__set_bit(id, slow_work_ids);
+	spin_unlock_irq(&slow_work_queue_lock);
+
+	sprintf(current->comm, "kslowd%03u", id);
+
 	for (;;) {
 		vsmax = vslow_work_proportion;
 		vsmax *= atomic_read(&slow_work_thread_count);
@@ -395,7 +444,7 @@ static int slow_work_thread(void *_data)
 		vsmax *= atomic_read(&slow_work_thread_count);
 		vsmax /= 100;
 
-		if (slow_work_available(vsmax) && slow_work_execute()) {
+		if (slow_work_available(vsmax) && slow_work_execute(id)) {
 			cond_resched();
 			if (list_empty(&slow_work_queue) &&
 			    list_empty(&vslow_work_queue) &&
@@ -412,6 +461,10 @@ static int slow_work_thread(void *_data)
 			break;
 	}
 
+	spin_lock_irq(&slow_work_queue_lock);
+	__clear_bit(id, slow_work_ids);
+	spin_unlock_irq(&slow_work_queue_lock);
+
 	if (atomic_dec_and_test(&slow_work_thread_count))
 		complete_and_exit(&slow_work_last_thread_exited, 0);
 	return 0;
@@ -475,6 +528,7 @@ static void slow_work_new_thread_execute(struct slow_work *work)
 }
 
 static const struct slow_work_ops slow_work_new_thread_ops = {
+	.owner		= THIS_MODULE,
 	.get_ref	= slow_work_new_thread_get_ref,
 	.put_ref	= slow_work_new_thread_put_ref,
 	.execute	= slow_work_new_thread_execute,
@@ -546,12 +600,13 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
 
 /**
  * slow_work_register_user - Register a user of the facility
+ * @module: The module about to make use of the facility
  *
  * Register a user of the facility, starting up the initial threads if there
  * aren't any other users at this point.  This will return 0 if successful, or
  * an error if not.
  */
-int slow_work_register_user(void)
+int slow_work_register_user(struct module *module)
 {
 	struct task_struct *p;
 	int loop;
@@ -598,14 +653,79 @@ error:
 }
 EXPORT_SYMBOL(slow_work_register_user);
 
+/*
+ * wait for all outstanding items from the calling module to complete
+ * - note that more items may be queued whilst we're waiting
+ */
+static void slow_work_wait_for_items(struct module *module)
+{
+	DECLARE_WAITQUEUE(myself, current);
+	struct slow_work *work;
+	int loop;
+
+	mutex_lock(&slow_work_unreg_sync_lock);
+	add_wait_queue(&slow_work_unreg_wq, &myself);
+
+	for (;;) {
+		spin_lock_irq(&slow_work_queue_lock);
+
+		/* first of all, we wait for the last queued item in each list
+		 * to be processed */
+		list_for_each_entry_reverse(work, &vslow_work_queue, link) {
+			if (work->owner == module) {
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				slow_work_unreg_work_item = work;
+				goto do_wait;
+			}
+		}
+		list_for_each_entry_reverse(work, &slow_work_queue, link) {
+			if (work->owner == module) {
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				slow_work_unreg_work_item = work;
+				goto do_wait;
+			}
+		}
+
+		/* then we wait for the items being processed to finish */
+		slow_work_unreg_module = module;
+		smp_mb();
+		for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
+			if (slow_work_thread_processing[loop] == module)
+				goto do_wait;
+		}
+		spin_unlock_irq(&slow_work_queue_lock);
+		break; /* okay, we're done */
+
+	do_wait:
+		spin_unlock_irq(&slow_work_queue_lock);
+		schedule();
+		slow_work_unreg_work_item = NULL;
+		slow_work_unreg_module = NULL;
+	}
+
+	remove_wait_queue(&slow_work_unreg_wq, &myself);
+	mutex_unlock(&slow_work_unreg_sync_lock);
+}
+
 /**
  * slow_work_unregister_user - Unregister a user of the facility
+ * @module: The module whose items should be cleared
  *
  * Unregister a user of the facility, killing all the threads if this was the
  * last one.
+ *
+ * This waits for all the work items belonging to the nominated module to go
+ * away before proceeding.
  */
-void slow_work_unregister_user(void)
+void slow_work_unregister_user(struct module *module)
 {
+	/* first of all, wait for all outstanding items from the calling module
+	 * to complete */
+	if (module)
+		slow_work_wait_for_items(module);
+
+	/* then we can actually go about shutting down the facility if need
+	 * be */
 	mutex_lock(&slow_work_user_lock);
 
 	BUG_ON(slow_work_user_count <= 0);
-- 
cgit v1.2.3


From 0160950297c08f8233c89b9f9e7dd59cfb080809 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 19 Nov 2009 18:10:43 +0000
Subject: SLOW_WORK: Add support for cancellation of slow work

Add support for cancellation of queued slow work and delayed slow work items.
The cancellation functions will wait for items that are pending or undergoing
execution to be discarded by the slow work facility.

Attempting to enqueue work that is in the process of being cancelled will
result in ECANCELED.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt | 12 ++++++-
 include/linux/slow-work.h   |  2 ++
 kernel/slow-work.c          | 81 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 88 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index c655c517fc68..2e384bd4dead 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -108,7 +108,17 @@ on the item, 0 otherwise.
 
 
 The items are reference counted, so there ought to be no need for a flush
-operation.  When all a module's slow work items have been processed, and the
+operation.  But as the reference counting is optional, means to cancel
+existing work items are also included:
+
+	cancel_slow_work(&myitem);
+
+can be used to cancel pending work.  The above cancel function waits for
+existing work to have been executed (or prevent execution of them, depending
+on timing).
+
+
+When all a module's slow work items have been processed, and the
 module has no further interest in the facility, it should unregister its
 interest:
 
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index 9adb2b30754f..eef20182d5b4 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -51,6 +51,7 @@ struct slow_work {
 #define SLOW_WORK_EXECUTING	1	/* item currently executing */
 #define SLOW_WORK_ENQ_DEFERRED	2	/* item enqueue deferred */
 #define SLOW_WORK_VERY_SLOW	3	/* item is very slow */
+#define SLOW_WORK_CANCELLING	4	/* item is being cancelled, don't enqueue */
 	const struct slow_work_ops *ops; /* operations table for this item */
 	struct list_head	link;	/* link in queue */
 };
@@ -88,6 +89,7 @@ static inline void vslow_work_init(struct slow_work *work,
 }
 
 extern int slow_work_enqueue(struct slow_work *work);
+extern void slow_work_cancel(struct slow_work *work);
 extern int slow_work_register_user(struct module *owner);
 extern void slow_work_unregister_user(struct module *owner);
 
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index fccf421eb5c1..671cc434532a 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -236,12 +236,17 @@ static bool slow_work_execute(int id)
 	if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
 		BUG();
 
-	work->ops->execute(work);
+	/* don't execute if the work is in the process of being cancelled */
+	if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
+		work->ops->execute(work);
 
 	if (very_slow)
 		atomic_dec(&vslow_work_executing_count);
 	clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
 
+	/* wake up anyone waiting for this work to be complete */
+	wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
+
 	/* if someone tried to enqueue the item whilst we were executing it,
 	 * then it'll be left unenqueued to avoid multiple threads trying to
 	 * execute it simultaneously
@@ -314,11 +319,16 @@ auto_requeue:
  * allowed to pick items to execute.  This ensures that very slow items won't
  * overly block ones that are just ordinarily slow.
  *
- * Returns 0 if successful, -EAGAIN if not.
+ * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
+ * attempted queued)
  */
 int slow_work_enqueue(struct slow_work *work)
 {
 	unsigned long flags;
+	int ret;
+
+	if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
+		return -ECANCELED;
 
 	BUG_ON(slow_work_user_count <= 0);
 	BUG_ON(!work);
@@ -335,6 +345,9 @@ int slow_work_enqueue(struct slow_work *work)
 	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
 		spin_lock_irqsave(&slow_work_queue_lock, flags);
 
+		if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
+			goto cancelled;
+
 		/* we promise that we will not attempt to execute the work
 		 * function in more than one thread simultaneously
 		 *
@@ -352,8 +365,9 @@ int slow_work_enqueue(struct slow_work *work)
 		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
 			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
 		} else {
-			if (slow_work_get_ref(work) < 0)
-				goto cant_get_ref;
+			ret = slow_work_get_ref(work);
+			if (ret < 0)
+				goto failed;
 			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
 				list_add_tail(&work->link, &vslow_work_queue);
 			else
@@ -365,12 +379,67 @@ int slow_work_enqueue(struct slow_work *work)
 	}
 	return 0;
 
-cant_get_ref:
+cancelled:
+	ret = -ECANCELED;
+failed:
 	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
-	return -EAGAIN;
+	return ret;
 }
 EXPORT_SYMBOL(slow_work_enqueue);
 
+static int slow_work_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+/**
+ * slow_work_cancel - Cancel a slow work item
+ * @work: The work item to cancel
+ *
+ * This function will cancel a previously enqueued work item. If we cannot
+ * cancel the work item, it is guarenteed to have run when this function
+ * returns.
+ */
+void slow_work_cancel(struct slow_work *work)
+{
+	bool wait = true, put = false;
+
+	set_bit(SLOW_WORK_CANCELLING, &work->flags);
+
+	spin_lock_irq(&slow_work_queue_lock);
+
+	if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
+	    !list_empty(&work->link)) {
+		/* the link in the pending queue holds a reference on the item
+		 * that we will need to release */
+		list_del_init(&work->link);
+		wait = false;
+		put = true;
+		clear_bit(SLOW_WORK_PENDING, &work->flags);
+
+	} else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
+		/* the executor is holding our only reference on the item, so
+		 * we merely need to wait for it to finish executing */
+		clear_bit(SLOW_WORK_PENDING, &work->flags);
+	}
+
+	spin_unlock_irq(&slow_work_queue_lock);
+
+	/* the EXECUTING flag is set by the executor whilst the spinlock is set
+	 * and before the item is dequeued - so assuming the above doesn't
+	 * actually dequeue it, simply waiting for the EXECUTING flag to be
+	 * released here should be sufficient */
+	if (wait)
+		wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
+			    TASK_UNINTERRUPTIBLE);
+
+	clear_bit(SLOW_WORK_CANCELLING, &work->flags);
+	if (put)
+		slow_work_put_ref(work);
+}
+EXPORT_SYMBOL(slow_work_cancel);
+
 /*
  * Schedule a cull of the thread pool at some time in the near future
  */
-- 
cgit v1.2.3


From 6b8268b17a1ffc942bc72d7d00274e433d6b6719 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 19 Nov 2009 18:10:47 +0000
Subject: SLOW_WORK: Add delayed_slow_work support

This adds support for starting slow work with a delay, similar
to the functionality we have for workqueues.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt |  16 +++++-
 include/linux/slow-work.h   |  29 ++++++++++
 kernel/slow-work.c          | 129 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 171 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index 2e384bd4dead..a9d1b0ffdded 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long.
 Operations of both types may sleep during execution, thus tying up the thread
 loaned to it.
 
+A further class of work item is available, based on the slow work item class:
+
+ (*) Delayed slow work items.
+
+These are slow work items that have a timer to defer queueing of the item for
+a while.
+
 
 THREAD-TO-CLASS ALLOCATION
 --------------------------
@@ -93,6 +100,10 @@ Slow work items may then be set up by:
 
 	slow_work_init(&myitem, &myitem_ops);
 
+     or:
+
+	delayed_slow_work_init(&myitem, &myitem_ops);
+
      or:
 
 	vslow_work_init(&myitem, &myitem_ops);
@@ -104,7 +115,9 @@ A suitably set up work item can then be enqueued for processing:
 	int ret = slow_work_enqueue(&myitem);
 
 This will return a -ve error if the thread pool is unable to gain a reference
-on the item, 0 otherwise.
+on the item, 0 otherwise, or (for delayed work):
+
+	int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay);
 
 
 The items are reference counted, so there ought to be no need for a flush
@@ -112,6 +125,7 @@ operation.  But as the reference counting is optional, means to cancel
 existing work items are also included:
 
 	cancel_slow_work(&myitem);
+	cancel_delayed_slow_work(&myitem);
 
 can be used to cancel pending work.  The above cancel function waits for
 existing work to have been executed (or prevent execution of them, depending
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index eef20182d5b4..b245b9a9cc0b 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -17,6 +17,7 @@
 #ifdef CONFIG_SLOW_WORK
 
 #include <linux/sysctl.h>
+#include <linux/timer.h>
 
 struct slow_work;
 
@@ -52,10 +53,16 @@ struct slow_work {
 #define SLOW_WORK_ENQ_DEFERRED	2	/* item enqueue deferred */
 #define SLOW_WORK_VERY_SLOW	3	/* item is very slow */
 #define SLOW_WORK_CANCELLING	4	/* item is being cancelled, don't enqueue */
+#define SLOW_WORK_DELAYED	5	/* item is struct delayed_slow_work with active timer */
 	const struct slow_work_ops *ops; /* operations table for this item */
 	struct list_head	link;	/* link in queue */
 };
 
+struct delayed_slow_work {
+	struct slow_work	work;
+	struct timer_list	timer;
+};
+
 /**
  * slow_work_init - Initialise a slow work item
  * @work: The work item to initialise
@@ -71,6 +78,20 @@ static inline void slow_work_init(struct slow_work *work,
 	INIT_LIST_HEAD(&work->link);
 }
 
+/**
+ * slow_work_init - Initialise a delayed slow work item
+ * @work: The work item to initialise
+ * @ops: The operations to use to handle the slow work item
+ *
+ * Initialise a delayed slow work item.
+ */
+static inline void delayed_slow_work_init(struct delayed_slow_work *dwork,
+					  const struct slow_work_ops *ops)
+{
+	init_timer(&dwork->timer);
+	slow_work_init(&dwork->work, ops);
+}
+
 /**
  * vslow_work_init - Initialise a very slow work item
  * @work: The work item to initialise
@@ -93,6 +114,14 @@ extern void slow_work_cancel(struct slow_work *work);
 extern int slow_work_register_user(struct module *owner);
 extern void slow_work_unregister_user(struct module *owner);
 
+extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
+				     unsigned long delay);
+
+static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork)
+{
+	slow_work_cancel(&dwork->work);
+}
+
 #ifdef CONFIG_SYSCTL
 extern ctl_table slow_work_sysctls[];
 #endif
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 671cc434532a..f67e1daae93d 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -406,11 +406,40 @@ void slow_work_cancel(struct slow_work *work)
 	bool wait = true, put = false;
 
 	set_bit(SLOW_WORK_CANCELLING, &work->flags);
+	smp_mb();
+
+	/* if the work item is a delayed work item with an active timer, we
+	 * need to wait for the timer to finish _before_ getting the spinlock,
+	 * lest we deadlock against the timer routine
+	 *
+	 * the timer routine will leave DELAYED set if it notices the
+	 * CANCELLING flag in time
+	 */
+	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
+		struct delayed_slow_work *dwork =
+			container_of(work, struct delayed_slow_work, work);
+		del_timer_sync(&dwork->timer);
+	}
 
 	spin_lock_irq(&slow_work_queue_lock);
 
-	if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
-	    !list_empty(&work->link)) {
+	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
+		/* the timer routine aborted or never happened, so we are left
+		 * holding the timer's reference on the item and should just
+		 * drop the pending flag and wait for any ongoing execution to
+		 * finish */
+		struct delayed_slow_work *dwork =
+			container_of(work, struct delayed_slow_work, work);
+
+		BUG_ON(timer_pending(&dwork->timer));
+		BUG_ON(!list_empty(&work->link));
+
+		clear_bit(SLOW_WORK_DELAYED, &work->flags);
+		put = true;
+		clear_bit(SLOW_WORK_PENDING, &work->flags);
+
+	} else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
+		   !list_empty(&work->link)) {
 		/* the link in the pending queue holds a reference on the item
 		 * that we will need to release */
 		list_del_init(&work->link);
@@ -440,6 +469,102 @@ void slow_work_cancel(struct slow_work *work)
 }
 EXPORT_SYMBOL(slow_work_cancel);
 
+/*
+ * Handle expiry of the delay timer, indicating that a delayed slow work item
+ * should now be queued if not cancelled
+ */
+static void delayed_slow_work_timer(unsigned long data)
+{
+	struct slow_work *work = (struct slow_work *) data;
+	unsigned long flags;
+	bool queued = false, put = false;
+
+	spin_lock_irqsave(&slow_work_queue_lock, flags);
+	if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
+		clear_bit(SLOW_WORK_DELAYED, &work->flags);
+
+		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
+			/* we discard the reference the timer was holding in
+			 * favour of the one the executor holds */
+			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
+			put = true;
+		} else {
+			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
+				list_add_tail(&work->link, &vslow_work_queue);
+			else
+				list_add_tail(&work->link, &slow_work_queue);
+			queued = true;
+		}
+	}
+
+	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
+	if (put)
+		slow_work_put_ref(work);
+	if (queued)
+		wake_up(&slow_work_thread_wq);
+}
+
+/**
+ * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
+ * @dwork: The delayed work item to queue
+ * @delay: When to start executing the work, in jiffies from now
+ *
+ * This is similar to slow_work_enqueue(), but it adds a delay before the work
+ * is actually queued for processing.
+ *
+ * The item can have delayed processing requested on it whilst it is being
+ * executed.  The delay will begin immediately, and if it expires before the
+ * item finishes executing, the item will be placed back on the queue when it
+ * has done executing.
+ */
+int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
+			      unsigned long delay)
+{
+	struct slow_work *work = &dwork->work;
+	unsigned long flags;
+	int ret;
+
+	if (delay == 0)
+		return slow_work_enqueue(&dwork->work);
+
+	BUG_ON(slow_work_user_count <= 0);
+	BUG_ON(!work);
+	BUG_ON(!work->ops);
+
+	if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
+		return -ECANCELED;
+
+	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
+		spin_lock_irqsave(&slow_work_queue_lock, flags);
+
+		if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
+			goto cancelled;
+
+		/* the timer holds a reference whilst it is pending */
+		ret = work->ops->get_ref(work);
+		if (ret < 0)
+			goto cant_get_ref;
+
+		if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
+			BUG();
+		dwork->timer.expires = jiffies + delay;
+		dwork->timer.data = (unsigned long) work;
+		dwork->timer.function = delayed_slow_work_timer;
+		add_timer(&dwork->timer);
+
+		spin_unlock_irqrestore(&slow_work_queue_lock, flags);
+	}
+
+	return 0;
+
+cancelled:
+	ret = -ECANCELED;
+cant_get_ref:
+	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(delayed_slow_work_enqueue);
+
 /*
  * Schedule a cull of the thread pool at some time in the near future
  */
-- 
cgit v1.2.3


From 8fba10a42d191de612e60e7009c8f0313f90a9b3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:10:51 +0000
Subject: SLOW_WORK: Allow the work items to be viewed through a /proc file

Allow the executing and queued work items to be viewed through a /proc file
for debugging purposes.  The contents look something like the following:

    THR PID   ITEM ADDR        FL MARK  DESC
    === ===== ================ == ===== ==========
      0  3005 ffff880023f52348  a 952ms FSC: OBJ17d3: LOOK
      1  3006 ffff880024e33668  2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2
      2  3165 ffff8800296dd180  a 424ms FSC: OBJ17e4: LOOK
      3  4089 ffff8800262c8d78  a 212ms FSC: OBJ17ea: CRTN
      4  4090 ffff88002792bed8  2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2
      5  4092 ffff88002a0ef308  2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2
      6  4094 ffff88002abaf4b8  2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2
      7  4095 ffff88002bb188e0  a 388ms FSC: OBJ17e9: CRTN
    vsq     - ffff880023d99668  1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2
    vsq     - ffff8800295d1740  1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2
    vsq     - ffff880025ba3308  1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2
    vsq     - ffff880024ec83e0  1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2
    vsq     - ffff880026618e00  1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2
    vsq     - ffff880025a2a4b8  1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2
    vsq     - ffff880023cbe6d8  9 212ms FSC: OBJ17eb: LOOK
    vsq     - ffff880024d37590  9 212ms FSC: OBJ17ec: LOOK
    vsq     - ffff880027746cb0  9 212ms FSC: OBJ17ed: LOOK
    vsq     - ffff880024d37ae8  9 212ms FSC: OBJ17ee: LOOK
    vsq     - ffff880024d37cb0  9 212ms FSC: OBJ17ef: LOOK
    vsq     - ffff880025036550  9 212ms FSC: OBJ17f0: LOOK
    vsq     - ffff8800250368e0  9 212ms FSC: OBJ17f1: LOOK
    vsq     - ffff880025036aa8  9 212ms FSC: OBJ17f2: LOOK

In the 'THR' column, executing items show the thread they're occupying and
queued threads indicate which queue they're on.  'PID' shows the process ID of
a slow-work thread that's executing something.  'FL' shows the work item flags.
'MARK' indicates how long since an item was queued or began executing.  Lastly,
the 'DESC' column permits the owner of an item to give some information.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt |  60 +++++++++++-
 include/linux/slow-work.h   |  11 +++
 init/Kconfig                |  10 ++
 kernel/Makefile             |   1 +
 kernel/slow-work-proc.c     | 227 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/slow-work.c          |  44 ++++++---
 kernel/slow-work.h          |  72 ++++++++++++++
 7 files changed, 413 insertions(+), 12 deletions(-)
 create mode 100644 kernel/slow-work-proc.c
 create mode 100644 kernel/slow-work.h

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index a9d1b0ffdded..f120238e70fe 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -149,7 +149,8 @@ ITEM OPERATIONS
 ===============
 
 Each work item requires a table of operations of type struct slow_work_ops.
-Only ->execute() is required, getting and putting of a reference are optional.
+Only ->execute() is required; the getting and putting of a reference and the
+describing of an item are all optional.
 
  (*) Get a reference on an item:
 
@@ -179,6 +180,16 @@ Only ->execute() is required, getting and putting of a reference are optional.
      This should perform the work required of the item.  It may sleep, it may
      perform disk I/O and it may wait for locks.
 
+ (*) View an item through /proc:
+
+	void (*desc)(struct slow_work *work, struct seq_file *m);
+
+     If supplied, this should print to 'm' a small string describing the work
+     the item is to do.  This should be no more than about 40 characters, and
+     shouldn't include a newline character.
+
+     See the 'Viewing executing and queued items' section below.
+
 
 ==================
 POOL CONFIGURATION
@@ -203,3 +214,50 @@ The slow-work thread pool has a number of configurables:
      is bounded to between 1 and one fewer than the number of active threads.
      This ensures there is always at least one thread that can process very
      slow work items, and always at least one thread that won't.
+
+
+==================================
+VIEWING EXECUTING AND QUEUED ITEMS
+==================================
+
+If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available:
+
+	/proc/slow_work_rq
+
+through which the list of work items being executed and the queues of items to
+be executed may be viewed.  The owner of a work item is given the chance to
+add some information of its own.
+
+The contents look something like the following:
+
+    THR PID   ITEM ADDR        FL MARK  DESC
+    === ===== ================ == ===== ==========
+      0  3005 ffff880023f52348  a 952ms FSC: OBJ17d3: LOOK
+      1  3006 ffff880024e33668  2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2
+      2  3165 ffff8800296dd180  a 424ms FSC: OBJ17e4: LOOK
+      3  4089 ffff8800262c8d78  a 212ms FSC: OBJ17ea: CRTN
+      4  4090 ffff88002792bed8  2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2
+      5  4092 ffff88002a0ef308  2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2
+      6  4094 ffff88002abaf4b8  2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2
+      7  4095 ffff88002bb188e0  a 388ms FSC: OBJ17e9: CRTN
+    vsq     - ffff880023d99668  1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2
+    vsq     - ffff8800295d1740  1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2
+    vsq     - ffff880025ba3308  1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2
+    vsq     - ffff880024ec83e0  1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2
+    vsq     - ffff880026618e00  1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2
+    vsq     - ffff880025a2a4b8  1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2
+    vsq     - ffff880023cbe6d8  9 212ms FSC: OBJ17eb: LOOK
+    vsq     - ffff880024d37590  9 212ms FSC: OBJ17ec: LOOK
+    vsq     - ffff880027746cb0  9 212ms FSC: OBJ17ed: LOOK
+    vsq     - ffff880024d37ae8  9 212ms FSC: OBJ17ee: LOOK
+    vsq     - ffff880024d37cb0  9 212ms FSC: OBJ17ef: LOOK
+    vsq     - ffff880025036550  9 212ms FSC: OBJ17f0: LOOK
+    vsq     - ffff8800250368e0  9 212ms FSC: OBJ17f1: LOOK
+    vsq     - ffff880025036aa8  9 212ms FSC: OBJ17f2: LOOK
+
+In the 'THR' column, executing items show the thread they're occupying and
+queued threads indicate which queue they're on.  'PID' shows the process ID of
+a slow-work thread that's executing something.  'FL' shows the work item flags.
+'MARK' indicates how long since an item was queued or began executing.  Lastly,
+the 'DESC' column permits the owner of an item to give some information.
+
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index b245b9a9cc0b..f41485145ed1 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -20,6 +20,9 @@
 #include <linux/timer.h>
 
 struct slow_work;
+#ifdef CONFIG_SLOW_WORK_PROC
+struct seq_file;
+#endif
 
 /*
  * The operations used to support slow work items
@@ -38,6 +41,11 @@ struct slow_work_ops {
 
 	/* execute a work item */
 	void (*execute)(struct slow_work *work);
+
+#ifdef CONFIG_SLOW_WORK_PROC
+	/* describe a work item for /proc */
+	void (*desc)(struct slow_work *work, struct seq_file *m);
+#endif
 };
 
 /*
@@ -56,6 +64,9 @@ struct slow_work {
 #define SLOW_WORK_DELAYED	5	/* item is struct delayed_slow_work with active timer */
 	const struct slow_work_ops *ops; /* operations table for this item */
 	struct list_head	link;	/* link in queue */
+#ifdef CONFIG_SLOW_WORK_PROC
+	struct timespec		mark;	/* jiffies at which queued or exec begun */
+#endif
 };
 
 struct delayed_slow_work {
diff --git a/init/Kconfig b/init/Kconfig
index 9e03ef8b311e..ab5c64801fe5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1098,6 +1098,16 @@ config SLOW_WORK
 
 	  See Documentation/slow-work.txt.
 
+config SLOW_WORK_PROC
+	bool "Slow work debugging through /proc"
+	default n
+	depends on SLOW_WORK && PROC_FS
+	help
+	  Display the contents of the slow work run queue through /proc,
+	  including items currently executing.
+
+	  See Documentation/slow-work.txt.
+
 endmenu		# General setup
 
 config HAVE_GENERIC_DMA_COHERENT
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..776ffed1556d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
+obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c
new file mode 100644
index 000000000000..3988032571f5
--- /dev/null
+++ b/kernel/slow-work-proc.c
@@ -0,0 +1,227 @@
+/* Slow work debugging
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/slow-work.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include "slow-work.h"
+
+#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
+#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
+#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
+
+void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
+{
+	seq_puts(m, "Slow-work: New thread");
+}
+
+/*
+ * Render the time mark field on a work item into a 5-char time with units plus
+ * a space
+ */
+static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
+{
+	struct timespec now, diff;
+
+	now = CURRENT_TIME;
+	diff = timespec_sub(now, work->mark);
+
+	if (diff.tv_sec < 0)
+		seq_puts(m, "  -ve ");
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
+		seq_printf(m, "%3luns ", diff.tv_nsec);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
+		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
+		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
+	else if (diff.tv_sec <= 1)
+		seq_puts(m, "   1s ");
+	else if (diff.tv_sec < 60)
+		seq_printf(m, "%4lus ", diff.tv_sec);
+	else if (diff.tv_sec < 60 * 60)
+		seq_printf(m, "%4lum ", diff.tv_sec / 60);
+	else if (diff.tv_sec < 60 * 60 * 24)
+		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
+	else
+		seq_puts(m, "exces ");
+}
+
+/*
+ * Describe a slow work item for /proc
+ */
+static int slow_work_runqueue_show(struct seq_file *m, void *v)
+{
+	struct slow_work *work;
+	struct list_head *p = v;
+	unsigned long id;
+
+	switch ((unsigned long) v) {
+	case 1:
+		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
+		return 0;
+	case 2:
+		seq_puts(m, "=== ===== ================ == ===== ==========\n");
+		return 0;
+
+	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
+		id = (unsigned long) v - 3;
+
+		read_lock(&slow_work_execs_lock);
+		work = slow_work_execs[id];
+		if (work) {
+			smp_read_barrier_depends();
+
+			seq_printf(m, "%3lu %5d %16p %2lx ",
+				   id, slow_work_pids[id], work, work->flags);
+			slow_work_print_mark(m, work);
+
+			if (work->ops->desc)
+				work->ops->desc(work, m);
+			seq_putc(m, '\n');
+		}
+		read_unlock(&slow_work_execs_lock);
+		return 0;
+
+	default:
+		work = list_entry(p, struct slow_work, link);
+		seq_printf(m, "%3s     - %16p %2lx ",
+			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
+			   work, work->flags);
+		slow_work_print_mark(m, work);
+
+		if (work->ops->desc)
+			work->ops->desc(work, m);
+		seq_putc(m, '\n');
+		return 0;
+	}
+}
+
+/*
+ * map the iterator to a work item
+ */
+static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *p;
+	unsigned long count, id;
+
+	switch (*_pos >> ITERATOR_SHIFT) {
+	case 0x0:
+		if (*_pos == 0)
+			*_pos = 1;
+		if (*_pos < 3)
+			return (void *)(unsigned long) *_pos;
+		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
+			for (id = *_pos - 3;
+			     id < SLOW_WORK_THREAD_LIMIT;
+			     id++, (*_pos)++)
+				if (slow_work_execs[id])
+					return (void *)(unsigned long) *_pos;
+		*_pos = 0x1UL << ITERATOR_SHIFT;
+
+	case 0x1:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &slow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+
+	case 0x2:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &vslow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
+{
+	spin_lock_irq(&slow_work_queue_lock);
+	return slow_work_runqueue_index(m, _pos);
+}
+
+/*
+ * move to the next line
+ */
+static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	struct list_head *p = v;
+	unsigned long selector = *_pos >> ITERATOR_SHIFT;
+
+	(*_pos)++;
+	switch (selector) {
+	case 0x0:
+		return slow_work_runqueue_index(m, _pos);
+
+	case 0x1:
+		if (*_pos >> ITERATOR_SHIFT == 0x1) {
+			p = p->next;
+			if (p != &slow_work_queue)
+				return p;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+		p = &vslow_work_queue;
+
+	case 0x2:
+		if (*_pos >> ITERATOR_SHIFT == 0x2) {
+			p = p->next;
+			if (p != &vslow_work_queue)
+				return p;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * clean up after reading
+ */
+static void slow_work_runqueue_stop(struct seq_file *m, void *v)
+{
+	spin_unlock_irq(&slow_work_queue_lock);
+}
+
+static const struct seq_operations slow_work_runqueue_ops = {
+	.start		= slow_work_runqueue_start,
+	.stop		= slow_work_runqueue_stop,
+	.next		= slow_work_runqueue_next,
+	.show		= slow_work_runqueue_show,
+};
+
+/*
+ * open "/proc/slow_work_rq" to list queue contents
+ */
+static int slow_work_runqueue_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &slow_work_runqueue_ops);
+}
+
+const struct file_operations slow_work_runqueue_fops = {
+	.owner		= THIS_MODULE,
+	.open		= slow_work_runqueue_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index f67e1daae93d..b763bc2d2670 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -16,13 +16,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/wait.h>
-
-#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)	/* cull threads 5s after running out of
-					 * things to do */
-#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after
-					 * OOM */
-
-#define SLOW_WORK_THREAD_LIMIT	255	/* abs maximum number of slow-work threads */
+#include <linux/proc_fs.h>
+#include "slow-work.h"
 
 static void slow_work_cull_timeout(unsigned long);
 static void slow_work_oom_timeout(unsigned long);
@@ -116,6 +111,15 @@ static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
 static DEFINE_MUTEX(slow_work_unreg_sync_lock);
 #endif
 
+/*
+ * Data for tracking currently executing items for indication through /proc
+ */
+#ifdef CONFIG_SLOW_WORK_PROC
+struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
+pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
+DEFINE_RWLOCK(slow_work_execs_lock);
+#endif
+
 /*
  * The queues of work items and the lock governing access to them.  These are
  * shared between all the CPUs.  It doesn't make sense to have per-CPU queues
@@ -124,9 +128,9 @@ static DEFINE_MUTEX(slow_work_unreg_sync_lock);
  * There are two queues of work items: one for slow work items, and one for
  * very slow work items.
  */
-static LIST_HEAD(slow_work_queue);
-static LIST_HEAD(vslow_work_queue);
-static DEFINE_SPINLOCK(slow_work_queue_lock);
+LIST_HEAD(slow_work_queue);
+LIST_HEAD(vslow_work_queue);
+DEFINE_SPINLOCK(slow_work_queue_lock);
 
 /*
  * The thread controls.  A variable used to signal to the threads that they
@@ -182,7 +186,7 @@ static unsigned slow_work_calc_vsmax(void)
  * Attempt to execute stuff queued on a slow thread.  Return true if we managed
  * it, false if there was nothing to do.
  */
-static bool slow_work_execute(int id)
+static noinline bool slow_work_execute(int id)
 {
 #ifdef CONFIG_MODULES
 	struct module *module;
@@ -227,6 +231,10 @@ static bool slow_work_execute(int id)
 	if (work)
 		slow_work_thread_processing[id] = work->owner;
 #endif
+	if (work) {
+		slow_work_mark_time(work);
+		slow_work_begin_exec(id, work);
+	}
 
 	spin_unlock_irq(&slow_work_queue_lock);
 
@@ -247,6 +255,8 @@ static bool slow_work_execute(int id)
 	/* wake up anyone waiting for this work to be complete */
 	wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
 
+	slow_work_end_exec(id, work);
+
 	/* if someone tried to enqueue the item whilst we were executing it,
 	 * then it'll be left unenqueued to avoid multiple threads trying to
 	 * execute it simultaneously
@@ -285,6 +295,7 @@ auto_requeue:
 	 * - we transfer our ref on the item back to the appropriate queue
 	 * - don't wake another thread up as we're awake already
 	 */
+	slow_work_mark_time(work);
 	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
 		list_add_tail(&work->link, &vslow_work_queue);
 	else
@@ -368,6 +379,7 @@ int slow_work_enqueue(struct slow_work *work)
 			ret = slow_work_get_ref(work);
 			if (ret < 0)
 				goto failed;
+			slow_work_mark_time(work);
 			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
 				list_add_tail(&work->link, &vslow_work_queue);
 			else
@@ -489,6 +501,7 @@ static void delayed_slow_work_timer(unsigned long data)
 			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
 			put = true;
 		} else {
+			slow_work_mark_time(work);
 			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
 				list_add_tail(&work->link, &vslow_work_queue);
 			else
@@ -627,6 +640,7 @@ static int slow_work_thread(void *_data)
 	id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
 	BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
 	__set_bit(id, slow_work_ids);
+	slow_work_set_thread_pid(id, current->pid);
 	spin_unlock_irq(&slow_work_queue_lock);
 
 	sprintf(current->comm, "kslowd%03u", id);
@@ -669,6 +683,7 @@ static int slow_work_thread(void *_data)
 	}
 
 	spin_lock_irq(&slow_work_queue_lock);
+	slow_work_set_thread_pid(id, 0);
 	__clear_bit(id, slow_work_ids);
 	spin_unlock_irq(&slow_work_queue_lock);
 
@@ -722,6 +737,9 @@ static void slow_work_new_thread_execute(struct slow_work *work)
 static const struct slow_work_ops slow_work_new_thread_ops = {
 	.owner		= THIS_MODULE,
 	.execute	= slow_work_new_thread_execute,
+#ifdef CONFIG_SLOW_WORK_PROC
+	.desc		= slow_work_new_thread_desc,
+#endif
 };
 
 /*
@@ -948,6 +966,10 @@ static int __init init_slow_work(void)
 #ifdef CONFIG_SYSCTL
 	if (slow_work_max_max_threads < nr_cpus * 2)
 		slow_work_max_max_threads = nr_cpus * 2;
+#endif
+#ifdef CONFIG_SLOW_WORK_PROC
+	proc_create("slow_work_rq", S_IFREG | 0400, NULL,
+		    &slow_work_runqueue_fops);
 #endif
 	return 0;
 }
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
new file mode 100644
index 000000000000..3c2f007f3ad6
--- /dev/null
+++ b/kernel/slow-work.h
@@ -0,0 +1,72 @@
+/* Slow work private definitions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)	/* cull threads 5s after running out of
+					 * things to do */
+#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after
+					 * OOM */
+
+#define SLOW_WORK_THREAD_LIMIT	255	/* abs maximum number of slow-work threads */
+
+/*
+ * slow-work.c
+ */
+#ifdef CONFIG_SLOW_WORK_PROC
+extern struct slow_work *slow_work_execs[];
+extern pid_t slow_work_pids[];
+extern rwlock_t slow_work_execs_lock;
+#endif
+
+extern struct list_head slow_work_queue;
+extern struct list_head vslow_work_queue;
+extern spinlock_t slow_work_queue_lock;
+
+/*
+ * slow-work-proc.c
+ */
+#ifdef CONFIG_SLOW_WORK_PROC
+extern const struct file_operations slow_work_runqueue_fops;
+
+extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
+#endif
+
+/*
+ * Helper functions
+ */
+static inline void slow_work_set_thread_pid(int id, pid_t pid)
+{
+#ifdef CONFIG_SLOW_WORK_PROC
+	slow_work_pids[id] = pid;
+#endif
+}
+
+static inline void slow_work_mark_time(struct slow_work *work)
+{
+#ifdef CONFIG_SLOW_WORK_PROC
+	work->mark = CURRENT_TIME;
+#endif
+}
+
+static inline void slow_work_begin_exec(int id, struct slow_work *work)
+{
+#ifdef CONFIG_SLOW_WORK_PROC
+	slow_work_execs[id] = work;
+#endif
+}
+
+static inline void slow_work_end_exec(int id, struct slow_work *work)
+{
+#ifdef CONFIG_SLOW_WORK_PROC
+	write_lock(&slow_work_execs_lock);
+	slow_work_execs[id] = NULL;
+	write_unlock(&slow_work_execs_lock);
+#endif
+}
-- 
cgit v1.2.3


From 31ba99d304494cb28fa8671ccc769c5543e1165d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:10:53 +0000
Subject: SLOW_WORK: Allow the owner of a work item to determine if it is
 queued or not

Add a function (slow_work_is_queued()) to permit the owner of a work item to
determine if the item is queued or not.

The work item is counted as being queued if it is actually on the queue, not
just if it is pending.  If it is executing and pending, then it is not on the
queue, but will rather be put back on the queue when execution finishes.

This permits a caller to quickly work out if it may be able to put another,
dependent work item on the queue behind it, or whether it will have to wait
till that is finished.

This can be used by CacheFiles to work out whether the creation a new object
can be immediately deferred when it has to wait for an old object to be
deleted, or whether a wait must take place.  If a wait is necessary, then the
slow-work thread can otherwise get blocked, preventing the deletion from
taking place.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt | 15 +++++++++++++++
 include/linux/slow-work.h   | 19 +++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index f120238e70fe..0169c9d9dd16 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -144,6 +144,21 @@ from being taken away before it completes.  module should almost certainly be
 THIS_MODULE.
 
 
+================
+HELPER FUNCTIONS
+================
+
+The slow-work facility provides a function by which it can be determined
+whether or not an item is queued for later execution:
+
+	bool queued = slow_work_is_queued(struct slow_work *work);
+
+If it returns false, then the item is not on the queue (it may be executing
+with a requeue pending).  This can be used to work out whether an item on which
+another depends is on the queue, thus allowing a dependent item to be queued
+after it.
+
+
 ===============
 ITEM OPERATIONS
 ===============
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index f41485145ed1..bfd3ab4c8898 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -120,6 +120,25 @@ static inline void vslow_work_init(struct slow_work *work,
 	INIT_LIST_HEAD(&work->link);
 }
 
+/**
+ * slow_work_is_queued - Determine if a slow work item is on the work queue
+ * work: The work item to test
+ *
+ * Determine if the specified slow-work item is on the work queue.  This
+ * returns true if it is actually on the queue.
+ *
+ * If the item is executing and has been marked for requeue when execution
+ * finishes, then false will be returned.
+ *
+ * Anyone wishing to wait for completion of execution can wait on the
+ * SLOW_WORK_EXECUTING bit.
+ */
+static inline bool slow_work_is_queued(struct slow_work *work)
+{
+	unsigned long flags = work->flags;
+	return flags & SLOW_WORK_PENDING && !(flags & SLOW_WORK_EXECUTING);
+}
+
 extern int slow_work_enqueue(struct slow_work *work);
 extern void slow_work_cancel(struct slow_work *work);
 extern int slow_work_register_user(struct module *owner);
-- 
cgit v1.2.3


From 3bde31a4ac225cb5805be02eff6eaaf7e0766ccd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:10:57 +0000
Subject: SLOW_WORK: Allow a requeueable work item to sleep till the thread is
 needed

Add a function to allow a requeueable work item to sleep till the thread
processing it is needed by the slow-work facility to perform other work.

Sometimes a work item can't progress immediately, but must wait for the
completion of another work item that's currently being processed by another
slow-work thread.

In some circumstances, the waiting item could instead - theoretically - put
itself back on the queue and yield its thread back to the slow-work facility,
thus waiting till it gets processing time again before attempting to progress.
This would allow other work items processing time on that thread.

However, this only works if there is something on the queue for it to queue
behind - otherwise it will just get a thread again immediately, and will end
up cycling between the queue and the thread, eating up valuable CPU time.

So, slow_work_sleep_till_thread_needed() is provided such that an item can put
itself on a wait queue that will wake it up when the event it is actually
interested in occurs, then call this function in lieu of calling schedule().

This function will then sleep until either the item's event occurs or another
work item appears on the queue.  If another work item is queued, but the
item's event hasn't occurred, then the work item should requeue itself and
yield the thread back to the slow-work facility by returning.

This can be used by CacheFiles for an object that is being created on one
thread to wait for an object being deleted on another thread where there is
nothing on the queue for the creation to go and wait behind.  As soon as an
item appears on the queue that could be given thread time instead, CacheFiles
can stick the creating object back on the queue and return to the slow-work
facility - assuming the object deletion didn't also complete.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/slow-work.txt | 44 +++++++++++++++++++++
 include/linux/slow-work.h   |  3 ++
 kernel/slow-work.c          | 94 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 132 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index 0169c9d9dd16..52bc31433723 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -158,6 +158,50 @@ with a requeue pending).  This can be used to work out whether an item on which
 another depends is on the queue, thus allowing a dependent item to be queued
 after it.
 
+If the above shows an item on which another depends not to be queued, then the
+owner of the dependent item might need to wait.  However, to avoid locking up
+the threads unnecessarily be sleeping in them, it can make sense under some
+circumstances to return the work item to the queue, thus deferring it until
+some other items have had a chance to make use of the yielded thread.
+
+To yield a thread and defer an item, the work function should simply enqueue
+the work item again and return.  However, this doesn't work if there's nothing
+actually on the queue, as the thread just vacated will jump straight back into
+the item's work function, thus busy waiting on a CPU.
+
+Instead, the item should use the thread to wait for the dependency to go away,
+but rather than using schedule() or schedule_timeout() to sleep, it should use
+the following function:
+
+	bool requeue = slow_work_sleep_till_thread_needed(
+			struct slow_work *work,
+			signed long *_timeout);
+
+This will add a second wait and then sleep, such that it will be woken up if
+either something appears on the queue that could usefully make use of the
+thread - and behind which this item can be queued, or if the event the caller
+set up to wait for happens.  True will be returned if something else appeared
+on the queue and this work function should perhaps return, of false if
+something else woke it up.  The timeout is as for schedule_timeout().
+
+For example:
+
+	wq = bit_waitqueue(&my_flags, MY_BIT);
+	init_wait(&wait);
+	requeue = false;
+	do {
+		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+		if (!test_bit(MY_BIT, &my_flags))
+			break;
+		requeue = slow_work_sleep_till_thread_needed(&my_work,
+							     &timeout);
+	} while (timeout > 0 && !requeue);
+	finish_wait(wq, &wait);
+	if (!test_bit(MY_BIT, &my_flags)
+		goto do_my_thing;
+	if (requeue)
+		return; // to slow_work
+
 
 ===============
 ITEM OPERATIONS
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index bfd3ab4c8898..5035a2691739 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -152,6 +152,9 @@ static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork)
 	slow_work_cancel(&dwork->work);
 }
 
+extern bool slow_work_sleep_till_thread_needed(struct slow_work *work,
+					       signed long *_timeout);
+
 #ifdef CONFIG_SYSCTL
 extern ctl_table slow_work_sysctls[];
 #endif
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index b763bc2d2670..da94f3c101af 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -132,6 +132,15 @@ LIST_HEAD(slow_work_queue);
 LIST_HEAD(vslow_work_queue);
 DEFINE_SPINLOCK(slow_work_queue_lock);
 
+/*
+ * The following are two wait queues that get pinged when a work item is placed
+ * on an empty queue.  These allow work items that are hogging a thread by
+ * sleeping in a way that could be deferred to yield their thread and enqueue
+ * themselves.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation);
+static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);
+
 /*
  * The thread controls.  A variable used to signal to the threads that they
  * should exit when the queue is empty, a waitqueue used by the threads to wait
@@ -305,6 +314,50 @@ auto_requeue:
 	return true;
 }
 
+/**
+ * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work
+ * work: The work item under execution that wants to sleep
+ * _timeout: Scheduler sleep timeout
+ *
+ * Allow a requeueable work item to sleep on a slow-work processor thread until
+ * that thread is needed to do some other work or the sleep is interrupted by
+ * some other event.
+ *
+ * The caller must set up a wake up event before calling this and must have set
+ * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
+ * condition before calling this function as no test is made here.
+ *
+ * False is returned if there is nothing on the queue; true is returned if the
+ * work item should be requeued
+ */
+bool slow_work_sleep_till_thread_needed(struct slow_work *work,
+					signed long *_timeout)
+{
+	wait_queue_head_t *wfo_wq;
+	struct list_head *queue;
+
+	DEFINE_WAIT(wait);
+
+	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
+		wfo_wq = &vslow_work_queue_waits_for_occupation;
+		queue = &vslow_work_queue;
+	} else {
+		wfo_wq = &slow_work_queue_waits_for_occupation;
+		queue = &slow_work_queue;
+	}
+
+	if (!list_empty(queue))
+		return true;
+
+	add_wait_queue_exclusive(wfo_wq, &wait);
+	if (list_empty(queue))
+		*_timeout = schedule_timeout(*_timeout);
+	finish_wait(wfo_wq, &wait);
+
+	return !list_empty(queue);
+}
+EXPORT_SYMBOL(slow_work_sleep_till_thread_needed);
+
 /**
  * slow_work_enqueue - Schedule a slow work item for processing
  * @work: The work item to queue
@@ -335,6 +388,8 @@ auto_requeue:
  */
 int slow_work_enqueue(struct slow_work *work)
 {
+	wait_queue_head_t *wfo_wq;
+	struct list_head *queue;
 	unsigned long flags;
 	int ret;
 
@@ -354,6 +409,14 @@ int slow_work_enqueue(struct slow_work *work)
 	 * maintaining our promise
 	 */
 	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
+		if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
+			wfo_wq = &vslow_work_queue_waits_for_occupation;
+			queue = &vslow_work_queue;
+		} else {
+			wfo_wq = &slow_work_queue_waits_for_occupation;
+			queue = &slow_work_queue;
+		}
+
 		spin_lock_irqsave(&slow_work_queue_lock, flags);
 
 		if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
@@ -380,11 +443,13 @@ int slow_work_enqueue(struct slow_work *work)
 			if (ret < 0)
 				goto failed;
 			slow_work_mark_time(work);
-			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
-				list_add_tail(&work->link, &vslow_work_queue);
-			else
-				list_add_tail(&work->link, &slow_work_queue);
+			list_add_tail(&work->link, queue);
 			wake_up(&slow_work_thread_wq);
+
+			/* if someone who could be requeued is sleeping on a
+			 * thread, then ask them to yield their thread */
+			if (work->link.prev == queue)
+				wake_up(wfo_wq);
 		}
 
 		spin_unlock_irqrestore(&slow_work_queue_lock, flags);
@@ -487,9 +552,19 @@ EXPORT_SYMBOL(slow_work_cancel);
  */
 static void delayed_slow_work_timer(unsigned long data)
 {
+	wait_queue_head_t *wfo_wq;
+	struct list_head *queue;
 	struct slow_work *work = (struct slow_work *) data;
 	unsigned long flags;
-	bool queued = false, put = false;
+	bool queued = false, put = false, first = false;
+
+	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
+		wfo_wq = &vslow_work_queue_waits_for_occupation;
+		queue = &vslow_work_queue;
+	} else {
+		wfo_wq = &slow_work_queue_waits_for_occupation;
+		queue = &slow_work_queue;
+	}
 
 	spin_lock_irqsave(&slow_work_queue_lock, flags);
 	if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
@@ -502,17 +577,18 @@ static void delayed_slow_work_timer(unsigned long data)
 			put = true;
 		} else {
 			slow_work_mark_time(work);
-			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
-				list_add_tail(&work->link, &vslow_work_queue);
-			else
-				list_add_tail(&work->link, &slow_work_queue);
+			list_add_tail(&work->link, queue);
 			queued = true;
+			if (work->link.prev == queue)
+				first = true;
 		}
 	}
 
 	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
 	if (put)
 		slow_work_put_ref(work);
+	if (first)
+		wake_up(wfo_wq);
 	if (queued)
 		wake_up(&slow_work_thread_wq);
 }
-- 
cgit v1.2.3


From 440f0affe247e9990c8f8778f1861da4fd7d5e50 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:01 +0000
Subject: FS-Cache: Annotate slow-work runqueue proc lines for FS-Cache work
 items

Annotate slow-work runqueue proc lines for FS-Cache work items.  Objects
include the object ID and the state.  Operations include the object ID, the
operation ID and the operation type and state.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/object.c           | 41 ++++++++++++++++++++++++++++++++++++++++-
 fs/fscache/operation.c        | 29 +++++++++++++++++++++++++++++
 fs/fscache/page.c             | 27 ++++++++++++++++++++++-----
 include/linux/fscache-cache.h | 12 ++++++++++++
 4 files changed, 103 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index d236eb1d6f37..615b63dd9ecc 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,9 +14,10 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
+#include <linux/seq_file.h>
 #include "internal.h"
 
-const char *fscache_object_states[] = {
+const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
 	[FSCACHE_OBJECT_INIT]		= "OBJECT_INIT",
 	[FSCACHE_OBJECT_LOOKING_UP]	= "OBJECT_LOOKING_UP",
 	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",
@@ -33,9 +34,28 @@ const char *fscache_object_states[] = {
 };
 EXPORT_SYMBOL(fscache_object_states);
 
+static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
+	[FSCACHE_OBJECT_INIT]		= "INIT",
+	[FSCACHE_OBJECT_LOOKING_UP]	= "LOOK",
+	[FSCACHE_OBJECT_CREATING]	= "CRTN",
+	[FSCACHE_OBJECT_AVAILABLE]	= "AVBL",
+	[FSCACHE_OBJECT_ACTIVE]		= "ACTV",
+	[FSCACHE_OBJECT_UPDATING]	= "UPDT",
+	[FSCACHE_OBJECT_DYING]		= "DYNG",
+	[FSCACHE_OBJECT_LC_DYING]	= "LCDY",
+	[FSCACHE_OBJECT_ABORT_INIT]	= "ABTI",
+	[FSCACHE_OBJECT_RELEASING]	= "RELS",
+	[FSCACHE_OBJECT_RECYCLING]	= "RCYC",
+	[FSCACHE_OBJECT_WITHDRAWING]	= "WTHD",
+	[FSCACHE_OBJECT_DEAD]		= "DEAD",
+};
+
 static void fscache_object_slow_work_put_ref(struct slow_work *);
 static int  fscache_object_slow_work_get_ref(struct slow_work *);
 static void fscache_object_slow_work_execute(struct slow_work *);
+#ifdef CONFIG_SLOW_WORK_PROC
+static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
+#endif
 static void fscache_initialise_object(struct fscache_object *);
 static void fscache_lookup_object(struct fscache_object *);
 static void fscache_object_available(struct fscache_object *);
@@ -49,6 +69,9 @@ const struct slow_work_ops fscache_object_slow_work_ops = {
 	.get_ref	= fscache_object_slow_work_get_ref,
 	.put_ref	= fscache_object_slow_work_put_ref,
 	.execute	= fscache_object_slow_work_execute,
+#ifdef CONFIG_SLOW_WORK_PROC
+	.desc		= fscache_object_slow_work_desc,
+#endif
 };
 EXPORT_SYMBOL(fscache_object_slow_work_ops);
 
@@ -326,6 +349,22 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
 		fscache_enqueue_object(object);
 }
 
+/*
+ * describe an object for slow-work debugging
+ */
+#ifdef CONFIG_SLOW_WORK_PROC
+static void fscache_object_slow_work_desc(struct slow_work *work,
+					  struct seq_file *m)
+{
+	struct fscache_object *object =
+		container_of(work, struct fscache_object, work);
+
+	seq_printf(m, "FSC: OBJ%x: %s",
+		   object->debug_id,
+		   fscache_object_states_short[object->state]);
+}
+#endif
+
 /*
  * initialise an object
  * - check the specified object's parent to see if we can make use of it
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index f1a2857b2ff5..91bbe6f0377c 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -13,6 +13,7 @@
 
 #define FSCACHE_DEBUG_LEVEL OPERATION
 #include <linux/module.h>
+#include <linux/seq_file.h>
 #include "internal.h"
 
 atomic_t fscache_op_debug_id;
@@ -31,6 +32,8 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	_enter("{OBJ%x OP%x,%u}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	fscache_set_op_state(op, "EnQ");
+
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
@@ -67,6 +70,8 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
+	fscache_set_op_state(op, "Run");
+
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -87,6 +92,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
+	fscache_set_op_state(op, "SubmitX");
+
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -190,6 +197,8 @@ int fscache_submit_op(struct fscache_object *object,
 
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
+	fscache_set_op_state(op, "Submit");
+
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -298,6 +307,8 @@ void fscache_put_operation(struct fscache_operation *op)
 	if (!atomic_dec_and_test(&op->usage))
 		return;
 
+	fscache_set_op_state(op, "Put");
+
 	_debug("PUT OP");
 	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
 		BUG();
@@ -452,9 +463,27 @@ static void fscache_op_execute(struct slow_work *work)
 	_leave("");
 }
 
+/*
+ * describe an operation for slow-work debugging
+ */
+#ifdef CONFIG_SLOW_WORK_PROC
+static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
+{
+	struct fscache_operation *op =
+		container_of(work, struct fscache_operation, slow_work);
+
+	seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx",
+		   op->object->debug_id, op->debug_id,
+		   op->name, op->state, op->flags);
+}
+#endif
+
 const struct slow_work_ops fscache_op_slow_work_ops = {
 	.owner		= THIS_MODULE,
 	.get_ref	= fscache_op_get_ref,
 	.put_ref	= fscache_op_put_ref,
 	.execute	= fscache_op_execute,
+#ifdef CONFIG_SLOW_WORK_PROC
+	.desc		= fscache_op_desc,
+#endif
 };
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 2568e0eb644f..e8bbc395cef6 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -63,14 +63,19 @@ static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *p
 static void fscache_attr_changed_op(struct fscache_operation *op)
 {
 	struct fscache_object *object = op->object;
+	int ret;
 
 	_enter("{OBJ%x OP%x}", object->debug_id, op->debug_id);
 
 	fscache_stat(&fscache_n_attr_changed_calls);
 
-	if (fscache_object_is_active(object) &&
-	    object->cache->ops->attr_changed(object) < 0)
-		fscache_abort_object(object);
+	if (fscache_object_is_active(object)) {
+		fscache_set_op_state(op, "CallFS");
+		ret = object->cache->ops->attr_changed(object);
+		fscache_set_op_state(op, "Done");
+		if (ret < 0)
+			fscache_abort_object(object);
+	}
 
 	_leave("");
 }
@@ -99,6 +104,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 	fscache_operation_init(op, NULL);
 	fscache_operation_init_slow(op, fscache_attr_changed_op);
 	op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
+	fscache_set_op_name(op, "Attr");
 
 	spin_lock(&cookie->lock);
 
@@ -184,6 +190,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
 	op->start_time	= jiffies;
 	INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
 	INIT_LIST_HEAD(&op->to_do);
+	fscache_set_op_name(&op->op, "Retr");
 	return op;
 }
 
@@ -257,6 +264,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
+	fscache_set_op_name(&op->op, "RetrRA1");
 
 	spin_lock(&cookie->lock);
 
@@ -369,6 +377,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
+	fscache_set_op_name(&op->op, "RetrRAN");
 
 	spin_lock(&cookie->lock);
 
@@ -461,6 +470,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
+	fscache_set_op_name(&op->op, "RetrAL1");
 
 	spin_lock(&cookie->lock);
 
@@ -529,6 +539,8 @@ static void fscache_write_op(struct fscache_operation *_op)
 
 	_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
 
+	fscache_set_op_state(&op->op, "GetPage");
+
 	spin_lock(&cookie->lock);
 	spin_lock(&object->lock);
 
@@ -559,13 +571,17 @@ static void fscache_write_op(struct fscache_operation *_op)
 	spin_unlock(&cookie->lock);
 
 	if (page) {
+		fscache_set_op_state(&op->op, "Store");
 		ret = object->cache->ops->write_page(op, page);
+		fscache_set_op_state(&op->op, "EndWrite");
 		fscache_end_page_write(cookie, page);
 		page_cache_release(page);
-		if (ret < 0)
+		if (ret < 0) {
+			fscache_set_op_state(&op->op, "Abort");
 			fscache_abort_object(object);
-		else
+		} else {
 			fscache_enqueue_operation(&op->op);
+		}
 	}
 
 	_leave("");
@@ -634,6 +650,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	fscache_operation_init(&op->op, fscache_release_write_op);
 	fscache_operation_init_slow(&op->op, fscache_write_op);
 	op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
+	fscache_set_op_name(&op->op, "Write1");
 
 	ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
 	if (ret < 0)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 84d3532dd3ea..7a9847ccd192 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -102,6 +102,16 @@ struct fscache_operation {
 
 	/* operation releaser */
 	fscache_operation_release_t release;
+
+#ifdef CONFIG_SLOW_WORK_PROC
+	const char *name;		/* operation name */
+	const char *state;		/* operation state */
+#define fscache_set_op_name(OP, N)	do { (OP)->name  = (N); } while(0)
+#define fscache_set_op_state(OP, S)	do { (OP)->state = (S); } while(0)
+#else
+#define fscache_set_op_name(OP, N)	do { } while(0)
+#define fscache_set_op_state(OP, S)	do { } while(0)
+#endif
 };
 
 extern atomic_t fscache_op_debug_id;
@@ -125,6 +135,7 @@ static inline void fscache_operation_init(struct fscache_operation *op,
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 	op->release = release;
 	INIT_LIST_HEAD(&op->pend_link);
+	fscache_set_op_state(op, "Init");
 }
 
 /**
@@ -337,6 +348,7 @@ struct fscache_object {
 		FSCACHE_OBJECT_RECYCLING,	/* retiring object */
 		FSCACHE_OBJECT_WITHDRAWING,	/* withdrawing object */
 		FSCACHE_OBJECT_DEAD,		/* object is now dead */
+		FSCACHE_OBJECT__NSTATES
 	} state;
 
 	int			debug_id;	/* debugging ID */
-- 
cgit v1.2.3


From 4fbf4291aa15926cd4fdca0ffe9122e89d0459db Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:04 +0000
Subject: FS-Cache: Allow the current state of all objects to be dumped

Allow the current state of all fscache objects to be dumped by doing:

	cat /proc/fs/fscache/objects

By default, all objects and all fields will be shown.  This can be restricted
by adding a suitable key to one of the caller's keyrings (such as the session
keyring):

	keyctl add user fscache:objlist "<restrictions>" @s

The <restrictions> are:

	K	Show hexdump of object key (don't show if not given)
	A	Show hexdump of object aux data (don't show if not given)

And paired restrictions:

	C	Show objects that have a cookie
	c	Show objects that don't have a cookie
	B	Show objects that are busy
	b	Show objects that aren't busy
	W	Show objects that have pending writes
	w	Show objects that don't have pending writes
	R	Show objects that have outstanding reads
	r	Show objects that don't have outstanding reads
	S	Show objects that have slow work queued
	s	Show objects that don't have slow work queued

If neither side of a restriction pair is given, then both are implied.  For
example:

	keyctl add user fscache:objlist KB @s

shows objects that are busy, and lists their object keys, but does not dump
their auxiliary data.  It also implies "CcWwRrSs", but as 'B' is given, 'b' is
not implied.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt |  81 +++++
 fs/cachefiles/interface.c                     |   1 +
 fs/cachefiles/rdwr.c                          |   6 +-
 fs/fscache/Kconfig                            |   7 +
 fs/fscache/Makefile                           |   1 +
 fs/fscache/cache.c                            |   1 +
 fs/fscache/cookie.c                           |   2 +
 fs/fscache/internal.h                         |  13 +
 fs/fscache/object-list.c                      | 432 ++++++++++++++++++++++++++
 fs/fscache/object.c                           |   2 +-
 fs/fscache/operation.c                        |   3 +
 fs/fscache/page.c                             |   6 +
 fs/fscache/proc.c                             |  13 +
 include/linux/fscache-cache.h                 |  13 +
 14 files changed, 578 insertions(+), 3 deletions(-)
 create mode 100644 fs/fscache/object-list.c

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 9e94b9491d89..cac09e11ca30 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -299,6 +299,87 @@ proc files.
      jiffy range covered, and the SECS field the equivalent number of seconds.
 
 
+===========
+OBJECT LIST
+===========
+
+If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
+list of all the objects currently allocated and allow them to be viewed
+through:
+
+	/proc/fs/fscache/objects
+
+This will look something like:
+
+	[root@andromeda ~]# head /proc/fs/fscache/objects
+	OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA       OBJECT_KEY, AUX_DATA
+	======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
+	   17e4b        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+	   1693a        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+
+where the first set of columns before the '|' describe the object:
+
+	COLUMN	DESCRIPTION
+	=======	===============================================================
+	OBJECT	Object debugging ID (appears as OBJ%x in some debug messages)
+	PARENT	Debugging ID of parent object
+	STAT	Object state
+	CHLDN	Number of child objects of this object
+	OPS	Number of outstanding operations on this object
+	OOP	Number of outstanding child object management operations
+	IPR
+	EX	Number of outstanding exclusive operations
+	READS	Number of outstanding read operations
+	EM	Object's event mask
+	EV	Events raised on this object
+	F	Object flags
+	S	Object slow-work work item flags
+
+and the second set of columns describe the object's cookie, if present:
+
+	COLUMN		DESCRIPTION
+	===============	=======================================================
+	NETFS_COOKIE_DEF Name of netfs cookie definition
+	TY		Cookie type (IX - index, DT - data, hex - special)
+	FL		Cookie flags
+	NETFS_DATA	Netfs private data stored in the cookie
+	OBJECT_KEY	Object key	} 1 column, with separating comma
+	AUX_DATA	Object aux data	} presence may be configured
+
+The data shown may be filtered by attaching the a key to an appropriate keyring
+before viewing the file.  Something like:
+
+		keyctl add user fscache:objlist <restrictions> @s
+
+where <restrictions> are a selection of the following letters:
+
+	K	Show hexdump of object key (don't show if not given)
+	A	Show hexdump of object aux data (don't show if not given)
+
+and the following paired letters:
+
+	C	Show objects that have a cookie
+	c	Show objects that don't have a cookie
+	B	Show objects that are busy
+	b	Show objects that aren't busy
+	W	Show objects that have pending writes
+	w	Show objects that don't have pending writes
+	R	Show objects that have outstanding reads
+	r	Show objects that don't have outstanding reads
+	S	Show objects that have slow work queued
+	s	Show objects that don't have slow work queued
+
+If neither side of a letter pair is given, then both are implied.  For example:
+
+	keyctl add user fscache:objlist KB @s
+
+shows objects that are busy, and lists their object keys, but does not dump
+their auxiliary data.  It also implies "CcWwRrSs", but as 'B' is given, 'b' is
+not implied.
+
+By default all objects and all fields will be shown.
+
+
 =========
 DEBUGGING
 =========
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 431accd475a7..dd7f852746cb 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -331,6 +331,7 @@ static void cachefiles_put_object(struct fscache_object *_object)
 		}
 
 		cache = object->fscache.cache;
+		fscache_object_destroy(&object->fscache);
 		kmem_cache_free(cachefiles_object_jar, object);
 		fscache_object_destroyed(cache);
 	}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a69787e7dd96..3304646dae84 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -333,7 +333,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
-	op->op.flags = FSCACHE_OP_FAST;
+	op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
+	op->op.flags |= FSCACHE_OP_FAST;
 	op->op.processor = cachefiles_read_copier;
 
 	pagevec_init(&pagevec, 0);
@@ -639,7 +640,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	pagevec_init(&pagevec, 0);
 
-	op->op.flags = FSCACHE_OP_FAST;
+	op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
+	op->op.flags |= FSCACHE_OP_FAST;
 	op->op.processor = cachefiles_read_copier;
 
 	INIT_LIST_HEAD(&backpages);
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index 9bbb8ce7bea0..864dac20a242 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -54,3 +54,10 @@ config FSCACHE_DEBUG
 	  enabled by setting bits in /sys/modules/fscache/parameter/debug.
 
 	  See Documentation/filesystems/caching/fscache.txt for more information.
+
+config FSCACHE_OBJECT_LIST
+	bool "Maintain global object list for debugging purposes"
+	depends on FSCACHE && PROC_FS
+	help
+	  Maintain a global list of active fscache objects that can be
+	  retrieved through /proc/fs/fscache/objects for debugging purposes
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
index 91571b95aacc..6d561531cb36 100644
--- a/fs/fscache/Makefile
+++ b/fs/fscache/Makefile
@@ -15,5 +15,6 @@ fscache-y := \
 fscache-$(CONFIG_PROC_FS) += proc.o
 fscache-$(CONFIG_FSCACHE_STATS) += stats.o
 fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o
+fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o
 
 obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index e21985bbb1fb..724384ef96de 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -263,6 +263,7 @@ int fscache_add_cache(struct fscache_cache *cache,
 	spin_lock(&cache->object_list_lock);
 	list_add_tail(&ifsdef->cache_link, &cache->object_list);
 	spin_unlock(&cache->object_list_lock);
+	fscache_objlist_add(ifsdef);
 
 	/* add the cache's netfs definition index object to the top level index
 	 * cookie as a known backing object */
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 72fd18f6c71f..9b5187328230 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -349,6 +349,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 	object->cookie = cookie;
 	atomic_inc(&cookie->usage);
 	hlist_add_head(&object->cookie_link, &cookie->backing_objects);
+
+	fscache_objlist_add(object);
 	ret = 0;
 
 cant_attach_object:
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 1c341304621f..fe02973a9516 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -88,10 +88,23 @@ extern int fscache_wait_bit_interruptible(void *);
 /*
  * object.c
  */
+extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5];
+
 extern void fscache_withdrawing_object(struct fscache_cache *,
 				       struct fscache_object *);
 extern void fscache_enqueue_object(struct fscache_object *);
 
+/*
+ * object-list.c
+ */
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+extern const struct file_operations fscache_objlist_fops;
+
+extern void fscache_objlist_add(struct fscache_object *);
+#else
+#define fscache_objlist_add(object) do {} while(0)
+#endif
+
 /*
  * operation.c
  */
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
new file mode 100644
index 000000000000..e590242fa41a
--- /dev/null
+++ b/fs/fscache/object-list.c
@@ -0,0 +1,432 @@
+/* Global fscache object list maintainer and viewer
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL COOKIE
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/key.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+static struct rb_root fscache_object_list;
+static DEFINE_RWLOCK(fscache_object_list_lock);
+
+struct fscache_objlist_data {
+	unsigned long	config;		/* display configuration */
+#define FSCACHE_OBJLIST_CONFIG_KEY	0x00000001	/* show object keys */
+#define FSCACHE_OBJLIST_CONFIG_AUX	0x00000002	/* show object auxdata */
+#define FSCACHE_OBJLIST_CONFIG_COOKIE	0x00000004	/* show objects with cookies */
+#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE	0x00000008	/* show objects without cookies */
+#define FSCACHE_OBJLIST_CONFIG_BUSY	0x00000010	/* show busy objects */
+#define FSCACHE_OBJLIST_CONFIG_IDLE	0x00000020	/* show idle objects */
+#define FSCACHE_OBJLIST_CONFIG_PENDWR	0x00000040	/* show objects with pending writes */
+#define FSCACHE_OBJLIST_CONFIG_NOPENDWR	0x00000080	/* show objects without pending writes */
+#define FSCACHE_OBJLIST_CONFIG_READS	0x00000100	/* show objects with active reads */
+#define FSCACHE_OBJLIST_CONFIG_NOREADS	0x00000200	/* show objects without active reads */
+#define FSCACHE_OBJLIST_CONFIG_EVENTS	0x00000400	/* show objects with events */
+#define FSCACHE_OBJLIST_CONFIG_NOEVENTS	0x00000800	/* show objects without no events */
+#define FSCACHE_OBJLIST_CONFIG_WORK	0x00001000	/* show objects with slow work */
+#define FSCACHE_OBJLIST_CONFIG_NOWORK	0x00002000	/* show objects without slow work */
+
+	u8		buf[512];	/* key and aux data buffer */
+};
+
+/*
+ * Add an object to the object list
+ * - we use the address of the fscache_object structure as the key into the
+ *   tree
+ */
+void fscache_objlist_add(struct fscache_object *obj)
+{
+	struct fscache_object *xobj;
+	struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL;
+
+	write_lock(&fscache_object_list_lock);
+
+	while (*p) {
+		parent = *p;
+		xobj = rb_entry(parent, struct fscache_object, objlist_link);
+
+		if (obj < xobj)
+			p = &(*p)->rb_left;
+		else if (obj > xobj)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&obj->objlist_link, parent, p);
+	rb_insert_color(&obj->objlist_link, &fscache_object_list);
+
+	write_unlock(&fscache_object_list_lock);
+}
+
+/**
+ * fscache_object_destroy - Note that a cache object is about to be destroyed
+ * @object: The object to be destroyed
+ *
+ * Note the imminent destruction and deallocation of a cache object record.
+ */
+void fscache_object_destroy(struct fscache_object *obj)
+{
+	write_lock(&fscache_object_list_lock);
+
+	BUG_ON(RB_EMPTY_ROOT(&fscache_object_list));
+	rb_erase(&obj->objlist_link, &fscache_object_list);
+
+	write_unlock(&fscache_object_list_lock);
+}
+EXPORT_SYMBOL(fscache_object_destroy);
+
+/*
+ * find the object in the tree on or after the specified index
+ */
+static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
+{
+	struct fscache_object *pobj, *obj, *minobj = NULL;
+	struct rb_node *p;
+	unsigned long pos;
+
+	if (*_pos >= (unsigned long) ERR_PTR(-ENOENT))
+		return NULL;
+	pos = *_pos;
+
+	/* banners (can't represent line 0 by pos 0 as that would involve
+	 * returning a NULL pointer) */
+	if (pos == 0)
+		return (struct fscache_object *) ++(*_pos);
+	if (pos < 3)
+		return (struct fscache_object *)pos;
+
+	pobj = (struct fscache_object *)pos;
+	p = fscache_object_list.rb_node;
+	while (p) {
+		obj = rb_entry(p, struct fscache_object, objlist_link);
+		if (pobj < obj) {
+			if (!minobj || minobj > obj)
+				minobj = obj;
+			p = p->rb_left;
+		} else if (pobj > obj) {
+			p = p->rb_right;
+		} else {
+			minobj = obj;
+			break;
+		}
+		obj = NULL;
+	}
+
+	if (!minobj)
+		*_pos = (unsigned long) ERR_PTR(-ENOENT);
+	else if (minobj != obj)
+		*_pos = (unsigned long) minobj;
+	return minobj;
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos)
+	__acquires(&fscache_object_list_lock)
+{
+	read_lock(&fscache_object_list_lock);
+	return fscache_objlist_lookup(_pos);
+}
+
+/*
+ * move to the next line
+ */
+static void *fscache_objlist_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	(*_pos)++;
+	return fscache_objlist_lookup(_pos);
+}
+
+/*
+ * clean up after reading
+ */
+static void fscache_objlist_stop(struct seq_file *m, void *v)
+	__releases(&fscache_object_list_lock)
+{
+	read_unlock(&fscache_object_list_lock);
+}
+
+/*
+ * display an object
+ */
+static int fscache_objlist_show(struct seq_file *m, void *v)
+{
+	struct fscache_objlist_data *data = m->private;
+	struct fscache_object *obj = v;
+	unsigned long config = data->config;
+	uint16_t keylen, auxlen;
+	char _type[3], *type;
+	bool no_cookie;
+	u8 *buf = data->buf, *p;
+
+	if ((unsigned long) v == 1) {
+		seq_puts(m, "OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS"
+			 " EM EV F S"
+			 " | NETFS_COOKIE_DEF TY FL NETFS_DATA");
+		if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
+			      FSCACHE_OBJLIST_CONFIG_AUX))
+			seq_puts(m, "       ");
+		if (config & FSCACHE_OBJLIST_CONFIG_KEY)
+			seq_puts(m, "OBJECT_KEY");
+		if ((config & (FSCACHE_OBJLIST_CONFIG_KEY |
+			       FSCACHE_OBJLIST_CONFIG_AUX)) ==
+		    (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX))
+			seq_puts(m, ", ");
+		if (config & FSCACHE_OBJLIST_CONFIG_AUX)
+			seq_puts(m, "AUX_DATA");
+		seq_puts(m, "\n");
+		return 0;
+	}
+
+	if ((unsigned long) v == 2) {
+		seq_puts(m, "======== ======== ==== ===== === === === == ====="
+			 " == == = ="
+			 " | ================ == == ================");
+		if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
+			      FSCACHE_OBJLIST_CONFIG_AUX))
+			seq_puts(m, " ================");
+		seq_puts(m, "\n");
+		return 0;
+	}
+
+	/* filter out any unwanted objects */
+#define FILTER(criterion, _yes, _no)					\
+	do {								\
+		unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes;	\
+		unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no;	\
+		if (criterion) {					\
+			if (!(config & yes))				\
+				return 0;				\
+		} else {						\
+			if (!(config & no))				\
+				return 0;				\
+		}							\
+	} while(0)
+
+	if (~config) {
+		FILTER(obj->cookie,
+		       COOKIE, NOCOOKIE);
+		FILTER(obj->state != FSCACHE_OBJECT_ACTIVE ||
+		       obj->n_ops != 0 ||
+		       obj->n_obj_ops != 0 ||
+		       obj->flags ||
+		       !list_empty(&obj->dependents),
+		       BUSY, IDLE);
+		FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags),
+		       PENDWR, NOPENDWR);
+		FILTER(atomic_read(&obj->n_reads),
+		       READS, NOREADS);
+		FILTER(obj->events & obj->event_mask,
+		       EVENTS, NOEVENTS);
+		FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW),
+		       WORK, NOWORK);
+	}
+
+	seq_printf(m,
+		   "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ",
+		   obj->debug_id,
+		   obj->parent ? obj->parent->debug_id : -1,
+		   fscache_object_states_short[obj->state],
+		   obj->n_children,
+		   obj->n_ops,
+		   obj->n_obj_ops,
+		   obj->n_in_progress,
+		   obj->n_exclusive,
+		   atomic_read(&obj->n_reads),
+		   obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
+		   obj->events,
+		   obj->flags,
+		   obj->work.flags);
+
+	no_cookie = true;
+	keylen = auxlen = 0;
+	if (obj->cookie) {
+		spin_lock(&obj->lock);
+		if (obj->cookie) {
+			switch (obj->cookie->def->type) {
+			case 0:
+				type = "IX";
+				break;
+			case 1:
+				type = "DT";
+				break;
+			default:
+				sprintf(_type, "%02u",
+					obj->cookie->def->type);
+				type = _type;
+				break;
+			}
+
+			seq_printf(m, "%-16s %s %2lx %16p",
+				   obj->cookie->def->name,
+				   type,
+				   obj->cookie->flags,
+				   obj->cookie->netfs_data);
+
+			if (obj->cookie->def->get_key &&
+			    config & FSCACHE_OBJLIST_CONFIG_KEY)
+				keylen = obj->cookie->def->get_key(
+					obj->cookie->netfs_data,
+					buf, 400);
+
+			if (obj->cookie->def->get_aux &&
+			    config & FSCACHE_OBJLIST_CONFIG_AUX)
+				auxlen = obj->cookie->def->get_aux(
+					obj->cookie->netfs_data,
+					buf + keylen, 512 - keylen);
+
+			no_cookie = false;
+		}
+		spin_unlock(&obj->lock);
+
+		if (!no_cookie && (keylen > 0 || auxlen > 0)) {
+			seq_printf(m, " ");
+			for (p = buf; keylen > 0; keylen--)
+				seq_printf(m, "%02x", *p++);
+			if (auxlen > 0) {
+				if (config & FSCACHE_OBJLIST_CONFIG_KEY)
+					seq_printf(m, ", ");
+				for (; auxlen > 0; auxlen--)
+					seq_printf(m, "%02x", *p++);
+			}
+		}
+	}
+
+	if (no_cookie)
+		seq_printf(m, "<no_cookie>\n");
+	else
+		seq_printf(m, "\n");
+	return 0;
+}
+
+static const struct seq_operations fscache_objlist_ops = {
+	.start		= fscache_objlist_start,
+	.stop		= fscache_objlist_stop,
+	.next		= fscache_objlist_next,
+	.show		= fscache_objlist_show,
+};
+
+/*
+ * get the configuration for filtering the list
+ */
+static void fscache_objlist_config(struct fscache_objlist_data *data)
+{
+#ifdef CONFIG_KEYS
+	struct user_key_payload *confkey;
+	unsigned long config;
+	struct key *key;
+	const char *buf;
+	int len;
+
+	key = request_key(&key_type_user, "fscache:objlist", NULL);
+	if (IS_ERR(key))
+		goto no_config;
+
+	config = 0;
+	rcu_read_lock();
+
+	confkey = key->payload.data;
+	buf = confkey->data;
+
+	for (len = confkey->datalen - 1; len >= 0; len--) {
+		switch (buf[len]) {
+		case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY;		break;
+		case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX;		break;
+		case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE;	break;
+		case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE;	break;
+		case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY;	break;
+		case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE;	break;
+		case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR;	break;
+		case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR;	break;
+		case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS;	break;
+		case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS;	break;
+		case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK;	break;
+		case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK;	break;
+		}
+	}
+
+	rcu_read_unlock();
+	key_put(key);
+
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE;
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE;
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR;
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS;
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS;
+	if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK)))
+	    config   |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK;
+
+	data->config = config;
+	return;
+
+no_config:
+#endif
+	data->config = ULONG_MAX;
+}
+
+/*
+ * open "/proc/fs/fscache/objects" to provide a list of active objects
+ * - can be configured by a user-defined key added to the caller's keyrings
+ */
+static int fscache_objlist_open(struct inode *inode, struct file *file)
+{
+	struct fscache_objlist_data *data;
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &fscache_objlist_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+
+	/* buffer for key extraction */
+	data = kmalloc(sizeof(struct fscache_objlist_data), GFP_KERNEL);
+	if (!data) {
+		seq_release(inode, file);
+		return -ENOMEM;
+	}
+
+	/* get the configuration key */
+	fscache_objlist_config(data);
+
+	m->private = data;
+	return 0;
+}
+
+/*
+ * clean up on close
+ */
+static int fscache_objlist_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+
+	kfree(m->private);
+	m->private = NULL;
+	return seq_release(inode, file);
+}
+
+const struct file_operations fscache_objlist_fops = {
+	.owner		= THIS_MODULE,
+	.open		= fscache_objlist_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= fscache_objlist_release,
+};
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 615b63dd9ecc..ad1644f073bd 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -34,7 +34,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
 };
 EXPORT_SYMBOL(fscache_object_states);
 
-static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
+const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 	[FSCACHE_OBJECT_INIT]		= "INIT",
 	[FSCACHE_OBJECT_LOOKING_UP]	= "LOOK",
 	[FSCACHE_OBJECT_CREATING]	= "CRTN",
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 91bbe6f0377c..09e43b6e822f 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -322,6 +322,9 @@ void fscache_put_operation(struct fscache_operation *op)
 
 	object = op->object;
 
+	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags))
+		atomic_dec(&object->n_reads);
+
 	/* now... we may get called with the object spinlock held, so we
 	 * complete the cleanup here only if we can immediately acquire the
 	 * lock, and defer it otherwise */
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index e8bbc395cef6..c5973e38ce39 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -275,6 +275,9 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 
 	ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
 
+	atomic_inc(&object->n_reads);
+	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+
 	if (fscache_submit_op(object, &op->op) < 0)
 		goto nobufs_unlock;
 	spin_unlock(&cookie->lock);
@@ -386,6 +389,9 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
 
+	atomic_inc(&object->n_reads);
+	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+
 	if (fscache_submit_op(object, &op->op) < 0)
 		goto nobufs_unlock;
 	spin_unlock(&cookie->lock);
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index beeab44bc31a..1d9e4951a597 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -37,10 +37,20 @@ int __init fscache_proc_init(void)
 		goto error_histogram;
 #endif
 
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+	if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL,
+			 &fscache_objlist_fops))
+		goto error_objects;
+#endif
+
 	_leave(" = 0");
 	return 0;
 
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+error_objects:
+#endif
 #ifdef CONFIG_FSCACHE_HISTOGRAM
+	remove_proc_entry("fs/fscache/histogram", NULL);
 error_histogram:
 #endif
 #ifdef CONFIG_FSCACHE_STATS
@@ -58,6 +68,9 @@ error_dir:
  */
 void fscache_proc_cleanup(void)
 {
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+	remove_proc_entry("fs/fscache/objects", NULL);
+#endif
 #ifdef CONFIG_FSCACHE_HISTOGRAM
 	remove_proc_entry("fs/fscache/histogram", NULL);
 #endif
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 7a9847ccd192..184cbdfbcc99 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -91,6 +91,8 @@ struct fscache_operation {
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
 #define FSCACHE_OP_DEAD		6	/* op is now dead */
+#define FSCACHE_OP_DEC_READ_CNT	7	/* decrement object->n_reads on destruction */
+#define FSCACHE_OP_KEEP_FLAGS	0xc0	/* flags to keep when repurposing an op */
 
 	atomic_t		usage;
 	unsigned		debug_id;	/* debugging ID */
@@ -357,6 +359,7 @@ struct fscache_object {
 	int			n_obj_ops;	/* number of object ops outstanding on object */
 	int			n_in_progress;	/* number of ops in progress */
 	int			n_exclusive;	/* number of exclusive ops queued */
+	atomic_t		n_reads;	/* number of read ops in progress */
 	spinlock_t		lock;		/* state and operations lock */
 
 	unsigned long		lookup_jif;	/* time at which lookup started */
@@ -370,6 +373,7 @@ struct fscache_object {
 #define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
 #define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
 #define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
+#define FSCACHE_OBJECT_EVENTS_MASK	0x7f	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
@@ -385,6 +389,9 @@ struct fscache_object {
 	struct list_head	dependents;	/* FIFO of dependent objects */
 	struct list_head	dep_link;	/* link in parent's dependents list */
 	struct list_head	pending_ops;	/* unstarted operations on this object */
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+	struct rb_node		objlist_link;	/* link in global object list */
+#endif
 	pgoff_t			store_limit;	/* current storage limit */
 };
 
@@ -434,6 +441,12 @@ void fscache_object_init(struct fscache_object *object,
 extern void fscache_object_lookup_negative(struct fscache_object *object);
 extern void fscache_obtained_object(struct fscache_object *object);
 
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+extern void fscache_object_destroy(struct fscache_object *object);
+#else
+#define fscache_object_destroy(object) do {} while(0)
+#endif
+
 /**
  * fscache_object_destroyed - Note destruction of an object in a cache
  * @cache: The cache from which the object came
-- 
cgit v1.2.3


From 1bccf513ac49d44604ba1cddcc29f5886e70f1b6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:25 +0000
Subject: FS-Cache: Fix lock misorder in fscache_write_op()

FS-Cache has two structs internally for keeping track of the internal state of
a cached file: the fscache_cookie struct, which represents the netfs's state,
and fscache_object struct, which represents the cache's state.  Each has a
pointer that points to the other (when both are in existence), and each has a
spinlock for pointer maintenance.

Since netfs operations approach these structures from the cookie side, they get
the cookie lock first, then the object lock.  Cache operations, on the other
hand, approach from the object side, and get the object lock first.  It is not
then permitted for a cache operation to get the cookie lock whilst it is
holding the object lock lest deadlock occur; instead, it must do one of two
things:

 (1) increment the cookie usage counter, drop the object lock and then get both
     locks in order, or

 (2) simply hold the object lock as certain parts of the cookie may not be
     altered whilst the object lock is held.

It is also not permitted to follow either pointer without holding the lock at
the end you start with.  To break the pointers between the cookie and the
object, both locks must be held.

fscache_write_op(), however, violates the locking rules: It attempts to get the
cookie lock without (a) checking that the cookie pointer is a valid pointer,
and (b) holding the object lock to protect the cookie pointer whilst it follows
it.  This is so that it can access the pending page store tree without
interference from __fscache_write_page().

This is fixed by splitting the cookie lock, such that the page store tracking
tree is protected by its own lock, and checking that the cookie pointer is
non-NULL before we attempt to follow it whilst holding the object lock.

The new lock is subordinate to both the cookie lock and the object lock, and so
should be taken after those.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt |  3 ++
 fs/fscache/cookie.c                           |  1 +
 fs/fscache/internal.h                         |  4 +++
 fs/fscache/page.c                             | 52 ++++++++++++++++++---------
 fs/fscache/stats.c                            | 10 ++++--
 include/linux/fscache-cache.h                 |  1 +
 6 files changed, 52 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 0a77868f4977..9cf2cfbc81c9 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -269,6 +269,9 @@ proc files.
 		oom=N	Number of store reqs failed -ENOMEM
 		ops=N	Number of store reqs submitted
 		run=N	Number of store reqs granted CPU time
+		pgs=N	Number of pages given store req processing time
+		rxd=N	Number of store reqs deleted from tracking tree
+		olm=N	Number of store reqs over store limit
 	Ops	pend=N	Number of times async ops added to pending queues
 		run=N	Number of times async ops given CPU time
 		enq=N	Number of times async ops queued for processing
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index e6854f5222f5..f979659c1b3f 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -36,6 +36,7 @@ void fscache_cookie_init_once(void *_cookie)
 
 	memset(cookie, 0, sizeof(*cookie));
 	spin_lock_init(&cookie->lock);
+	spin_lock_init(&cookie->stores_lock);
 	INIT_HLIST_HEAD(&cookie->backing_objects);
 }
 
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 50324ad2b194..ba1853fa1ff9 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -17,6 +17,7 @@
  * - cache->object_list_lock
  * - object->lock
  * - object->parent->lock
+ * - cookie->stores_lock
  * - fscache_thread_lock
  *
  */
@@ -174,6 +175,9 @@ extern atomic_t fscache_n_stores_nobufs;
 extern atomic_t fscache_n_stores_oom;
 extern atomic_t fscache_n_store_ops;
 extern atomic_t fscache_n_store_calls;
+extern atomic_t fscache_n_store_pages;
+extern atomic_t fscache_n_store_radix_deletes;
+extern atomic_t fscache_n_store_pages_over_limit;
 
 extern atomic_t fscache_n_marks;
 extern atomic_t fscache_n_uncaches;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index e6f2e61133a1..3ea8897bc217 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -45,16 +45,26 @@ EXPORT_SYMBOL(__fscache_wait_on_page_write);
 /*
  * note that a page has finished being written to the cache
  */
-static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page)
+static void fscache_end_page_write(struct fscache_object *object,
+				   struct page *page)
 {
-	struct page *xpage;
+	struct fscache_cookie *cookie;
+	struct page *xpage = NULL;
 
-	spin_lock(&cookie->lock);
-	xpage = radix_tree_delete(&cookie->stores, page->index);
-	spin_unlock(&cookie->lock);
-	ASSERT(xpage != NULL);
-
-	wake_up_bit(&cookie->flags, 0);
+	spin_lock(&object->lock);
+	cookie = object->cookie;
+	if (cookie) {
+		/* delete the page from the tree if it is now no longer
+		 * pending */
+		spin_lock(&cookie->stores_lock);
+		fscache_stat(&fscache_n_store_radix_deletes);
+		xpage = radix_tree_delete(&cookie->stores, page->index);
+		spin_unlock(&cookie->stores_lock);
+		wake_up_bit(&cookie->flags, 0);
+	}
+	spin_unlock(&object->lock);
+	if (xpage)
+		page_cache_release(xpage);
 }
 
 /*
@@ -591,7 +601,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 	struct fscache_storage *op =
 		container_of(_op, struct fscache_storage, op);
 	struct fscache_object *object = op->op.object;
-	struct fscache_cookie *cookie = object->cookie;
+	struct fscache_cookie *cookie;
 	struct page *page;
 	unsigned n;
 	void *results[1];
@@ -601,16 +611,17 @@ static void fscache_write_op(struct fscache_operation *_op)
 
 	fscache_set_op_state(&op->op, "GetPage");
 
-	spin_lock(&cookie->lock);
 	spin_lock(&object->lock);
+	cookie = object->cookie;
 
-	if (!fscache_object_is_active(object)) {
+	if (!fscache_object_is_active(object) || !cookie) {
 		spin_unlock(&object->lock);
-		spin_unlock(&cookie->lock);
 		_leave("");
 		return;
 	}
 
+	spin_lock(&cookie->stores_lock);
+
 	fscache_stat(&fscache_n_store_calls);
 
 	/* find a page to store */
@@ -621,23 +632,25 @@ static void fscache_write_op(struct fscache_operation *_op)
 		goto superseded;
 	page = results[0];
 	_debug("gang %d [%lx]", n, page->index);
-	if (page->index > op->store_limit)
+	if (page->index > op->store_limit) {
+		fscache_stat(&fscache_n_store_pages_over_limit);
 		goto superseded;
+	}
 
 	radix_tree_tag_clear(&cookie->stores, page->index,
 			     FSCACHE_COOKIE_PENDING_TAG);
 
+	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
-	spin_unlock(&cookie->lock);
 
 	if (page) {
 		fscache_set_op_state(&op->op, "Store");
+		fscache_stat(&fscache_n_store_pages);
 		fscache_stat(&fscache_n_cop_write_page);
 		ret = object->cache->ops->write_page(op, page);
 		fscache_stat_d(&fscache_n_cop_write_page);
 		fscache_set_op_state(&op->op, "EndWrite");
-		fscache_end_page_write(cookie, page);
-		page_cache_release(page);
+		fscache_end_page_write(object, page);
 		if (ret < 0) {
 			fscache_set_op_state(&op->op, "Abort");
 			fscache_abort_object(object);
@@ -653,9 +666,9 @@ superseded:
 	/* this writer is going away and there aren't any more things to
 	 * write */
 	_debug("cease");
+	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
-	spin_unlock(&cookie->lock);
 	_leave("");
 }
 
@@ -731,6 +744,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	/* add the page to the pending-storage radix tree on the backing
 	 * object */
 	spin_lock(&object->lock);
+	spin_lock(&cookie->stores_lock);
 
 	_debug("store limit %llx", (unsigned long long) object->store_limit);
 
@@ -751,6 +765,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags))
 		goto already_pending;
 
+	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 
 	op->op.debug_id	= atomic_inc_return(&fscache_op_debug_id);
@@ -772,6 +787,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 already_queued:
 	fscache_stat(&fscache_n_stores_again);
 already_pending:
+	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 	spin_unlock(&cookie->lock);
 	radix_tree_preload_end();
@@ -781,7 +797,9 @@ already_pending:
 	return 0;
 
 submit_failed:
+	spin_lock(&cookie->stores_lock);
 	radix_tree_delete(&cookie->stores, page->index);
+	spin_unlock(&cookie->stores_lock);
 	page_cache_release(page);
 	ret = -ENOBUFS;
 	goto nobufs;
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 4c07439d1307..1d53ea68409e 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -58,6 +58,9 @@ atomic_t fscache_n_stores_nobufs;
 atomic_t fscache_n_stores_oom;
 atomic_t fscache_n_store_ops;
 atomic_t fscache_n_store_calls;
+atomic_t fscache_n_store_pages;
+atomic_t fscache_n_store_radix_deletes;
+atomic_t fscache_n_store_pages_over_limit;
 
 atomic_t fscache_n_marks;
 atomic_t fscache_n_uncaches;
@@ -200,9 +203,12 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_stores_again),
 		   atomic_read(&fscache_n_stores_nobufs),
 		   atomic_read(&fscache_n_stores_oom));
-	seq_printf(m, "Stores : ops=%u run=%u\n",
+	seq_printf(m, "Stores : ops=%u run=%u pgs=%u rxd=%u olm=%u\n",
 		   atomic_read(&fscache_n_store_ops),
-		   atomic_read(&fscache_n_store_calls));
+		   atomic_read(&fscache_n_store_calls),
+		   atomic_read(&fscache_n_store_pages),
+		   atomic_read(&fscache_n_store_radix_deletes),
+		   atomic_read(&fscache_n_store_pages_over_limit));
 
 	seq_printf(m, "Ops    : pend=%u run=%u enq=%u can=%u\n",
 		   atomic_read(&fscache_n_op_pend),
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 184cbdfbcc99..f3aa4bdafef6 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -310,6 +310,7 @@ struct fscache_cookie {
 	atomic_t			usage;		/* number of users of this cookie */
 	atomic_t			n_children;	/* number of children of this cookie */
 	spinlock_t			lock;
+	spinlock_t			stores_lock;	/* lock on page store tree */
 	struct hlist_head		backing_objects; /* object(s) backing this file/index */
 	const struct fscache_cookie_def	*def;		/* definition */
 	struct fscache_cookie		*parent;	/* parent of this entry */
-- 
cgit v1.2.3


From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:35 +0000
Subject: FS-Cache: Handle pages pending storage that get evicted under OOM
 conditions

Handle netfs pages that the vmscan algorithm wants to evict from the pagecache
under OOM conditions, but that are waiting for write to the cache.  Under these
conditions, vmscan calls the releasepage() function of the netfs, asking if a
page can be discarded.

The problem is typified by the following trace of a stuck process:

	kslowd005     D 0000000000000000     0  4253      2 0x00000080
	 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007
	 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8
	 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8
	Call Trace:
	 [<ffffffffa00782d8>] __fscache_wait_on_page_write+0x8b/0xa7 [fscache]
	 [<ffffffff8104c0f1>] ? autoremove_wake_function+0x0/0x34
	 [<ffffffffa0078240>] ? __fscache_check_page_write+0x63/0x70 [fscache]
	 [<ffffffffa00b671d>] nfs_fscache_release_page+0x4e/0xc4 [nfs]
	 [<ffffffffa00927f0>] nfs_release_page+0x3c/0x41 [nfs]
	 [<ffffffff810885d3>] try_to_release_page+0x32/0x3b
	 [<ffffffff81093203>] shrink_page_list+0x316/0x4ac
	 [<ffffffff8109372b>] shrink_inactive_list+0x392/0x67c
	 [<ffffffff813532fa>] ? __mutex_unlock_slowpath+0x100/0x10b
	 [<ffffffff81058df0>] ? trace_hardirqs_on_caller+0x10c/0x130
	 [<ffffffff8135330e>] ? mutex_unlock+0x9/0xb
	 [<ffffffff81093aa2>] shrink_list+0x8d/0x8f
	 [<ffffffff81093d1c>] shrink_zone+0x278/0x33c
	 [<ffffffff81052d6c>] ? ktime_get_ts+0xad/0xba
	 [<ffffffff81094b13>] try_to_free_pages+0x22e/0x392
	 [<ffffffff81091e24>] ? isolate_pages_global+0x0/0x212
	 [<ffffffff8108e743>] __alloc_pages_nodemask+0x3dc/0x5cf
	 [<ffffffff81089529>] grab_cache_page_write_begin+0x65/0xaa
	 [<ffffffff8110f8c0>] ext3_write_begin+0x78/0x1eb
	 [<ffffffff81089ec5>] generic_file_buffered_write+0x109/0x28c
	 [<ffffffff8103cb69>] ? current_fs_time+0x22/0x29
	 [<ffffffff8108a509>] __generic_file_aio_write+0x350/0x385
	 [<ffffffff8108a588>] ? generic_file_aio_write+0x4a/0xae
	 [<ffffffff8108a59e>] generic_file_aio_write+0x60/0xae
	 [<ffffffff810b2e82>] do_sync_write+0xe3/0x120
	 [<ffffffff8104c0f1>] ? autoremove_wake_function+0x0/0x34
	 [<ffffffff810b18e1>] ? __dentry_open+0x1a5/0x2b8
	 [<ffffffff810b1a76>] ? dentry_open+0x82/0x89
	 [<ffffffffa00e693c>] cachefiles_write_page+0x298/0x335 [cachefiles]
	 [<ffffffffa0077147>] fscache_write_op+0x178/0x2c2 [fscache]
	 [<ffffffffa0075656>] fscache_op_execute+0x7a/0xd1 [fscache]
	 [<ffffffff81082093>] slow_work_execute+0x18f/0x2d1
	 [<ffffffff8108239a>] slow_work_thread+0x1c5/0x308
	 [<ffffffff8104c0f1>] ? autoremove_wake_function+0x0/0x34
	 [<ffffffff810821d5>] ? slow_work_thread+0x0/0x308
	 [<ffffffff8104be91>] kthread+0x7a/0x82
	 [<ffffffff8100beda>] child_rip+0xa/0x20
	 [<ffffffff8100b87c>] ? restore_args+0x0/0x30
	 [<ffffffff8102ef83>] ? tg_shares_up+0x171/0x227
	 [<ffffffff8104be17>] ? kthread+0x0/0x82
	 [<ffffffff8100bed0>] ? child_rip+0x0/0x20

In the above backtrace, the following is happening:

 (1) A page storage operation is being executed by a slow-work thread
     (fscache_write_op()).

 (2) FS-Cache farms the operation out to the cache to perform
     (cachefiles_write_page()).

 (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's
     standard write (do_sync_write()) under KERNEL_DS directly from the netfs
     page.

 (4) However, for Ext3 to perform the write, it must allocate some memory, in
     particular, it must allocate at least one page cache page into which it
     can copy the data from the netfs page.

 (5) Under OOM conditions, the memory allocator can't immediately come up with
     a page, so it uses vmscan to find something to discard
     (try_to_free_pages()).

 (6) vmscan finds a clean netfs page it might be able to discard (possibly the
     one it's trying to write out).

 (7) The netfs is called to throw the page away (nfs_release_page()) - but it's
     called with __GFP_WAIT, so the netfs decides to wait for the store to
     complete (__fscache_wait_on_page_write()).

 (8) This blocks a slow-work processing thread - possibly against itself.

The system ends up stuck because it can't write out any netfs pages to the
cache without allocating more memory.

To avoid this, we make FS-Cache cancel some writes that aren't in the middle of
actually being performed.  This means that some data won't make it into the
cache this time.  To support this, a new FS-Cache function is added
fscache_maybe_release_page() that replaces what the netfs releasepage()
functions used to do with respect to the cache.

The decisions fscache_maybe_release_page() makes are counted and displayed
through /proc/fs/fscache/stats on a line labelled "VmScan".  There are four
counters provided: "nos=N" - pages that weren't pending storage; "gon=N" -
pages that were pending storage when we first looked, but weren't by the time
we got the object lock; "bsy=N" - pages that we ignored as they were actively
being written when we looked; and "can=N" - pages that we cancelled the storage
of.

What I'd really like to do is alter the behaviour of the cancellation
heuristics, depending on how necessary it is to expel pages.  If there are
plenty of other pages that aren't waiting to be written to the cache that
could be ejected first, then it would be nice to hold up on immediate
cancellation of cache writes - but I don't see a way of doing that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt   |  4 ++
 Documentation/filesystems/caching/netfs-api.txt | 21 ++++++-
 fs/9p/cache.c                                   | 14 +----
 fs/afs/file.c                                   | 15 +----
 fs/fscache/internal.h                           |  5 ++
 fs/fscache/page.c                               | 79 ++++++++++++++++++++++++-
 fs/fscache/stats.c                              | 11 ++++
 fs/nfs/fscache.c                                | 10 +---
 include/linux/fscache-cache.h                   |  1 +
 include/linux/fscache.h                         | 27 +++++++++
 10 files changed, 152 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 057a3c71d524..7097fd29fb3d 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -272,6 +272,10 @@ proc files.
 		pgs=N	Number of pages given store req processing time
 		rxd=N	Number of store reqs deleted from tracking tree
 		olm=N	Number of store reqs over store limit
+	VmScan	nos=N	Number of release reqs against pages with no pending store
+		gon=N	Number of release reqs against pages stored by time lock granted
+		bsy=N	Number of release reqs ignored due to in-progress store
+		can=N	Number of page stores cancelled due to release req
 	Ops	pend=N	Number of times async ops added to pending queues
 		run=N	Number of times async ops given CPU time
 		enq=N	Number of times async ops queued for processing
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 2666b1ed5e9e..1902c57b72ef 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below).
 
 Furthermore, note that this does not cancel the asynchronous read or write
 operation started by the read/alloc and write functions, so the page
-invalidation and release functions must use:
+invalidation functions must use:
 
 	bool fscache_check_page_write(struct fscache_cookie *cookie,
 				      struct page *page);
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and:
 to wait for it to finish if it is.
 
 
+When releasepage() is being implemented, a special FS-Cache function exists to
+manage the heuristics of coping with vmscan trying to eject pages, which may
+conflict with the cache trying to write pages to the cache (which may itself
+need to allocate memory):
+
+	bool fscache_maybe_release_page(struct fscache_cookie *cookie,
+					struct page *page,
+					gfp_t gfp);
+
+This takes the netfs cookie, and the page and gfp arguments as supplied to
+releasepage().  It will return false if the page cannot be released yet for
+some reason and if it returns true, the page has been uncached and can now be
+released.
+
+To make a page available for release, this function may wait for an outstanding
+storage request to complete, or it may attempt to cancel the storage request -
+in which case the page will not be stored in the cache this time.
+
+
 ==========================
 INDEX AND DATA FILE UPDATE
 ==========================
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 51c94e26a346..bcc5357a9069 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -343,18 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
 
 	BUG_ON(!vcookie->fscache);
 
-	if (PageFsCache(page)) {
-		if (fscache_check_page_write(vcookie->fscache, page)) {
-			if (!(gfp & __GFP_WAIT))
-				return 0;
-			fscache_wait_on_page_write(vcookie->fscache, page);
-		}
-
-		fscache_uncache_page(vcookie->fscache, page);
-		ClearPageFsCache(page);
-	}
-
-	return 1;
+	return fscache_maybe_release_page(vnode->cache, page, gfp);
 }
 
 void __v9fs_fscache_invalidate_page(struct page *page)
@@ -368,7 +357,6 @@ void __v9fs_fscache_invalidate_page(struct page *page)
 		fscache_wait_on_page_write(vcookie->fscache, page);
 		BUG_ON(!PageLocked(page));
 		fscache_uncache_page(vcookie->fscache, page);
-		ClearPageFsCache(page);
 	}
 }
 
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 681c2a7b013f..39b301662f22 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -315,7 +315,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset)
 			struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 			fscache_wait_on_page_write(vnode->cache, page);
 			fscache_uncache_page(vnode->cache, page);
-			ClearPageFsCache(page);
 		}
 #endif
 
@@ -349,17 +348,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 	/* deny if page is being written to the cache and the caller hasn't
 	 * elected to wait */
 #ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page)) {
-		if (fscache_check_page_write(vnode->cache, page)) {
-			if (!(gfp_flags & __GFP_WAIT)) {
-				_leave(" = F [cache busy]");
-				return 0;
-			}
-			fscache_wait_on_page_write(vnode->cache, page);
-		}
-
-		fscache_uncache_page(vnode->cache, page);
-		ClearPageFsCache(page);
+	if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
+		_leave(" = F [cache busy]");
+		return 0;
 	}
 #endif
 
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index a0769872b19c..e5046519b153 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -180,6 +180,11 @@ extern atomic_t fscache_n_store_pages;
 extern atomic_t fscache_n_store_radix_deletes;
 extern atomic_t fscache_n_store_pages_over_limit;
 
+extern atomic_t fscache_n_store_vmscan_not_storing;
+extern atomic_t fscache_n_store_vmscan_gone;
+extern atomic_t fscache_n_store_vmscan_busy;
+extern atomic_t fscache_n_store_vmscan_cancelled;
+
 extern atomic_t fscache_n_marks;
 extern atomic_t fscache_n_uncaches;
 
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 022a5da8e130..fc76798bd968 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -42,6 +42,75 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa
 }
 EXPORT_SYMBOL(__fscache_wait_on_page_write);
 
+/*
+ * decide whether a page can be released, possibly by cancelling a store to it
+ * - we're allowed to sleep if __GFP_WAIT is flagged
+ */
+bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
+				  struct page *page,
+				  gfp_t gfp)
+{
+	struct page *xpage;
+	void *val;
+
+	_enter("%p,%p,%x", cookie, page, gfp);
+
+	rcu_read_lock();
+	val = radix_tree_lookup(&cookie->stores, page->index);
+	if (!val) {
+		rcu_read_unlock();
+		fscache_stat(&fscache_n_store_vmscan_not_storing);
+		__fscache_uncache_page(cookie, page);
+		return true;
+	}
+
+	/* see if the page is actually undergoing storage - if so we can't get
+	 * rid of it till the cache has finished with it */
+	if (radix_tree_tag_get(&cookie->stores, page->index,
+			       FSCACHE_COOKIE_STORING_TAG)) {
+		rcu_read_unlock();
+		goto page_busy;
+	}
+
+	/* the page is pending storage, so we attempt to cancel the store and
+	 * discard the store request so that the page can be reclaimed */
+	spin_lock(&cookie->stores_lock);
+	rcu_read_unlock();
+
+	if (radix_tree_tag_get(&cookie->stores, page->index,
+			       FSCACHE_COOKIE_STORING_TAG)) {
+		/* the page started to undergo storage whilst we were looking,
+		 * so now we can only wait or return */
+		spin_unlock(&cookie->stores_lock);
+		goto page_busy;
+	}
+
+	xpage = radix_tree_delete(&cookie->stores, page->index);
+	spin_unlock(&cookie->stores_lock);
+
+	if (xpage) {
+		fscache_stat(&fscache_n_store_vmscan_cancelled);
+		fscache_stat(&fscache_n_store_radix_deletes);
+		ASSERTCMP(xpage, ==, page);
+	} else {
+		fscache_stat(&fscache_n_store_vmscan_gone);
+	}
+
+	wake_up_bit(&cookie->flags, 0);
+	if (xpage)
+		page_cache_release(xpage);
+	__fscache_uncache_page(cookie, page);
+	return true;
+
+page_busy:
+	/* we might want to wait here, but that could deadlock the allocator as
+	 * the slow-work threads writing to the cache may all end up sleeping
+	 * on memory allocation */
+	fscache_stat(&fscache_n_store_vmscan_busy);
+	return false;
+}
+EXPORT_SYMBOL(__fscache_maybe_release_page);
+
 /*
  * note that a page has finished being written to the cache
  */
@@ -57,6 +126,8 @@ static void fscache_end_page_write(struct fscache_object *object,
 		/* delete the page from the tree if it is now no longer
 		 * pending */
 		spin_lock(&cookie->stores_lock);
+		radix_tree_tag_clear(&cookie->stores, page->index,
+				     FSCACHE_COOKIE_STORING_TAG);
 		if (!radix_tree_tag_get(&cookie->stores, page->index,
 					FSCACHE_COOKIE_PENDING_TAG)) {
 			fscache_stat(&fscache_n_store_radix_deletes);
@@ -640,8 +711,12 @@ static void fscache_write_op(struct fscache_operation *_op)
 		goto superseded;
 	}
 
-	radix_tree_tag_clear(&cookie->stores, page->index,
-			     FSCACHE_COOKIE_PENDING_TAG);
+	if (page) {
+		radix_tree_tag_set(&cookie->stores, page->index,
+				   FSCACHE_COOKIE_STORING_TAG);
+		radix_tree_tag_clear(&cookie->stores, page->index,
+				     FSCACHE_COOKIE_PENDING_TAG);
+	}
 
 	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 045ba396dbf2..cda69994e06d 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -63,6 +63,11 @@ atomic_t fscache_n_store_pages;
 atomic_t fscache_n_store_radix_deletes;
 atomic_t fscache_n_store_pages_over_limit;
 
+atomic_t fscache_n_store_vmscan_not_storing;
+atomic_t fscache_n_store_vmscan_gone;
+atomic_t fscache_n_store_vmscan_busy;
+atomic_t fscache_n_store_vmscan_cancelled;
+
 atomic_t fscache_n_marks;
 atomic_t fscache_n_uncaches;
 
@@ -211,6 +216,12 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_store_radix_deletes),
 		   atomic_read(&fscache_n_store_pages_over_limit));
 
+	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n",
+		   atomic_read(&fscache_n_store_vmscan_not_storing),
+		   atomic_read(&fscache_n_store_vmscan_gone),
+		   atomic_read(&fscache_n_store_vmscan_busy),
+		   atomic_read(&fscache_n_store_vmscan_cancelled));
+
 	seq_printf(m, "Ops    : pend=%u run=%u enq=%u can=%u rej=%u\n",
 		   atomic_read(&fscache_n_op_pend),
 		   atomic_read(&fscache_n_op_run),
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 70fad69eb959..fa588006588d 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 
 	BUG_ON(!cookie);
 
-	if (fscache_check_page_write(cookie, page)) {
-		if (!(gfp & __GFP_WAIT))
-			return 0;
-		fscache_wait_on_page_write(cookie, page);
-	}
-
 	if (PageFsCache(page)) {
 		dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
 			 cookie, page, nfsi);
 
-		fscache_uncache_page(cookie, page);
+		if (!fscache_maybe_release_page(cookie, page, gfp))
+			return 0;
+
 		nfs_add_fscache_stats(page->mapping->host,
 				      NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
 	}
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index f3aa4bdafef6..4750d5fb419f 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -317,6 +317,7 @@ struct fscache_cookie {
 	void				*netfs_data;	/* back pointer to netfs */
 	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
 #define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
+#define FSCACHE_COOKIE_STORING_TAG	1		/* pages tag: writing to cache */
 
 	unsigned long			flags;
 #define FSCACHE_COOKIE_LOOKING_UP	0	/* T if non-index cookie being looked up still */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 6d8ee466e0a0..595ce49288b7 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -202,6 +202,8 @@ extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t);
 extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
 extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
 extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
+extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *,
+					 gfp_t);
 
 /**
  * fscache_register_netfs - Register a filesystem as desiring caching services
@@ -615,4 +617,29 @@ void fscache_wait_on_page_write(struct fscache_cookie *cookie,
 		__fscache_wait_on_page_write(cookie, page);
 }
 
+/**
+ * fscache_maybe_release_page - Consider releasing a page, cancelling a store
+ * @cookie: The cookie representing the cache object
+ * @page: The netfs page that is being cached.
+ * @gfp: The gfp flags passed to releasepage()
+ *
+ * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's
+ * releasepage() call.  A storage request on the page may cancelled if it is
+ * not currently being processed.
+ *
+ * The function returns true if the page no longer has a storage request on it,
+ * and false if a storage request is left in place.  If true is returned, the
+ * page will have been passed to fscache_uncache_page().  If false is returned
+ * the page cannot be freed yet.
+ */
+static inline
+bool fscache_maybe_release_page(struct fscache_cookie *cookie,
+				struct page *page,
+				gfp_t gfp)
+{
+	if (fscache_cookie_valid(cookie) && PageFsCache(page))
+		return __fscache_maybe_release_page(cookie, page, gfp);
+	return false;
+}
+
 #endif /* _LINUX_FSCACHE_H */
-- 
cgit v1.2.3


From 60d543ca724be155c2b6166e36a00c80b21bd810 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:45 +0000
Subject: FS-Cache: Start processing an object's operations on that object's
 death

Start processing an object's operations when that object moves into the DYING
state as the object cannot be destroyed until all its outstanding operations
have completed.

Furthermore, make sure that read and allocation operations handle being woken
up on a dead object.  Such events are recorded in the Allocs.abt and
Retrvls.abt statistics as viewable through /proc/fs/fscache/stats.

The code for waiting for object activation for the read and allocation
operations is also extracted into its own function as it is much the same in
all cases, differing only in the stats incremented.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt |   2 +
 fs/fscache/internal.h                         |   5 ++
 fs/fscache/object.c                           |   1 +
 fs/fscache/page.c                             | 112 +++++++++++++-------------
 fs/fscache/stats.c                            |  12 ++-
 include/linux/fscache-cache.h                 |   4 +
 6 files changed, 75 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 7097fd29fb3d..3c23411956bb 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -253,6 +253,7 @@ proc files.
 		int=N	Number of alloc reqs aborted -ERESTARTSYS
 		ops=N	Number of alloc reqs submitted
 		owt=N	Number of alloc reqs waited for CPU time
+		abt=N	Number of alloc reqs aborted due to object death
 	Retrvls	n=N	Number of retrieval (read) requests seen
 		ok=N	Number of successful retr reqs
 		wt=N	Number of retr reqs that waited on lookup completion
@@ -262,6 +263,7 @@ proc files.
 		oom=N	Number of retr reqs failed -ENOMEM
 		ops=N	Number of retr reqs submitted
 		owt=N	Number of retr reqs waited for CPU time
+		abt=N	Number of retr reqs aborted due to object death
 	Stores	n=N	Number of storage (write) requests seen
 		ok=N	Number of successful store reqs
 		agn=N	Number of store reqs on a page already pending storage
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 2bf463d26080..5b49a373689b 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -156,6 +156,7 @@ extern atomic_t fscache_n_allocs_ok;
 extern atomic_t fscache_n_allocs_wait;
 extern atomic_t fscache_n_allocs_nobufs;
 extern atomic_t fscache_n_allocs_intr;
+extern atomic_t fscache_n_allocs_object_dead;
 extern atomic_t fscache_n_alloc_ops;
 extern atomic_t fscache_n_alloc_op_waits;
 
@@ -166,6 +167,7 @@ extern atomic_t fscache_n_retrievals_nodata;
 extern atomic_t fscache_n_retrievals_nobufs;
 extern atomic_t fscache_n_retrievals_intr;
 extern atomic_t fscache_n_retrievals_nomem;
+extern atomic_t fscache_n_retrievals_object_dead;
 extern atomic_t fscache_n_retrieval_ops;
 extern atomic_t fscache_n_retrieval_op_waits;
 
@@ -249,9 +251,12 @@ static inline void fscache_stat_d(atomic_t *stat)
 	atomic_dec(stat);
 }
 
+#define __fscache_stat(stat) (stat)
+
 extern const struct file_operations fscache_stats_fops;
 #else
 
+#define __fscache_stat(stat) (NULL)
 #define fscache_stat(stat) do {} while (0)
 #endif
 
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 74bc562a2cbc..c85c9f582166 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -201,6 +201,7 @@ static void fscache_object_state_machine(struct fscache_object *object)
 		}
 		spin_unlock(&object->lock);
 		fscache_enqueue_dependents(object);
+		fscache_start_operations(object);
 		goto terminal_transit;
 
 		/* handle an abort during initialisation */
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index fc76798bd968..c598ea4c4e7d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -313,6 +313,43 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 	return 0;
 }
 
+/*
+ * wait for an object to become active (or dead)
+ */
+static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
+						 struct fscache_retrieval *op,
+						 atomic_t *stat_op_waits,
+						 atomic_t *stat_object_dead)
+{
+	int ret;
+
+	if (!test_bit(FSCACHE_OP_WAITING, &op->op.flags))
+		goto check_if_dead;
+
+	_debug(">>> WT");
+	fscache_stat(stat_op_waits);
+	if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+			fscache_wait_bit_interruptible,
+			TASK_INTERRUPTIBLE) < 0) {
+		ret = fscache_cancel_op(&op->op);
+		if (ret == 0)
+			return -ERESTARTSYS;
+
+		/* it's been removed from the pending queue by another party,
+		 * so we should get to run shortly */
+		wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+			    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+	}
+	_debug("<<< GO");
+
+check_if_dead:
+	if (unlikely(fscache_object_is_dead(object))) {
+		fscache_stat(stat_object_dead);
+		return -ENOBUFS;
+	}
+	return 0;
+}
+
 /*
  * read a page from the cache or allocate a block in which to store it
  * - we return:
@@ -376,25 +413,12 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 
 	/* we wait for the operation to become active, and then process it
 	 * *here*, in this thread, and not in the thread pool */
-	if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
-		_debug(">>> WT");
-		fscache_stat(&fscache_n_retrieval_op_waits);
-		if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				fscache_wait_bit_interruptible,
-				TASK_INTERRUPTIBLE) < 0) {
-			ret = fscache_cancel_op(&op->op);
-			if (ret == 0) {
-				ret = -ERESTARTSYS;
-				goto error;
-			}
-
-			/* it's been removed from the pending queue by another
-			 * party, so we should get to run shortly */
-			wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-		}
-		_debug("<<< GO");
-	}
+	ret = fscache_wait_for_retrieval_activation(
+		object, op,
+		__fscache_stat(&fscache_n_retrieval_op_waits),
+		__fscache_stat(&fscache_n_retrievals_object_dead));
+	if (ret < 0)
+		goto error;
 
 	/* ask the cache to honour the operation */
 	if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
@@ -506,25 +530,12 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 
 	/* we wait for the operation to become active, and then process it
 	 * *here*, in this thread, and not in the thread pool */
-	if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
-		_debug(">>> WT");
-		fscache_stat(&fscache_n_retrieval_op_waits);
-		if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				fscache_wait_bit_interruptible,
-				TASK_INTERRUPTIBLE) < 0) {
-			ret = fscache_cancel_op(&op->op);
-			if (ret == 0) {
-				ret = -ERESTARTSYS;
-				goto error;
-			}
-
-			/* it's been removed from the pending queue by another
-			 * party, so we should get to run shortly */
-			wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-		}
-		_debug("<<< GO");
-	}
+	ret = fscache_wait_for_retrieval_activation(
+		object, op,
+		__fscache_stat(&fscache_n_retrieval_op_waits),
+		__fscache_stat(&fscache_n_retrievals_object_dead));
+	if (ret < 0)
+		goto error;
 
 	/* ask the cache to honour the operation */
 	if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
@@ -612,25 +623,12 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 
 	fscache_stat(&fscache_n_alloc_ops);
 
-	if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
-		_debug(">>> WT");
-		fscache_stat(&fscache_n_alloc_op_waits);
-		if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				fscache_wait_bit_interruptible,
-				TASK_INTERRUPTIBLE) < 0) {
-			ret = fscache_cancel_op(&op->op);
-			if (ret == 0) {
-				ret = -ERESTARTSYS;
-				goto error;
-			}
-
-			/* it's been removed from the pending queue by another
-			 * party, so we should get to run shortly */
-			wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
-				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-		}
-		_debug("<<< GO");
-	}
+	ret = fscache_wait_for_retrieval_activation(
+		object, op,
+		__fscache_stat(&fscache_n_alloc_op_waits),
+		__fscache_stat(&fscache_n_allocs_object_dead));
+	if (ret < 0)
+		goto error;
 
 	/* ask the cache to honour the operation */
 	fscache_stat(&fscache_n_cop_allocate_page);
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 9e15289eb5c1..05f77caf4a2d 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -39,6 +39,7 @@ atomic_t fscache_n_allocs_ok;
 atomic_t fscache_n_allocs_wait;
 atomic_t fscache_n_allocs_nobufs;
 atomic_t fscache_n_allocs_intr;
+atomic_t fscache_n_allocs_object_dead;
 atomic_t fscache_n_alloc_ops;
 atomic_t fscache_n_alloc_op_waits;
 
@@ -49,6 +50,7 @@ atomic_t fscache_n_retrievals_nodata;
 atomic_t fscache_n_retrievals_nobufs;
 atomic_t fscache_n_retrievals_intr;
 atomic_t fscache_n_retrievals_nomem;
+atomic_t fscache_n_retrievals_object_dead;
 atomic_t fscache_n_retrieval_ops;
 atomic_t fscache_n_retrieval_op_waits;
 
@@ -188,9 +190,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_allocs_wait),
 		   atomic_read(&fscache_n_allocs_nobufs),
 		   atomic_read(&fscache_n_allocs_intr));
-	seq_printf(m, "Allocs : ops=%u owt=%u\n",
+	seq_printf(m, "Allocs : ops=%u owt=%u abt=%u\n",
 		   atomic_read(&fscache_n_alloc_ops),
-		   atomic_read(&fscache_n_alloc_op_waits));
+		   atomic_read(&fscache_n_alloc_op_waits),
+		   atomic_read(&fscache_n_allocs_object_dead));
 
 	seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u"
 		   " int=%u oom=%u\n",
@@ -201,9 +204,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_retrievals_nobufs),
 		   atomic_read(&fscache_n_retrievals_intr),
 		   atomic_read(&fscache_n_retrievals_nomem));
-	seq_printf(m, "Retrvls: ops=%u owt=%u\n",
+	seq_printf(m, "Retrvls: ops=%u owt=%u abt=%u\n",
 		   atomic_read(&fscache_n_retrieval_ops),
-		   atomic_read(&fscache_n_retrieval_op_waits));
+		   atomic_read(&fscache_n_retrieval_op_waits),
+		   atomic_read(&fscache_n_retrievals_object_dead));
 
 	seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n",
 		   atomic_read(&fscache_n_stores),
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 4750d5fb419f..907bb56c5888 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -404,6 +404,10 @@ extern const char *fscache_object_states[];
 	 (obj)->state >= FSCACHE_OBJECT_AVAILABLE &&	      \
 	 (obj)->state < FSCACHE_OBJECT_DYING)
 
+#define fscache_object_is_dead(obj)				\
+	(test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) &&	\
+	 (obj)->state >= FSCACHE_OBJECT_DYING)
+
 extern const struct slow_work_ops fscache_object_slow_work_ops;
 
 /**
-- 
cgit v1.2.3


From a17754fb8c28af19cd70dcbec6d5b0773b94e0c1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:11:52 +0000
Subject: CacheFiles: Don't write a full page if there's only a partial page to
 cache

cachefiles_write_page() writes a full page to the backing file for the last
page of the netfs file, even if the netfs file's last page is only a partial
page.

This causes the EOF on the backing file to be extended beyond the EOF of the
netfs, and thus the backing file will be truncated by cachefiles_attr_changed()
called from cachefiles_lookup_object().

So we need to limit the write we make to the backing file on that last page
such that it doesn't push the EOF too far.

Also, if a backing file that has a partial page at the end is expanded, we
discard the partial page and refetch it on the basis that we then have a hole
in the file with invalid data, and should the power go out...  A better way to
deal with this could be to record a note that the partial page contains invalid
data until the correct data is written into it.

This isn't a problem for netfs's that discard the whole backing file if the
file size changes (such as NFS).

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/interface.c     | 20 +++++++++++++++++---
 fs/cachefiles/rdwr.c          | 23 +++++++++++++++++++----
 include/linux/fscache-cache.h |  3 +++
 3 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index dd7f852746cb..8e67abf05985 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -404,12 +404,26 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
 	if (oi_size == ni_size)
 		return 0;
 
-	newattrs.ia_size = ni_size;
-	newattrs.ia_valid = ATTR_SIZE;
-
 	cachefiles_begin_secure(cache, &saved_cred);
 	mutex_lock(&object->backer->d_inode->i_mutex);
+
+	/* if there's an extension to a partial page at the end of the backing
+	 * file, we need to discard the partial page so that we pick up new
+	 * data after it */
+	if (oi_size & ~PAGE_MASK && ni_size > oi_size) {
+		_debug("discard tail %llx", oi_size);
+		newattrs.ia_valid = ATTR_SIZE;
+		newattrs.ia_size = oi_size & PAGE_MASK;
+		ret = notify_change(object->backer, &newattrs);
+		if (ret < 0)
+			goto truncate_failed;
+	}
+
+	newattrs.ia_valid = ATTR_SIZE;
+	newattrs.ia_size = ni_size;
 	ret = notify_change(object->backer, &newattrs);
+
+truncate_failed:
 	mutex_unlock(&object->backer->d_inode->i_mutex);
 	cachefiles_end_secure(cache, saved_cred);
 
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 3304646dae84..5a84fd7109ad 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -803,7 +803,8 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 	struct cachefiles_cache *cache;
 	mm_segment_t old_fs;
 	struct file *file;
-	loff_t pos;
+	loff_t pos, eof;
+	size_t len;
 	void *data;
 	int ret;
 
@@ -837,15 +838,29 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 		ret = -EIO;
 		if (file->f_op->write) {
 			pos = (loff_t) page->index << PAGE_SHIFT;
+
+			/* we mustn't write more data than we have, so we have
+			 * to beware of a partial page at EOF */
+			eof = object->fscache.store_limit_l;
+			len = PAGE_SIZE;
+			if (eof & ~PAGE_MASK) {
+				ASSERTCMP(pos, <, eof);
+				if (eof - pos < PAGE_SIZE) {
+					_debug("cut short %llx to %llx",
+					       pos, eof);
+					len = eof - pos;
+					ASSERTCMP(pos + len, ==, eof);
+				}
+			}
+
 			data = kmap(page);
 			old_fs = get_fs();
 			set_fs(KERNEL_DS);
 			ret = file->f_op->write(
-				file, (const void __user *) data, PAGE_SIZE,
-				&pos);
+				file, (const void __user *) data, len, &pos);
 			set_fs(old_fs);
 			kunmap(page);
-			if (ret != PAGE_SIZE)
+			if (ret != len)
 				ret = -EIO;
 		}
 		fput(file);
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 907bb56c5888..5db50002f3b5 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -395,6 +395,7 @@ struct fscache_object {
 	struct rb_node		objlist_link;	/* link in global object list */
 #endif
 	pgoff_t			store_limit;	/* current storage limit */
+	loff_t			store_limit_l;	/* current storage limit */
 };
 
 extern const char *fscache_object_states[];
@@ -439,6 +440,7 @@ void fscache_object_init(struct fscache_object *object,
 	object->events = object->event_mask = 0;
 	object->flags = 0;
 	object->store_limit = 0;
+	object->store_limit_l = 0;
 	object->cache = cache;
 	object->cookie = cookie;
 	object->parent = NULL;
@@ -491,6 +493,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object)
 static inline
 void fscache_set_store_limit(struct fscache_object *object, loff_t i_size)
 {
+	object->store_limit_l = i_size;
 	object->store_limit = i_size >> PAGE_SHIFT;
 	if (i_size & ~PAGE_MASK)
 		object->store_limit++;
-- 
cgit v1.2.3


From fee096deb4f33897937b974cb2c5168bab7935be Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Nov 2009 18:12:05 +0000
Subject: CacheFiles: Catch an overly long wait for an old active object

Catch an overly long wait for an old, dying active object when we want to
replace it with a new one.  The probability is that all the slow-work threads
are hogged, and the delete can't get a look in.

What we do instead is:

 (1) if there's nothing in the slow work queue, we sleep until either the dying
     object has finished dying or there is something in the slow work queue
     behind which we can queue our object.

 (2) if there is something in the slow work queue, we return ETIMEDOUT to
     fscache_lookup_object(), which then puts us back on the slow work queue,
     presumably behind the deletion that we're blocked by.  We are then
     deferred for a while until we work our way back through the queue -
     without blocking a slow-work thread unnecessarily.

A backtrace similar to the following may appear in the log without this patch:

	INFO: task kslowd004:5711 blocked for more than 120 seconds.
	"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
	kslowd004     D 0000000000000000     0  5711      2 0x00000080
	 ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000
	 ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8
	 000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8
	Call Trace:
	 [<ffffffff81058e21>] ? trace_hardirqs_on+0xd/0xf
	 [<ffffffffa011c4d8>] ? cachefiles_wait_bit+0x0/0xd [cachefiles]
	 [<ffffffffa011c4e1>] cachefiles_wait_bit+0x9/0xd [cachefiles]
	 [<ffffffff81353153>] __wait_on_bit+0x43/0x76
	 [<ffffffff8111ae39>] ? ext3_xattr_get+0x1ec/0x270
	 [<ffffffff813531ef>] out_of_line_wait_on_bit+0x69/0x74
	 [<ffffffffa011c4d8>] ? cachefiles_wait_bit+0x0/0xd [cachefiles]
	 [<ffffffff8104c125>] ? wake_bit_function+0x0/0x2e
	 [<ffffffffa011bc79>] cachefiles_mark_object_active+0x203/0x23b [cachefiles]
	 [<ffffffffa011c209>] cachefiles_walk_to_object+0x558/0x827 [cachefiles]
	 [<ffffffffa011a429>] cachefiles_lookup_object+0xac/0x12a [cachefiles]
	 [<ffffffffa00aa1e9>] fscache_lookup_object+0x1c7/0x214 [fscache]
	 [<ffffffffa00aafc5>] fscache_object_state_machine+0xa5/0x52d [fscache]
	 [<ffffffffa00ab4ac>] fscache_object_slow_work_execute+0x5f/0xa0 [fscache]
	 [<ffffffff81082093>] slow_work_execute+0x18f/0x2d1
	 [<ffffffff8108239a>] slow_work_thread+0x1c5/0x308
	 [<ffffffff8104c0f1>] ? autoremove_wake_function+0x0/0x34
	 [<ffffffff810821d5>] ? slow_work_thread+0x0/0x308
	 [<ffffffff8104be91>] kthread+0x7a/0x82
	 [<ffffffff8100beda>] child_rip+0xa/0x20
	 [<ffffffff8100b87c>] ? restore_args+0x0/0x30
	 [<ffffffff8104be17>] ? kthread+0x0/0x82
	 [<ffffffff8100bed0>] ? child_rip+0x0/0x20
	1 lock held by kslowd004/5711:
	 #0:  (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [<ffffffffa011be64>] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles]

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/fscache.txt |  1 +
 fs/cachefiles/interface.c                     |  6 +-
 fs/cachefiles/namei.c                         | 87 +++++++++++++++++++++------
 fs/fscache/internal.h                         |  1 +
 fs/fscache/object.c                           | 10 ++-
 fs/fscache/stats.c                            |  4 +-
 include/linux/fscache-cache.h                 |  6 +-
 7 files changed, 90 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 3c23411956bb..a91e2e2095b0 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -235,6 +235,7 @@ proc files.
 		neg=N	Number of negative lookups made
 		pos=N	Number of positive lookups made
 		crt=N	Number of objects created by lookup
+		tmo=N	Number of lookups timed out and requeued
 	Updates	n=N	Number of update cookie requests seen
 		nul=N	Number of upd reqs given a NULL parent
 		run=N	Number of upd reqs granted CPU time
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 8e67abf05985..9d3c426044ae 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -114,8 +114,9 @@ nomem_lookup_data:
 
 /*
  * attempt to look up the nominated node in this cache
+ * - return -ETIMEDOUT to be scheduled again
  */
-static void cachefiles_lookup_object(struct fscache_object *_object)
+static int cachefiles_lookup_object(struct fscache_object *_object)
 {
 	struct cachefiles_lookup_data *lookup_data;
 	struct cachefiles_object *parent, *object;
@@ -145,13 +146,14 @@ static void cachefiles_lookup_object(struct fscache_object *_object)
 	    object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
 		cachefiles_attr_changed(&object->fscache);
 
-	if (ret < 0) {
+	if (ret < 0 && ret != -ETIMEDOUT) {
 		printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n",
 		       ret);
 		fscache_object_lookup_error(&object->fscache);
 	}
 
 	_leave(" [%d]", ret);
+	return ret;
 }
 
 /*
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 00a0cda8f47a..14ac4806e291 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -21,12 +21,6 @@
 #include <linux/security.h>
 #include "internal.h"
 
-static int cachefiles_wait_bit(void *flags)
-{
-	schedule();
-	return 0;
-}
-
 #define CACHEFILES_KEYBUF_SIZE 512
 
 /*
@@ -100,8 +94,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
 /*
  * record the fact that an object is now active
  */
-static void cachefiles_mark_object_active(struct cachefiles_cache *cache,
-					  struct cachefiles_object *object)
+static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
+					 struct cachefiles_object *object)
 {
 	struct cachefiles_object *xobject;
 	struct rb_node **_p, *_parent = NULL;
@@ -139,8 +133,8 @@ try_again:
 	rb_insert_color(&object->active_node, &cache->active_nodes);
 
 	write_unlock(&cache->active_lock);
-	_leave("");
-	return;
+	_leave(" = 0");
+	return 0;
 
 	/* an old object from a previous incarnation is hogging the slot - we
 	 * need to wait for it to be destroyed */
@@ -155,13 +149,64 @@ wait_for_old_object:
 	atomic_inc(&xobject->usage);
 	write_unlock(&cache->active_lock);
 
-	_debug(">>> wait");
-	wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE,
-		    cachefiles_wait_bit, TASK_UNINTERRUPTIBLE);
-	_debug("<<< waited");
+	if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
+		wait_queue_head_t *wq;
+
+		signed long timeout = 60 * HZ;
+		wait_queue_t wait;
+		bool requeue;
+
+		/* if the object we're waiting for is queued for processing,
+		 * then just put ourselves on the queue behind it */
+		if (slow_work_is_queued(&xobject->fscache.work)) {
+			_debug("queue OBJ%x behind OBJ%x immediately",
+			       object->fscache.debug_id,
+			       xobject->fscache.debug_id);
+			goto requeue;
+		}
+
+		/* otherwise we sleep until either the object we're waiting for
+		 * is done, or the slow-work facility wants the thread back to
+		 * do other work */
+		wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
+		init_wait(&wait);
+		requeue = false;
+		do {
+			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+			if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
+				break;
+			requeue = slow_work_sleep_till_thread_needed(
+				&object->fscache.work, &timeout);
+		} while (timeout > 0 && !requeue);
+		finish_wait(wq, &wait);
+
+		if (requeue &&
+		    test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
+			_debug("queue OBJ%x behind OBJ%x after wait",
+			       object->fscache.debug_id,
+			       xobject->fscache.debug_id);
+			goto requeue;
+		}
+
+		if (timeout <= 0) {
+			printk(KERN_ERR "\n");
+			printk(KERN_ERR "CacheFiles: Error: Overlong"
+			       " wait for old active object to go away\n");
+			cachefiles_printk_object(object, xobject);
+			goto requeue;
+		}
+	}
+
+	ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
 
 	cache->cache.ops->put_object(&xobject->fscache);
 	goto try_again;
+
+requeue:
+	clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
+	cache->cache.ops->put_object(&xobject->fscache);
+	_leave(" = -ETIMEDOUT");
+	return -ETIMEDOUT;
 }
 
 /*
@@ -466,12 +511,15 @@ lookup_again:
 	}
 
 	/* note that we're now using this object */
-	cachefiles_mark_object_active(cache, object);
+	ret = cachefiles_mark_object_active(cache, object);
 
 	mutex_unlock(&dir->d_inode->i_mutex);
 	dput(dir);
 	dir = NULL;
 
+	if (ret == -ETIMEDOUT)
+		goto mark_active_timed_out;
+
 	_debug("=== OBTAINED_OBJECT ===");
 
 	if (object->new) {
@@ -515,6 +563,10 @@ create_error:
 		cachefiles_io_error(cache, "Create/mkdir failed");
 	goto error;
 
+mark_active_timed_out:
+	_debug("mark active timed out");
+	goto release_dentry;
+
 check_error:
 	_debug("check error %d", ret);
 	write_lock(&cache->active_lock);
@@ -522,7 +574,7 @@ check_error:
 	clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
 	wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
 	write_unlock(&cache->active_lock);
-
+release_dentry:
 	dput(object->dentry);
 	object->dentry = NULL;
 	goto error_out;
@@ -543,9 +595,6 @@ error:
 error_out2:
 	dput(dir);
 error_out:
-	if (ret == -ENOSPC)
-		ret = -ENOBUFS;
-
 	_leave(" = error %d", -ret);
 	return ret;
 }
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 5b49a373689b..0ca2566e038c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -215,6 +215,7 @@ extern atomic_t fscache_n_object_no_alloc;
 extern atomic_t fscache_n_object_lookups;
 extern atomic_t fscache_n_object_lookups_negative;
 extern atomic_t fscache_n_object_lookups_positive;
+extern atomic_t fscache_n_object_lookups_timed_out;
 extern atomic_t fscache_n_object_created;
 extern atomic_t fscache_n_object_avail;
 extern atomic_t fscache_n_object_dead;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index f3f952cf887e..e513ac599c8e 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -468,6 +468,7 @@ static void fscache_lookup_object(struct fscache_object *object)
 {
 	struct fscache_cookie *cookie = object->cookie;
 	struct fscache_object *parent;
+	int ret;
 
 	_enter("");
 
@@ -493,12 +494,19 @@ static void fscache_lookup_object(struct fscache_object *object)
 
 	fscache_stat(&fscache_n_object_lookups);
 	fscache_stat(&fscache_n_cop_lookup_object);
-	object->cache->ops->lookup_object(object);
+	ret = object->cache->ops->lookup_object(object);
 	fscache_stat_d(&fscache_n_cop_lookup_object);
 
 	if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events))
 		set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
 
+	if (ret == -ETIMEDOUT) {
+		/* probably stuck behind another object, so move this one to
+		 * the back of the queue */
+		fscache_stat(&fscache_n_object_lookups_timed_out);
+		set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+	}
+
 	_leave("");
 }
 
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 05f77caf4a2d..46435f3aae68 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -98,6 +98,7 @@ atomic_t fscache_n_object_no_alloc;
 atomic_t fscache_n_object_lookups;
 atomic_t fscache_n_object_lookups_negative;
 atomic_t fscache_n_object_lookups_positive;
+atomic_t fscache_n_object_lookups_timed_out;
 atomic_t fscache_n_object_created;
 atomic_t fscache_n_object_avail;
 atomic_t fscache_n_object_dead;
@@ -160,10 +161,11 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_acquires_nobufs),
 		   atomic_read(&fscache_n_acquires_oom));
 
-	seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n",
+	seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u tmo=%u\n",
 		   atomic_read(&fscache_n_object_lookups),
 		   atomic_read(&fscache_n_object_lookups_negative),
 		   atomic_read(&fscache_n_object_lookups_positive),
+		   atomic_read(&fscache_n_object_lookups_timed_out),
 		   atomic_read(&fscache_n_object_created));
 
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 5db50002f3b5..7be0c6fbe880 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -234,8 +234,10 @@ struct fscache_cache_ops {
 	struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
 					       struct fscache_cookie *cookie);
 
-	/* look up the object for a cookie */
-	void (*lookup_object)(struct fscache_object *object);
+	/* look up the object for a cookie
+	 * - return -ETIMEDOUT to be requeued
+	 */
+	int (*lookup_object)(struct fscache_object *object);
 
 	/* finished looking up */
 	void (*lookup_complete)(struct fscache_object *object);
-- 
cgit v1.2.3


From 308efab5e231d1510cd35931d87629bf5171caae Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 19 Nov 2009 13:30:36 +0000
Subject: vt: Fix use of "new" in a struct field

As this struct is exposed to user space and the API was added for this
release it's a bit of a pain for the C++ world and we still have time to
fix it. Rename the fields before we end up with that pain in an actual
release.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Reported-by: Olivier Goffart
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/vt_ioctl.c | 6 +++---
 include/linux/vt.h      | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index ed86d3bf249a..6aa10284104a 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -103,8 +103,8 @@ void vt_event_post(unsigned int event, unsigned int old, unsigned int new)
 		ve->event.event = event;
 		/* kernel view is consoles 0..n-1, user space view is
 		   console 1..n with 0 meaning current, so we must bias */
-		ve->event.old = old + 1;
-		ve->event.new = new + 1;
+		ve->event.oldev = old + 1;
+		ve->event.newev = new + 1;
 		wake = 1;
 		ve->done = 1;
 	}
@@ -186,7 +186,7 @@ int vt_waitactive(int n)
 		vt_event_wait(&vw);
 		if (vw.done == 0)
 			return -EINTR;
-	} while (vw.event.new != n);
+	} while (vw.event.newev != n);
 	return 0;
 }
 
diff --git a/include/linux/vt.h b/include/linux/vt.h
index 7afca0d72139..7ffa11f06232 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -70,8 +70,8 @@ struct vt_event {
 #define VT_EVENT_UNBLANK	0x0004	/* Screen unblank */
 #define VT_EVENT_RESIZE		0x0008	/* Resize display */
 #define VT_MAX_EVENT		0x000F
-	unsigned int old;		/* Old console */
-	unsigned int new;		/* New console (if changing) */
+	unsigned int oldev;		/* Old console */
+	unsigned int newev;		/* New console (if changing) */
 	unsigned int pad[4];		/* Padding for expansion */
 };
 
-- 
cgit v1.2.3


From 4ced24c8973f79113444d1e00ee8bd9e74fbf43e Mon Sep 17 00:00:00 2001
From: Kevin Wells <kevin.wells@nxp.com>
Date: Thu, 12 Nov 2009 00:23:00 +0100
Subject: i2c: i2c-pnx: Made buf type unsigned to prevent sign extension

Made buf type unsigned to prevent sign extension

Signed-off-by: Kevin Wells <kevin.wells@nxp.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 include/linux/i2c-pnx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index f13255e06406..9eb07bbc6522 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -21,7 +21,7 @@ struct i2c_pnx_mif {
 	int			mode;		/* Interface mode */
 	struct completion	complete;	/* I/O completion */
 	struct timer_list	timer;		/* Timeout */
-	char *			buf;		/* Data buffer */
+	u8 *			buf;		/* Data buffer */
 	int			len;		/* Length of data buffer */
 };
 
-- 
cgit v1.2.3


From 6600b9dd8e0d4a60c610f216b78d992a598bc52a Mon Sep 17 00:00:00 2001
From: Jiro SEKIBA <jir@unicus.jp>
Date: Mon, 9 Nov 2009 19:10:11 +0900
Subject: nilfs2: move definition of struct nilfs_btree_node

This is a trivial patch to expose struct nilfs_fs_btree_node.
The struct should be exposed outside of kernel, for it is disk format.

Signed-off-by: Jiro SEKIBA <jir@unicus.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/btree.h         | 22 ----------------------
 include/linux/nilfs2_fs.h | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 0e72bbbc6b64..4b82d84ade75 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -33,28 +33,6 @@
 struct nilfs_btree;
 struct nilfs_btree_path;
 
-/**
- * struct nilfs_btree_node - B-tree node
- * @bn_flags: flags
- * @bn_level: level
- * @bn_nchildren: number of children
- * @bn_pad: padding
- */
-struct nilfs_btree_node {
-	__u8 bn_flags;
-	__u8 bn_level;
-	__le16 bn_nchildren;
-	__le32 bn_pad;
-};
-
-/* flags */
-#define NILFS_BTREE_NODE_ROOT	0x01
-
-/* level */
-#define NILFS_BTREE_LEVEL_DATA		0
-#define NILFS_BTREE_LEVEL_NODE_MIN	(NILFS_BTREE_LEVEL_DATA + 1)
-#define NILFS_BTREE_LEVEL_MAX		14
-
 /**
  * struct nilfs_btree - B-tree structure
  * @bt_bmap: bmap base structure
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index ce520402e840..72289d2bb341 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -402,6 +402,28 @@ struct nilfs_segment_summary {
 #define NILFS_SS_SYNDT  0x0008  /* includes data only updates */
 #define NILFS_SS_GC     0x0010  /* segment written for cleaner operation */
 
+/**
+ * struct nilfs_btree_node - B-tree node
+ * @bn_flags: flags
+ * @bn_level: level
+ * @bn_nchildren: number of children
+ * @bn_pad: padding
+ */
+struct nilfs_btree_node {
+	__u8 bn_flags;
+	__u8 bn_level;
+	__le16 bn_nchildren;
+	__le32 bn_pad;
+};
+
+/* flags */
+#define NILFS_BTREE_NODE_ROOT   0x01
+
+/* level */
+#define NILFS_BTREE_LEVEL_DATA          0
+#define NILFS_BTREE_LEVEL_NODE_MIN      (NILFS_BTREE_LEVEL_DATA + 1)
+#define NILFS_BTREE_LEVEL_MAX           14
+
 /**
  * struct nilfs_palloc_group_desc - block group descriptor
  * @pg_nfrees: number of free entries in block group
-- 
cgit v1.2.3


From 0234576d041b9b2cc7043691ea61d2c2ca597aaa Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Fri, 20 Nov 2009 03:28:01 +0900
Subject: nilfs2: add norecovery mount option

This adds "norecovery" mount option which disables temporal write
access to read-only mounts or snapshots during mount/recovery.
Without this option, write access will be even performed for those
types of mounts; the temporal write access is needed to mount root
file system read-only after an unclean shutdown.

This option will be helpful when user wants to prevent any write
access to the device.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Eric Sandeen <sandeen@redhat.com>
---
 Documentation/filesystems/nilfs2.txt |  4 ++++
 fs/nilfs2/super.c                    | 16 +++++++++++++++-
 fs/nilfs2/the_nilfs.c                | 20 ++++++++++++++++++--
 include/linux/nilfs2_fs.h            |  2 ++
 4 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index cbd877978c80..4949fcaa6b6a 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -70,6 +70,10 @@ order=strict		Apply strict in-order semantics that preserves sequence
 			blocks.  That means, it is guaranteed that no
 			overtaking of events occurs in the recovered file
 			system after a crash.
+norecovery		Disable recovery of the filesystem on mount.
+			This disables every write access on the device for
+			read-only mounts or snapshots.  This option will fail
+			for r/w mounts on an unclean volume.
 
 NILFS2 usage
 ============
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 990ead43a833..5403b3ef3a42 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -479,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_printf(seq, ",errors=panic");
 	if (nilfs_test_opt(sbi, STRICT_ORDER))
 		seq_printf(seq, ",order=strict");
+	if (nilfs_test_opt(sbi, NORECOVERY))
+		seq_printf(seq, ",norecovery");
 
 	return 0;
 }
@@ -547,7 +549,7 @@ static const struct export_operations nilfs_export_ops = {
 
 enum {
 	Opt_err_cont, Opt_err_panic, Opt_err_ro,
-	Opt_nobarrier, Opt_snapshot, Opt_order,
+	Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
 	Opt_err,
 };
 
@@ -558,6 +560,7 @@ static match_table_t tokens = {
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_snapshot, "cp=%u"},
 	{Opt_order, "order=%s"},
+	{Opt_norecovery, "norecovery"},
 	{Opt_err, NULL}
 };
 
@@ -608,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb)
 			sbi->s_snapshot_cno = option;
 			nilfs_set_opt(sbi, SNAPSHOT);
 			break;
+		case Opt_norecovery:
+			nilfs_set_opt(sbi, NORECOVERY);
+			break;
 		default:
 			printk(KERN_ERR
 			       "NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -863,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if (!nilfs_valid_fs(nilfs)) {
+		printk(KERN_WARNING "NILFS (device %s): couldn't "
+		       "remount because the filesystem is in an "
+		       "incomplete recovery state.\n", sb->s_id);
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		goto out;
 	if (*flags & MS_RDONLY) {
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 890a8d3886cf..6241e1722efc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -264,8 +264,14 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 	int valid_fs = nilfs_valid_fs(nilfs);
 	int err;
 
-	if (nilfs_loaded(nilfs))
-		return 0;
+	if (nilfs_loaded(nilfs)) {
+		if (valid_fs ||
+		    ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY)))
+			return 0;
+		printk(KERN_ERR "NILFS: the filesystem is in an incomplete "
+		       "recovery state.\n");
+		return -EINVAL;
+	}
 
 	if (!valid_fs) {
 		printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
@@ -295,6 +301,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 		goto skip_recovery;
 
 	if (s_flags & MS_RDONLY) {
+		if (nilfs_test_opt(sbi, NORECOVERY)) {
+			printk(KERN_INFO "NILFS: norecovery option specified. "
+			       "skipping roll-forward recovery\n");
+			goto skip_recovery;
+		}
 		if (really_read_only) {
 			printk(KERN_ERR "NILFS: write access "
 			       "unavailable, cannot proceed.\n");
@@ -302,6 +313,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 			goto failed_unload;
 		}
 		sbi->s_super->s_flags &= ~MS_RDONLY;
+	} else if (nilfs_test_opt(sbi, NORECOVERY)) {
+		printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
+		       "option was specified for a read/write mount\n");
+		err = -EINVAL;
+		goto failed_unload;
 	}
 
 	err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 72289d2bb341..3fe02cf8b65a 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -151,6 +151,8 @@ struct nilfs_super_root {
 #define NILFS_MOUNT_BARRIER		0x1000  /* Use block barriers */
 #define NILFS_MOUNT_STRICT_ORDER	0x2000  /* Apply strict in-order
 						   semantics also for data */
+#define NILFS_MOUNT_NORECOVERY		0x4000  /* Disable write access during
+						   mount-time recovery */
 
 
 /**
-- 
cgit v1.2.3


From fb141597550243b471f3bd526fe689aa3b74df25 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Sun, 8 Nov 2009 19:45:54 -0800
Subject: Input: ucb1400_ts - allow passing IRQ through platfrom_data

This patch allows UCB1400 to get IRQ GPIO from platform data. In case
platform_data are not supplied or the IRQ supplied in the platform_data
is negative, fall back to the old IRQ detection algorithm.

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ucb1400_ts.c | 11 +++++++----
 drivers/mfd/ucb1400_core.c             |  7 +++++++
 include/linux/ucb1400.h                |  4 ++++
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 095f84b1f56e..89dcbe7b4b02 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -355,10 +355,13 @@ static int ucb1400_ts_probe(struct platform_device *dev)
 		goto err;
 	}
 
-	error = ucb1400_ts_detect_irq(ucb);
-	if (error) {
-		printk(KERN_ERR "UCB1400: IRQ probe failed\n");
-		goto err_free_devs;
+	/* Only in case the IRQ line wasn't supplied, try detecting it */
+	if (ucb->irq < 0) {
+		error = ucb1400_ts_detect_irq(ucb);
+		if (error) {
+			printk(KERN_ERR "UCB1400: IRQ probe failed\n");
+			goto err_free_devs;
+		}
 	}
 
 	init_waitqueue_head(&ucb->ts_wait);
diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c
index fa294b6d600a..85fd9421be94 100644
--- a/drivers/mfd/ucb1400_core.c
+++ b/drivers/mfd/ucb1400_core.c
@@ -51,6 +51,7 @@ static int ucb1400_core_probe(struct device *dev)
 	struct ucb1400_ts ucb_ts;
 	struct ucb1400_gpio ucb_gpio;
 	struct snd_ac97 *ac97;
+	struct ucb1400_pdata *pdata = dev->platform_data;
 
 	memset(&ucb_ts, 0, sizeof(ucb_ts));
 	memset(&ucb_gpio, 0, sizeof(ucb_gpio));
@@ -88,6 +89,12 @@ static int ucb1400_core_probe(struct device *dev)
 
 	/* TOUCHSCREEN */
 	ucb_ts.ac97 = ac97;
+
+	if (pdata != NULL && pdata->irq >= 0)
+		ucb_ts.irq = pdata->irq;
+	else
+		ucb_ts.irq = -1;
+
 	ucb->ucb1400_ts = platform_device_alloc("ucb1400_ts", -1);
 	if (!ucb->ucb1400_ts) {
 		err = -ENOMEM;
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index adb44066680c..1b4790911052 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -110,6 +110,10 @@ struct ucb1400 {
 	struct platform_device	*ucb1400_gpio;
 };
 
+struct ucb1400_pdata {
+	int	irq;
+};
+
 static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg)
 {
 	return ac97->bus->ops->read(ac97, reg);
-- 
cgit v1.2.3


From dad725d089b94bce8bbc769b7471dcfba3fbda0e Mon Sep 17 00:00:00 2001
From: Samu Onkalo <samu.p.onkalo@nokia.com>
Date: Fri, 13 Nov 2009 21:13:22 -0800
Subject: Input: input-polldev - add sysfs interface for controlling poll
 interval

Sysfs entry for reading and setting of the polling interval. If the
interval is set to 0, polling is stopped. Polling is restarted when
interval is changed to non-zero.

sysfs entries:
poll = current polling interval in msec (RW)
max = max allowed polling interval (RO)
min = min allowed polling interval (RO)

Minimum and maximum limit for interval can be set while setting up the
device.

Interval can be adjusted even if the input device is not currently open.

[dtor@mail.ru: add kernel doc markup for the new fields]
Signed-off-by: Samu Onkalo <samu.p.onkalo@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input-polldev.c | 111 +++++++++++++++++++++++++++++++++++++++---
 include/linux/input-polldev.h |  11 ++++-
 2 files changed, 115 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 910220c127cb..31874275fed0 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -56,14 +56,10 @@ static void input_polldev_stop_workqueue(void)
 	mutex_unlock(&polldev_mutex);
 }
 
-static void input_polled_device_work(struct work_struct *work)
+static void input_polldev_queue_work(struct input_polled_dev *dev)
 {
-	struct input_polled_dev *dev =
-		container_of(work, struct input_polled_dev, work.work);
 	unsigned long delay;
 
-	dev->poll(dev);
-
 	delay = msecs_to_jiffies(dev->poll_interval);
 	if (delay >= HZ)
 		delay = round_jiffies_relative(delay);
@@ -71,6 +67,15 @@ static void input_polled_device_work(struct work_struct *work)
 	queue_delayed_work(polldev_wq, &dev->work, delay);
 }
 
+static void input_polled_device_work(struct work_struct *work)
+{
+	struct input_polled_dev *dev =
+		container_of(work, struct input_polled_dev, work.work);
+
+	dev->poll(dev);
+	input_polldev_queue_work(dev);
+}
+
 static int input_open_polled_device(struct input_dev *input)
 {
 	struct input_polled_dev *dev = input_get_drvdata(input);
@@ -100,6 +105,83 @@ static void input_close_polled_device(struct input_dev *input)
 		dev->close(dev);
 }
 
+/* SYSFS interface */
+
+static ssize_t input_polldev_get_poll(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct input_polled_dev *polldev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", polldev->poll_interval);
+}
+
+static ssize_t input_polldev_set_poll(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct input_polled_dev *polldev = dev_get_drvdata(dev);
+	struct input_dev *input = polldev->input;
+	unsigned long interval;
+
+	if (strict_strtoul(buf, 0, &interval))
+		return -EINVAL;
+
+	if (interval < polldev->poll_interval_min)
+		return -EINVAL;
+
+	if (interval > polldev->poll_interval_max)
+		return -EINVAL;
+
+	mutex_lock(&input->mutex);
+
+	polldev->poll_interval = interval;
+
+	if (input->users) {
+		cancel_delayed_work_sync(&polldev->work);
+		if (polldev->poll_interval > 0)
+			input_polldev_queue_work(polldev);
+	}
+
+	mutex_unlock(&input->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(poll, S_IRUGO | S_IWUSR, input_polldev_get_poll,
+					    input_polldev_set_poll);
+
+
+static ssize_t input_polldev_get_max(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct input_polled_dev *polldev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", polldev->poll_interval_max);
+}
+
+static DEVICE_ATTR(max, S_IRUGO, input_polldev_get_max, NULL);
+
+static ssize_t input_polldev_get_min(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct input_polled_dev *polldev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", polldev->poll_interval_min);
+}
+
+static DEVICE_ATTR(min, S_IRUGO, input_polldev_get_min, NULL);
+
+static struct attribute *sysfs_attrs[] = {
+	&dev_attr_poll.attr,
+	&dev_attr_max.attr,
+	&dev_attr_min.attr,
+	NULL
+};
+
+static struct attribute_group input_polldev_attribute_group = {
+	.attrs = sysfs_attrs
+};
+
 /**
  * input_allocate_polled_device - allocated memory polled device
  *
@@ -153,15 +235,29 @@ EXPORT_SYMBOL(input_free_polled_device);
 int input_register_polled_device(struct input_polled_dev *dev)
 {
 	struct input_dev *input = dev->input;
+	int error;
 
 	input_set_drvdata(input, dev);
 	INIT_DELAYED_WORK(&dev->work, input_polled_device_work);
 	if (!dev->poll_interval)
 		dev->poll_interval = 500;
+	if (!dev->poll_interval_max)
+		dev->poll_interval_max = dev->poll_interval;
 	input->open = input_open_polled_device;
 	input->close = input_close_polled_device;
 
-	return input_register_device(input);
+	error = input_register_device(input);
+	if (error)
+		return error;
+
+	error = sysfs_create_group(&input->dev.kobj,
+				   &input_polldev_attribute_group);
+	if (error) {
+		input_unregister_device(input);
+		return error;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(input_register_polled_device);
 
@@ -177,6 +273,9 @@ EXPORT_SYMBOL(input_register_polled_device);
  */
 void input_unregister_polled_device(struct input_polled_dev *dev)
 {
+	sysfs_remove_group(&dev->input->dev.kobj,
+			   &input_polldev_attribute_group);
+
 	input_unregister_device(dev->input);
 	dev->input = NULL;
 }
diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h
index 5c0ec68a965e..5e3dddf8f562 100644
--- a/include/linux/input-polldev.h
+++ b/include/linux/input-polldev.h
@@ -21,7 +21,12 @@
  *	longer being polled. Used to put device into low power mode.
  * @poll: driver-supplied method that polls the device and posts
  *	input events (mandatory).
- * @poll_interval: specifies how often the poll() method shoudl be called.
+ * @poll_interval: specifies how often the poll() method should be called.
+ *	Defaults to 500 msec unless overriden when registering the device.
+ * @poll_interval_max: specifies upper bound for the poll interval.
+ *	Defaults to the initial value of @poll_interval.
+ * @poll_interval_min: specifies lower bound for the poll interval.
+ *	Defaults to 0.
  * @input: input device structire associated with the polled device.
  *	Must be properly initialized by the driver (id, name, phys, bits).
  *
@@ -36,8 +41,12 @@ struct input_polled_dev {
 	void (*close)(struct input_polled_dev *dev);
 	void (*poll)(struct input_polled_dev *dev);
 	unsigned int poll_interval; /* msec */
+	unsigned int poll_interval_max; /* msec */
+	unsigned int poll_interval_min; /* msec */
 
 	struct input_dev *input;
+
+/* private: */
 	struct delayed_work work;
 };
 
-- 
cgit v1.2.3


From d69249f4b6857c0b23ceca270ae591381b16bba9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 16 Nov 2009 22:12:20 -0800
Subject: Input: input-polldev, matrix-keypad - include in kernel doc

Make sure that polled input device and matrix keypad APIs are included
with the rest of input API when generating kernel documentation. Also
description of absres was missing as well.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/DocBook/device-drivers.tmpl | 9 +++++++++
 include/linux/input.h                     | 1 +
 include/linux/input/matrix_keypad.h       | 3 +++
 3 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index 94a20fe8fedf..e994d1d9fbe6 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -293,10 +293,19 @@ X!Idrivers/video/console/fonts.c
 
   <chapter id="input_subsystem">
      <title>Input Subsystem</title>
+     <sect1><title>Input core</title>
 !Iinclude/linux/input.h
 !Edrivers/input/input.c
 !Edrivers/input/ff-core.c
 !Edrivers/input/ff-memless.c
+     </sect1>
+     <sect1><title>Polled input devices</title>
+!Iinclude/linux/input-polldev.h
+!Edrivers/input/input-polldev.c
+     </sect1>
+     <sect1><title>Matrix keyboars/keypads</title>
+!Iinclude/linux/input/matrix_keypad.h
+     </sect1>
   </chapter>
 
   <chapter id="spi">
diff --git a/include/linux/input.h b/include/linux/input.h
index 9a04e26daab2..56d8e048c646 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1040,6 +1040,7 @@ struct ff_effect {
  * @absmin: minimum values for events coming from absolute axes
  * @absfuzz: describes noisiness for axes
  * @absflat: size of the center flat position (used by joydev)
+ * @absres: resolution used for events coming form absolute axes
  * @open: this method is called when the very first user calls
  *	input_open_device(). The driver must prepare the device
  *	to start generating events (start polling thread,
diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h
index b3cd42d50e16..3bd018baae20 100644
--- a/include/linux/input/matrix_keypad.h
+++ b/include/linux/input/matrix_keypad.h
@@ -41,6 +41,9 @@ struct matrix_keymap_data {
  * @col_scan_delay_us: delay, measured in microseconds, that is
  *	needed before we can keypad after activating column gpio
  * @debounce_ms: debounce interval in milliseconds
+ * @active_low: gpio polarity
+ * @wakeup: controls whether the device should be set up as wakeup
+ *	source
  *
  * This structure represents platform-specific data that use used by
  * matrix_keypad driver to perform proper initialization.
-- 
cgit v1.2.3


From 8629ea2eaba8ca0de2e38ce1b4a825e16255976e Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Thu, 3 Sep 2009 16:32:53 +0800
Subject: hrtimer: Fix /proc/timer_list regression

commit 507e1231 (timer stats: Optimize by adding quick check to avoid
function calls) introduced a regression in /proc/timer_list.

/proc/timer_list shows now
 #0: <c27d46b0>, tick_sched_timer, S:01, <(null)>, /-1
instead of
 #0: <c27d46b0>, tick_sched_timer, S:01, hrtimer_start, swapper/0

Revert the hrtimer quick check for now. The optimization needs more
thought, but this is neither 2.6.32-rc7 nor stable material.

[ tglx: - Removed unrelated changes from the original patch
  	- Prevent unneccesary call to timer_stats_update_stats
	- massaged the changelog ]

Signed-off-by: Feng Tang <feng.tang@intel.com>
LKML-Reference: <alpine.LFD.2.00.0911181933540.24119@localhost.localdomain>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: stable@kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index ff037f0b1b4e..9bace4b9f4fe 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -446,7 +446,7 @@ extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 
 static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
 {
-	if (likely(!timer->start_site))
+	if (likely(!timer_stats_active))
 		return;
 	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
 				 timer->function, timer->start_comm, 0);
@@ -457,8 +457,6 @@ extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
 
 static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
-	if (likely(!timer_stats_active))
-		return;
 	__timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
 }
 
-- 
cgit v1.2.3


From a3f62bd2b20c769ddc989b242ddd274179e19ee6 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Sun, 18 Oct 2009 13:55:55 -0500
Subject: powerpc/fsl: Add PCI device ids for new QoirQ chips

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_pci.c | 14 ++++++++++++++
 include/linux/pci_ids.h       | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index cf57a42c72af..4e3a3e345ab3 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -392,8 +392,22 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8536, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641D, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8610, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1011E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1011, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header);
 #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */
 
 #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9ceb392cb984..f988a0d68636 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2288,6 +2288,20 @@
 #define PCI_DEVICE_ID_MPC8536		0x0051
 #define PCI_DEVICE_ID_P2020E		0x0070
 #define PCI_DEVICE_ID_P2020		0x0071
+#define PCI_DEVICE_ID_P2010E		0x0078
+#define PCI_DEVICE_ID_P2010		0x0079
+#define PCI_DEVICE_ID_P1020E		0x0100
+#define PCI_DEVICE_ID_P1020		0x0101
+#define PCI_DEVICE_ID_P1011E		0x0108
+#define PCI_DEVICE_ID_P1011		0x0109
+#define PCI_DEVICE_ID_P1022E		0x0110
+#define PCI_DEVICE_ID_P1022		0x0111
+#define PCI_DEVICE_ID_P1013E		0x0118
+#define PCI_DEVICE_ID_P1013		0x0119
+#define PCI_DEVICE_ID_P4080E		0x0400
+#define PCI_DEVICE_ID_P4080		0x0401
+#define PCI_DEVICE_ID_P4040E		0x0408
+#define PCI_DEVICE_ID_P4040		0x0409
 #define PCI_DEVICE_ID_MPC8641		0x7010
 #define PCI_DEVICE_ID_MPC8641D		0x7011
 #define PCI_DEVICE_ID_MPC8610		0x7018
-- 
cgit v1.2.3


From 8964be4a9a5ca8cab1219bb046db2f6d1936227c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 20 Nov 2009 15:35:04 -0800
Subject: net: rename skb->iif to skb->skb_iif

To help grep games, rename iif to skb_iif

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ieee802154/fakehard.c     | 2 +-
 drivers/net/ifb.c                 | 6 +++---
 include/linux/skbuff.h            | 4 ++--
 include/net/pkt_cls.h             | 4 ++--
 net/core/dev.c                    | 6 +++---
 net/core/skbuff.c                 | 2 +-
 net/netlabel/netlabel_unlabeled.c | 2 +-
 net/sched/act_mirred.c            | 2 +-
 net/sched/cls_flow.c              | 2 +-
 security/selinux/hooks.c          | 6 +++---
 security/smack/smack_lsm.c        | 4 ++--
 11 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c
index f877f13e3ab3..617549f30ef9 100644
--- a/drivers/ieee802154/fakehard.c
+++ b/drivers/ieee802154/fakehard.c
@@ -282,7 +282,7 @@ static int ieee802154_fake_close(struct net_device *dev)
 static netdev_tx_t ieee802154_fake_xmit(struct sk_buff *skb,
 					      struct net_device *dev)
 {
-	skb->iif = dev->ifindex;
+	skb->skb_iif = dev->ifindex;
 	skb->dev = dev;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 69c25668dd63..f4081c0a2d9c 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -99,7 +99,7 @@ static void ri_tasklet(unsigned long dev)
 		stats->tx_bytes +=skb->len;
 
 		rcu_read_lock();
-		skb->dev = dev_get_by_index_rcu(&init_net, skb->iif);
+		skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
 		if (!skb->dev) {
 			rcu_read_unlock();
 			dev_kfree_skb(skb);
@@ -107,7 +107,7 @@ static void ri_tasklet(unsigned long dev)
 			break;
 		}
 		rcu_read_unlock();
-		skb->iif = _dev->ifindex;
+		skb->skb_iif = _dev->ifindex;
 
 		if (from & AT_EGRESS) {
 			dp->st_rx_frm_egr++;
@@ -172,7 +172,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
 	stats->rx_packets++;
 	stats->rx_bytes+=skb->len;
 
-	if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->iif) {
+	if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
 		dev_kfree_skb(skb);
 		stats->rx_dropped++;
 		return NETDEV_TX_OK;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 63f47426977a..89eed8cdd318 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -299,7 +299,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
- *	@iif: ifindex of device we arrived on
+ *	@skb_iif: ifindex of device we arrived on
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
@@ -366,7 +366,7 @@ struct sk_buff {
 	struct nf_bridge_info	*nf_bridge;
 #endif
 
-	int			iif;
+	int			skb_iif;
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 3dd210d073ca..dd3031aed9d5 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -343,9 +343,9 @@ tcf_match_indev(struct sk_buff *skb, char *indev)
 	struct net_device *dev;
 
 	if (indev[0]) {
-		if  (!skb->iif)
+		if  (!skb->skb_iif)
 			return 0;
-		dev = __dev_get_by_index(dev_net(skb->dev), skb->iif);
+		dev = __dev_get_by_index(dev_net(skb->dev), skb->skb_iif);
 		if (!dev || strcmp(indev, dev->name))
 			return 0;
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index 9977288583b8..09f3d6b9c0c8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2287,7 +2287,7 @@ static int ing_filter(struct sk_buff *skb)
 	if (MAX_RED_LOOP < ttl++) {
 		printk(KERN_WARNING
 		       "Redir loop detected Dropping packet (%d->%d)\n",
-		       skb->iif, dev->ifindex);
+		       skb->skb_iif, dev->ifindex);
 		return TC_ACT_SHOT;
 	}
 
@@ -2395,8 +2395,8 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	if (!skb->iif)
-		skb->iif = skb->dev->ifindex;
+	if (!skb->skb_iif)
+		skb->skb_iif = skb->dev->ifindex;
 
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 739b8f4dd327..bfa3e7865a8c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -549,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	new->protocol		= old->protocol;
 	new->mark		= old->mark;
-	new->iif		= old->iif;
+	new->skb_iif		= old->skb_iif;
 	__nf_copy(new, old);
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 3dfe2bac8623..98ed22ee2ff4 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1550,7 +1550,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 
 	rcu_read_lock();
-	iface = netlbl_unlhsh_search_iface_def(skb->iif);
+	iface = netlbl_unlhsh_search_iface_def(skb->skb_iif);
 	if (iface == NULL)
 		goto unlabel_getattr_nolabel;
 	switch (family) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 797479369881..d329170243cb 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -185,7 +185,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
 	skb2->dev = dev;
-	skb2->iif = skb->dev->ifindex;
+	skb2->skb_iif = skb->dev->ifindex;
 	dev_queue_xmit(skb2);
 	err = 0;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 9402a7fd3785..e054c62857e1 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -171,7 +171,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 
 static u32 flow_get_iif(const struct sk_buff *skb)
 {
-	return skb->iif;
+	return skb->skb_iif;
 }
 
 static u32 flow_get_priority(const struct sk_buff *skb)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index bb230d5d7085..83a4aada0b4c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4085,7 +4085,7 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
 	char *addrp;
 
 	COMMON_AUDIT_DATA_INIT(&ad, NET);
-	ad.u.net.netif = skb->iif;
+	ad.u.net.netif = skb->skb_iif;
 	ad.u.net.family = family;
 	err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
 	if (err)
@@ -4147,7 +4147,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	COMMON_AUDIT_DATA_INIT(&ad, NET);
-	ad.u.net.netif = skb->iif;
+	ad.u.net.netif = skb->skb_iif;
 	ad.u.net.family = family;
 	err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
 	if (err)
@@ -4159,7 +4159,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		err = selinux_skb_peerlbl_sid(skb, family, &peer_sid);
 		if (err)
 			return err;
-		err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family,
+		err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family,
 					       peer_sid, &ad);
 		if (err) {
 			selinux_netlbl_err(skb, err, 0);
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index c33b6bb9b6dd..529c9ca65878 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2602,7 +2602,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 #ifdef CONFIG_AUDIT
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
 	ad.a.u.net.family = sk->sk_family;
-	ad.a.u.net.netif = skb->iif;
+	ad.a.u.net.netif = skb->skb_iif;
 	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
 #endif
 	/*
@@ -2757,7 +2757,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 #ifdef CONFIG_AUDIT
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
 	ad.a.u.net.family = family;
-	ad.a.u.net.netif = skb->iif;
+	ad.a.u.net.netif = skb->skb_iif;
 	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
 #endif
 	/*
-- 
cgit v1.2.3


From 453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 20 Nov 2009 22:19:43 +0100
Subject: perf: Allow for custom overflow handlers

in-kernel perf users might wish to have custom actions on the
sample interrupt.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091120212508.222339539@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 6 ++++++
 kernel/perf_event.c        | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b5cdac0de370..a430ac3074af 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -567,6 +567,8 @@ struct perf_pending_entry {
 
 typedef void (*perf_callback_t)(struct perf_event *, void *);
 
+struct perf_sample_data;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -658,6 +660,10 @@ struct perf_event {
 	struct pid_namespace		*ns;
 	u64				id;
 
+	void (*overflow_handler)(struct perf_event *event,
+			int nmi, struct perf_sample_data *data,
+			struct pt_regs *regs);
+
 #ifdef CONFIG_EVENT_PROFILE
 	struct event_filter		*filter;
 #endif
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3852e2656bb0..1dfb6cc4fdea 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3710,7 +3710,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 			perf_event_disable(event);
 	}
 
-	perf_event_output(event, nmi, data, regs);
+	if (event->overflow_handler)
+		event->overflow_handler(event, nmi, data, regs);
+	else
+		perf_event_output(event, nmi, data, regs);
+
 	return ret;
 }
 
@@ -4836,6 +4840,8 @@ inherit_event(struct perf_event *parent_event,
 	if (parent_event->attr.freq)
 		child_event->hw.sample_period = parent_event->hw.sample_period;
 
+	child_event->overflow_handler = parent_event->overflow_handler;
+
 	/*
 	 * Link it up in the child's context:
 	 */
-- 
cgit v1.2.3


From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 20 Nov 2009 22:19:55 +0100
Subject: perf: Fix event scaling for inherited counters

Properly account the full hierarchy of counters for both the
count (we already did so) and the scale times (new).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091120212509.153379276@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  3 ++-
 kernel/perf_event.c        | 48 +++++++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a430ac3074af..36fe89f72641 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
 				pid_t pid,
 				perf_callback_t callback);
-extern u64 perf_event_read_value(struct perf_event *event);
+extern u64 perf_event_read_value(struct perf_event *event,
+				 u64 *enabled, u64 *running);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fdfae888a67c..80f40da9a01e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1774,14 +1774,25 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
 	u64 total = 0;
 
+	*enabled = 0;
+	*running = 0;
+
 	total += perf_event_read(event);
-	list_for_each_entry(child, &event->child_list, child_list)
+	*enabled += event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	*running += event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+
+	list_for_each_entry(child, &event->child_list, child_list) {
 		total += perf_event_read(child);
+		*enabled += child->total_time_enabled;
+		*running += child->total_time_running;
+	}
 
 	return total;
 }
@@ -1793,19 +1804,15 @@ static int perf_event_read_group(struct perf_event *event,
 	struct perf_event *leader = event->group_leader, *sub;
 	int n = 0, size = 0, ret = 0;
 	u64 values[5];
-	u64 count;
+	u64 count, enabled, running;
 
-	count = perf_event_read_value(leader);
+	count = perf_event_read_value(leader, &enabled, &running);
 
 	values[n++] = 1 + leader->nr_siblings;
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = leader->total_time_enabled +
-			atomic64_read(&leader->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = leader->total_time_running +
-			atomic64_read(&leader->child_total_time_running);
-	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
 	values[n++] = count;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
@@ -1820,7 +1827,7 @@ static int perf_event_read_group(struct perf_event *event,
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
-		values[n++] = perf_event_read_value(sub);
+		values[n++] = perf_event_read_value(sub, &enabled, &running);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 
@@ -1838,18 +1845,15 @@ static int perf_event_read_group(struct perf_event *event,
 static int perf_event_read_one(struct perf_event *event,
 				 u64 read_format, char __user *buf)
 {
+	u64 enabled, running;
 	u64 values[4];
 	int n = 0;
 
-	values[n++] = perf_event_read_value(event);
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = event->total_time_enabled +
-			atomic64_read(&event->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = event->total_time_running +
-			atomic64_read(&event->child_total_time_running);
-	}
+	values[n++] = perf_event_read_value(event, &enabled, &running);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
 
-- 
cgit v1.2.3


From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 22 Nov 2009 05:26:55 +0100
Subject: tracing: Use the perf recursion protection from trace event

When we commit a trace to perf, we first check if we are
recursing in the same buffer so that we don't mess-up the buffer
with a recursing trace. But later on, we do the same check from
perf to avoid commit recursion. The recursion check is desired
early before we touch the buffer but we want to do this check
only once.

Then export the recursion protection from perf and use it from
the trace events before submitting a trace.

v2: Put appropriate Reported-by tag

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  9 ++---
 include/linux/perf_event.h         |  4 +++
 include/trace/ftrace.h             | 23 +++++++------
 kernel/perf_event.c                | 68 +++++++++++++++++++++++++-------------
 kernel/trace/trace_event_profile.c | 14 ++++----
 kernel/trace/trace_kprobe.c        | 48 ++++++++++-----------------
 kernel/trace/trace_syscalls.c      | 47 ++++++++++----------------
 7 files changed, 106 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 43360c1d8f70..47bbdf9c38d0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,13 +137,8 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-struct perf_trace_buf {
-	char	buf[FTRACE_MAX_PROFILE_SIZE];
-	int	recursion;
-};
-
-extern struct perf_trace_buf	*perf_trace_buf;
-extern struct perf_trace_buf	*perf_trace_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 36fe89f72641..74e98b1d3391 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(int **recursion);
+extern void perf_swevent_put_recursion_context(int *recursion);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
+static int perf_swevent_get_recursion_context(int **recursion)	{ return -1; }
+static void perf_swevent_put_recursion_context(int *recursion)		{ }
 
 #endif
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4945d1c99864..c222ef5238bf 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	extern int perf_swevent_get_recursion_context(int **recursion); \
+	extern void perf_swevent_put_recursion_context(int *recursion); \
 	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
-	struct perf_trace_buf *trace_buf;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
 	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *trace_buf;						\
 	char *raw_data;							\
+	int *recursion;							\
 	int __cpu;							\
 	int pc;								\
 									\
@@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto)				\
 		return;							\
 									\
 	local_irq_save(irq_flags);					\
+									\
+	if (perf_swevent_get_recursion_context(&recursion))		\
+		goto end_recursion;						\
+									\
 	__cpu = smp_processor_id();					\
 									\
 	if (in_nmi())							\
@@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto)				\
 	if (!trace_buf)							\
 		goto end;						\
 									\
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);			\
-	if (trace_buf->recursion++)					\
-		goto end_recursion;					\
-									\
-	barrier();							\
-									\
-	raw_data = trace_buf->buf;					\
+	raw_data = per_cpu_ptr(trace_buf, __cpu);			\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
-end_recursion:								\
-	trace_buf->recursion--;						\
-end:									\
+end:								\
+	perf_swevent_put_recursion_context(recursion);			\
+end_recursion:									\
 	local_irq_restore(irq_flags);					\
 									\
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 718fa939b1a7..aba822722300 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 	}
 }
 
-static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+/*
+ * Must be called with preemption disabled
+ */
+int perf_swevent_get_recursion_context(int **recursion)
 {
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
 	if (in_nmi())
-		return &cpuctx->recursion[3];
+		*recursion = &cpuctx->recursion[3];
+	else if (in_irq())
+		*recursion = &cpuctx->recursion[2];
+	else if (in_softirq())
+		*recursion = &cpuctx->recursion[1];
+	else
+		*recursion = &cpuctx->recursion[0];
 
-	if (in_irq())
-		return &cpuctx->recursion[2];
+	if (**recursion)
+		return -1;
 
-	if (in_softirq())
-		return &cpuctx->recursion[1];
+	(**recursion)++;
 
-	return &cpuctx->recursion[0];
+	return 0;
 }
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
+void perf_swevent_put_recursion_context(int *recursion)
 {
-	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
-	int *recursion = perf_swevent_recursion_context(cpuctx);
-	struct perf_event_context *ctx;
-
-	if (*recursion)
-		goto out;
+	(*recursion)--;
+}
 
-	(*recursion)++;
-	barrier();
+static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
+			       u64 nr, int nmi,
+			       struct perf_sample_data *data,
+			       struct pt_regs *regs)
+{
+	struct perf_event_context *ctx;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
 	rcu_read_lock();
 	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
@@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 	if (ctx)
 		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
 	rcu_read_unlock();
+}
 
-	barrier();
-	(*recursion)--;
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
+{
+	int *recursion;
+
+	preempt_disable();
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto out;
+
+	__do_perf_sw_event(type, event_id, nr, nmi, data, regs);
 
+	perf_swevent_put_recursion_context(recursion);
 out:
-	put_cpu_var(perf_cpu_context);
+	preempt_enable();
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 	if (!regs)
 		regs = task_pt_regs(current);
 
-	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+	/* Trace events already protected against recursion */
+	__do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
 				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index e0d351b01f5a..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -9,31 +9,33 @@
 #include "trace.h"
 
 
-struct perf_trace_buf *perf_trace_buf;
+char *perf_trace_buf;
 EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-struct perf_trace_buf *perf_trace_buf_nmi;
+char *perf_trace_buf_nmi;
 EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
+typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
+
 /* Count the events in use (per event id, not per instance) */
 static int	total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-	struct perf_trace_buf *buf;
+	char *buf;
 	int ret = -ENOMEM;
 
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
 	if (!total_profile_count) {
-		buf = alloc_percpu(struct perf_trace_buf);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf;
 
 		rcu_assign_pointer(perf_trace_buf, buf);
 
-		buf = alloc_percpu(struct perf_trace_buf);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf_nmi;
 
@@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-	struct perf_trace_buf *buf, *nmi_buf;
+	char *buf, *nmi_buf;
 
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3696476f307d..22e6f68b05b3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
-	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 
 	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	 * This also protects the rcu read side
 	 */
 	local_irq_save(irq_flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
 
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(irq_flags);
 
 	return 0;
@@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
-	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
+	char *trace_buf;
+	int *recursion;
 	char *raw_data;
 
 	pc = preempt_count();
@@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	 * This also protects the rcu read side
 	 */
 	local_irq_save(irq_flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
 
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(irq_flags);
 
 	return 0;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 51213b0aa81b..0bb934875263 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,10 +477,11 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_metadata *sys_data;
-	struct perf_trace_buf *trace_buf;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 	int syscall_nr;
 	int size;
 	int cpu;
@@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 			       (unsigned long *)&rec->args);
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(flags);
 }
 
@@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
-	struct perf_trace_buf *trace_buf;
 	unsigned long flags;
 	int syscall_nr;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 	int size;
 	int cpu;
 
@@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From 5093ebad5f2348076fdc3dac7d2358b1ad7f85f7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 22 Nov 2009 05:21:35 +0100
Subject: hw-breakpoints: Separate the kernel part from breakpoint headers

So that we can include this header from userspace tools, like
perf tools, to get the breakpoint types and len definitions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258863695-10464-4-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 0b98cbf76da7..4659e0c55ea6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,6 +16,7 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
+#ifdef __KERNEL__
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
@@ -133,5 +134,6 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 }
 
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
 
 #endif /* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3


From 9f680ab41485edfdc96331b70afa7513aa0a7720 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 22 Nov 2009 08:53:49 -0800
Subject: rcu: Eliminate unneeded function wrapping

The functions rcu_init() is a wrapper for __rcu_init(), and also
sets up the CPU-hotplug notifier for rcu_barrier_cpu_hotplug().
But TINY_RCU doesn't need CPU-hotplug notification, and the
rcu_barrier_cpu_hotplug() is a simple wrapper for
rcu_cpu_notify().

So push rcu_init() out to kernel/rcutree.c and kernel/rcutiny.c
and get rid of the wrapper function rcu_barrier_cpu_hotplug().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12589088302320-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  2 --
 include/linux/rcutree.h |  3 ---
 kernel/rcupdate.c       | 22 ----------------------
 kernel/rcutiny.c        | 11 +----------
 kernel/rcutree.c        | 17 ++++++++++++++---
 5 files changed, 15 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 2c1fe8373e71..a3b6272af2dd 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -38,7 +38,6 @@ void rcu_bh_qs(int cpu);
 
 #define rcu_init_sched()	do { } while (0)
 extern void rcu_check_callbacks(int cpu, int user);
-extern void __rcu_init(void);
 
 /*
  * Return the number of grace periods.
@@ -69,7 +68,6 @@ static inline void synchronize_rcu_bh_expedited(void)
 }
 
 struct notifier_block;
-extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu);
 
 #ifdef CONFIG_NO_HZ
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 9642c6bcb399..111a65257350 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,8 +34,6 @@ struct notifier_block;
 
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
-extern int rcu_cpu_notify(struct notifier_block *self,
-			  unsigned long action, void *hcpu);
 extern int rcu_needs_cpu(int cpu);
 extern int rcu_expedited_torture_stats(char *page);
 
@@ -83,7 +81,6 @@ static inline void synchronize_rcu_bh_expedited(void)
 	synchronize_sched_expedited();
 }
 
-extern void __rcu_init(void);
 extern void rcu_check_callbacks(int cpu, int user);
 
 extern long rcu_batches_completed(void);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 7625f207f65e..eb6b534db318 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -161,28 +161,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
 #endif /* #ifndef CONFIG_TINY_RCU */
 
-static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
-		unsigned long action, void *hcpu)
-{
-	return rcu_cpu_notify(self, action, hcpu);
-}
-
-void __init rcu_init(void)
-{
-	int i;
-
-	__rcu_init();
-	cpu_notifier(rcu_barrier_cpu_hotplug, 0);
-
-	/*
-	 * We don't need protection against CPU-hotplug here because
-	 * this is called early in boot, before either interrupts
-	 * or the scheduler are operational.
-	 */
-	for_each_online_cpu(i)
-		rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
-}
-
 void rcu_scheduler_starting(void)
 {
 	WARN_ON(num_online_cpus() != 1);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index b33ec3aa377a..9f6d9ff2572c 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -177,15 +177,6 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	__rcu_process_callbacks(&rcu_bh_ctrlblk);
 }
 
-/*
- * Null function to handle CPU being onlined.  Longer term, we want to
- * make TINY_RCU avoid using rcupdate.c, but later...
- */
-int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-	return NOTIFY_OK;
-}
-
 /*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
@@ -285,7 +276,7 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-void __rcu_init(void)
+void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b79bfcd28e95..e3d3bbddbcd5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1644,8 +1644,8 @@ static void __cpuinit rcu_online_cpu(int cpu)
 /*
  * Handle CPU online/offline notification events.
  */
-int __cpuinit rcu_cpu_notify(struct notifier_block *self,
-			     unsigned long action, void *hcpu)
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
 
@@ -1781,8 +1781,10 @@ do { \
 	} \
 } while (0)
 
-void __init __rcu_init(void)
+void __init rcu_init(void)
 {
+	int i;
+
 	rcu_bootup_announce();
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
@@ -1791,6 +1793,15 @@ void __init __rcu_init(void)
 	RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
 	__rcu_init_preempt();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+
+	/*
+	 * We don't need protection against CPU-hotplug here because
+	 * this is called early in boot, before either interrupts
+	 * or the scheduler are operational.
+	 */
+	cpu_notifier(rcu_cpu_notify, 0);
+	for_each_online_cpu(i)
+		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
 }
 
 #include "rcutree_plugin.h"
-- 
cgit v1.2.3


From 6ebb237bece23275d1da149b61a342f0d4d06a08 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 22 Nov 2009 08:53:50 -0800
Subject: rcu: Re-arrange code to reduce #ifdef pain

Remove #ifdefs from kernel/rcupdate.c and
include/linux/rcupdate.h by moving code to
include/linux/rcutiny.h, include/linux/rcutree.h, and
kernel/rcutree.c.

Also remove some definitions that are no longer used.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1258908830885-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h |  12 ------
 include/linux/rcutiny.h  |  11 +++++
 include/linux/rcutree.h  |   4 +-
 kernel/rcupdate.c        | 104 -----------------------------------------------
 kernel/rcutree.c         |  80 ++++++++++++++++++++++++++++++++++++
 kernel/rcutree_plugin.h  |  24 +++++++++++
 6 files changed, 118 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2f1bc42a3b82..24440f4bf476 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,11 +52,6 @@ struct rcu_head {
 };
 
 /* Exported common interfaces */
-#ifdef CONFIG_TREE_PREEMPT_RCU
-extern void synchronize_rcu(void);
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#define synchronize_rcu synchronize_sched
-#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 extern void synchronize_rcu_bh(void);
 extern void synchronize_sched(void);
 extern void rcu_barrier(void);
@@ -67,13 +62,6 @@ extern int sched_expedited_torture_stats(char *page);
 
 /* Internal to kernel */
 extern void rcu_init(void);
-extern void rcu_scheduler_starting(void);
-#ifndef CONFIG_TINY_RCU
-extern int rcu_needs_cpu(int cpu);
-#else
-static inline int rcu_needs_cpu(int cpu) { return 0; }
-#endif
-extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a3b6272af2dd..c4ba9a78721e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -39,6 +39,11 @@ void rcu_bh_qs(int cpu);
 #define rcu_init_sched()	do { } while (0)
 extern void rcu_check_callbacks(int cpu, int user);
 
+static inline int rcu_needs_cpu(int cpu)
+{
+	return 0;
+}
+
 /*
  * Return the number of grace periods.
  */
@@ -57,6 +62,8 @@ static inline long rcu_batches_completed_bh(void)
 
 extern int rcu_expedited_torture_stats(char *page);
 
+#define synchronize_rcu synchronize_sched
+
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();
@@ -86,6 +93,10 @@ static inline void rcu_exit_nohz(void)
 
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
+static inline void rcu_scheduler_starting(void)
+{
+}
+
 static inline void exit_rcu(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 111a65257350..c93eee5911b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,12 +35,14 @@ struct notifier_block;
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern int rcu_needs_cpu(int cpu);
+extern void rcu_scheduler_starting(void);
 extern int rcu_expedited_torture_stats(char *page);
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 extern void __rcu_read_lock(void);
 extern void __rcu_read_unlock(void);
+extern void synchronize_rcu(void);
 extern void exit_rcu(void);
 
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
@@ -55,7 +57,7 @@ static inline void __rcu_read_unlock(void)
 	preempt_enable();
 }
 
-#define __synchronize_sched() synchronize_rcu()
+#define synchronize_rcu synchronize_sched
 
 static inline void exit_rcu(void)
 {
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index eb6b534db318..9b7fd4723878 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,7 +44,6 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
-#include <linux/kernel_stat.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map =
 EXPORT_SYMBOL_GPL(rcu_lock_map);
 #endif
 
-int rcu_scheduler_active __read_mostly;
-
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
@@ -66,104 +63,3 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	rcu = container_of(head, struct rcu_synchronize, head);
 	complete(&rcu->completion);
 }
-
-#ifndef CONFIG_TINY_RCU
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-/**
- * synchronize_rcu - wait until a grace period has elapsed.
- *
- * Control will return to the caller some time after a full grace
- * period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- */
-void synchronize_rcu(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (!rcu_scheduler_active)
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu);
-
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed.   These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns.  However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API.  In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
- */
-void synchronize_sched(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (rcu_blocking_is_gp())
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu_sched(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- */
-void synchronize_rcu_bh(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (rcu_blocking_is_gp())
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu_bh(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-void rcu_scheduler_starting(void)
-{
-	WARN_ON(num_online_cpus() != 1);
-	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = 1;
-}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e3d3bbddbcd5..4ca7e0292fd8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,6 +46,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 
 #include "rcutree.h"
 
@@ -79,6 +80,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+static int rcu_scheduler_active __read_mostly;
+
 
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
@@ -1396,6 +1399,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
+/**
+ * synchronize_sched - wait until an rcu-sched grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-sched
+ * grace period has elapsed, in other words after all currently executing
+ * rcu-sched read-side critical sections have completed.   These read-side
+ * critical sections are delimited by rcu_read_lock_sched() and
+ * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
+ * local_irq_disable(), and so on may be used in place of
+ * rcu_read_lock_sched().
+ *
+ * This means that all preempt_disable code sequences, including NMI and
+ * hardware-interrupt handlers, in progress on entry will have completed
+ * before this primitive returns.  However, this does not guarantee that
+ * softirq handlers will have completed, since in some kernels, these
+ * handlers can run in process context, and can block.
+ *
+ * This primitive provides the guarantees made by the (now removed)
+ * synchronize_kernel() API.  In contrast, synchronize_rcu() only
+ * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
+ */
+void synchronize_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+/**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu_bh grace
+ * period has elapsed, in other words after all currently executing rcu_bh
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ * and may be nested.
+ */
+void synchronize_rcu_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1480,6 +1545,21 @@ int rcu_needs_cpu(int cpu)
 	       rcu_preempt_needs_cpu(cpu);
 }
 
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process.  Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system).  After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.
+ */
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
+
 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 0bdb592eee66..1d295c789d3d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -425,6 +425,30 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (!rcu_scheduler_active)
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
 /*
  * Wait for an rcu-preempt grace period.  We are supposed to expedite the
  * grace period, but this is the crude slow compatability hack, so just
-- 
cgit v1.2.3


From 3066eec68d21cf4d468809c0b7b1fe9ee59c8f32 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 22 Oct 2009 13:26:45 +0300
Subject: MFD: twl4030: add twl4030_codec MFD as a new child to the core

New MFD child to twl4030 MFD device.

Reason for the twl4030_codec MFD: the vibra control is actually in the codec
part of the twl4030. If both the vibra and the audio functionality is needed
from the twl4030 at the same time, than they need to control the codec power
and APLL at the same time without breaking the other driver.
Also these two has to be able to work without the need for the other driver.

This MFD device will be used by the drivers, which needs resources
from the twl4030 codec like audio and vibra.

The platform specific configuration data is passed along to the
child drivers (audio, vibra).

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/Kconfig               |   6 +
 drivers/mfd/Makefile              |   1 +
 drivers/mfd/twl4030-codec.c       | 241 +++++++++++++++++++++++++++++++++
 drivers/mfd/twl4030-core.c        |  14 ++
 include/linux/i2c/twl4030.h       |  18 +++
 include/linux/mfd/twl4030-codec.h | 271 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 551 insertions(+)
 create mode 100644 drivers/mfd/twl4030-codec.c
 create mode 100644 include/linux/mfd/twl4030-codec.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 570be139f9df..08f2d07bf56a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -121,6 +121,12 @@ config TWL4030_POWER
 	  and load scripts controling which resources are switched off/on
 	  or reset when a sleep, wakeup or warm reset event occurs.
 
+config TWL4030_CODEC
+	bool
+	depends on TWL4030_CORE
+	select MFD_CORE
+	default n
+
 config MFD_TMIO
 	bool
 	default n
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f3b277b90d40..af0fc903cec8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
+obj-$(CONFIG_TWL4030_CODEC)	+= twl4030-codec.o
 
 obj-$(CONFIG_MFD_MC13783)	+= mc13783-core.o
 
diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c
new file mode 100644
index 000000000000..97103078dc2a
--- /dev/null
+++ b/drivers/mfd/twl4030-codec.c
@@ -0,0 +1,241 @@
+/*
+ * MFD driver for twl4030 codec submodule
+ *
+ * Author:	Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * Copyright:   (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+#include <linux/i2c/twl4030.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/twl4030-codec.h>
+
+#define TWL4030_CODEC_CELLS	2
+
+static struct platform_device *twl4030_codec_dev;
+
+struct twl4030_codec_resource {
+	int request_count;
+	u8 reg;
+	u8 mask;
+};
+
+struct twl4030_codec {
+	struct mutex mutex;
+	struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX];
+	struct mfd_cell cells[TWL4030_CODEC_CELLS];
+};
+
+/*
+ * Modify the resource, the function returns the content of the register
+ * after the modification.
+ */
+static int twl4030_codec_set_resource(enum twl4030_codec_res id, int enable)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	u8 val;
+
+	twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+			codec->resource[id].reg);
+
+	if (enable)
+		val |= codec->resource[id].mask;
+	else
+		val &= ~codec->resource[id].mask;
+
+	twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+					val, codec->resource[id].reg);
+
+	return val;
+}
+
+static inline int twl4030_codec_get_resource(enum twl4030_codec_res id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	u8 val;
+
+	twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+			codec->resource[id].reg);
+
+	return val;
+}
+
+/*
+ * Enable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_enable_resource(enum twl4030_codec_res id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	int val;
+
+	if (id >= TWL4030_CODEC_RES_MAX) {
+		dev_err(&twl4030_codec_dev->dev,
+				"Invalid resource ID (%u)\n", id);
+		return -EINVAL;
+	}
+
+	mutex_lock(&codec->mutex);
+	if (!codec->resource[id].request_count)
+		/* Resource was disabled, enable it */
+		val = twl4030_codec_set_resource(id, 1);
+	else
+		val = twl4030_codec_get_resource(id);
+
+	codec->resource[id].request_count++;
+	mutex_unlock(&codec->mutex);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_enable_resource);
+
+/*
+ * Disable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_disable_resource(unsigned id)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+	int val;
+
+	if (id >= TWL4030_CODEC_RES_MAX) {
+		dev_err(&twl4030_codec_dev->dev,
+				"Invalid resource ID (%u)\n", id);
+		return -EINVAL;
+	}
+
+	mutex_lock(&codec->mutex);
+	if (!codec->resource[id].request_count) {
+		dev_err(&twl4030_codec_dev->dev,
+			"Resource has been disabled already (%u)\n", id);
+		mutex_unlock(&codec->mutex);
+		return -EPERM;
+	}
+	codec->resource[id].request_count--;
+
+	if (!codec->resource[id].request_count)
+		/* Resource can be disabled now */
+		val = twl4030_codec_set_resource(id, 0);
+	else
+		val = twl4030_codec_get_resource(id);
+
+	mutex_unlock(&codec->mutex);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource);
+
+static int __devinit twl4030_codec_probe(struct platform_device *pdev)
+{
+	struct twl4030_codec *codec;
+	struct twl4030_codec_data *pdata = pdev->dev.platform_data;
+	struct mfd_cell *cell = NULL;
+	int ret, childs = 0;
+
+	codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL);
+	if (!codec)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, codec);
+
+	twl4030_codec_dev = pdev;
+	mutex_init(&codec->mutex);
+
+	/* Codec power */
+	codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE;
+	codec->resource[TWL4030_CODEC_RES_POWER].mask = TWL4030_CODECPDZ;
+
+	/* PLL */
+	codec->resource[TWL4030_CODEC_RES_APLL].reg = TWL4030_REG_APLL_CTL;
+	codec->resource[TWL4030_CODEC_RES_APLL].mask = TWL4030_APLL_EN;
+
+	if (pdata->audio) {
+		cell = &codec->cells[childs];
+		cell->name = "twl4030_codec_audio";
+		cell->platform_data = pdata->audio;
+		cell->data_size = sizeof(*pdata->audio);
+		childs++;
+	}
+	if (pdata->vibra) {
+		cell = &codec->cells[childs];
+		cell->name = "twl4030_codec_vibra";
+		cell->platform_data = pdata->vibra;
+		cell->data_size = sizeof(*pdata->vibra);
+		childs++;
+	}
+
+	if (childs)
+		ret = mfd_add_devices(&pdev->dev, pdev->id, codec->cells,
+				      childs, NULL, 0);
+	else {
+		dev_err(&pdev->dev, "No platform data found for childs\n");
+		ret = -ENODEV;
+	}
+
+	if (!ret)
+		return 0;
+
+	platform_set_drvdata(pdev, NULL);
+	kfree(codec);
+	twl4030_codec_dev = NULL;
+	return ret;
+}
+
+static int __devexit twl4030_codec_remove(struct platform_device *pdev)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(pdev);
+
+	mfd_remove_devices(&pdev->dev);
+	platform_set_drvdata(pdev, NULL);
+	kfree(codec);
+	twl4030_codec_dev = NULL;
+
+	return 0;
+}
+
+MODULE_ALIAS("platform:twl4030_codec");
+
+static struct platform_driver twl4030_codec_driver = {
+	.probe		= twl4030_codec_probe,
+	.remove		= __devexit_p(twl4030_codec_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "twl4030_codec",
+	},
+};
+
+static int __devinit twl4030_codec_init(void)
+{
+	return platform_driver_register(&twl4030_codec_driver);
+}
+module_init(twl4030_codec_init);
+
+static void __devexit twl4030_codec_exit(void)
+{
+	platform_driver_unregister(&twl4030_codec_driver);
+}
+module_exit(twl4030_codec_exit);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@nokia.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 6cb12502e306..cf462b90dee2 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -114,6 +114,12 @@
 #define twl_has_watchdog()        false
 #endif
 
+#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
+#define twl_has_codec()	true
+#else
+#define twl_has_codec()	false
+#endif
+
 /* Triton Core internal information (BEGIN) */
 
 /* Last - for index max*/
@@ -601,6 +607,14 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
+	if (twl_has_codec() && pdata->codec) {
+		child = add_child(1, "twl4030_codec",
+				pdata->codec, sizeof(*pdata->codec),
+				false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
 	if (twl_has_regulator()) {
 		/*
 		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 508824ee35e6..ba61add3e05a 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -401,6 +401,23 @@ struct twl4030_power_data {
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
 
+struct twl4030_codec_audio_data {
+	unsigned int	audio_mclk;
+	unsigned int ramp_delay_value;
+	unsigned int hs_extmute:1;
+	void (*set_hs_extmute)(int mute);
+};
+
+struct twl4030_codec_vibra_data {
+	unsigned int	audio_mclk;
+	unsigned int	coexist;
+};
+
+struct twl4030_codec_data {
+	struct twl4030_codec_audio_data		*audio;
+	struct twl4030_codec_vibra_data		*vibra;
+};
+
 struct twl4030_platform_data {
 	unsigned				irq_base, irq_end;
 	struct twl4030_bci_platform_data	*bci;
@@ -409,6 +426,7 @@ struct twl4030_platform_data {
 	struct twl4030_keypad_data		*keypad;
 	struct twl4030_usb_data			*usb;
 	struct twl4030_power_data		*power;
+	struct twl4030_codec_data		*codec;
 
 	/* LDO regulators */
 	struct regulator_init_data		*vdac;
diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h
new file mode 100644
index 000000000000..ef0a3041d759
--- /dev/null
+++ b/include/linux/mfd/twl4030-codec.h
@@ -0,0 +1,271 @@
+/*
+ * MFD driver for twl4030 codec submodule
+ *
+ * Author:	Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * Copyright:   (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __TWL4030_CODEC_H__
+#define __TWL4030_CODEC_H__
+
+/* Codec registers */
+#define TWL4030_REG_CODEC_MODE		0x01
+#define TWL4030_REG_OPTION		0x02
+#define TWL4030_REG_UNKNOWN		0x03
+#define TWL4030_REG_MICBIAS_CTL		0x04
+#define TWL4030_REG_ANAMICL		0x05
+#define TWL4030_REG_ANAMICR		0x06
+#define TWL4030_REG_AVADC_CTL		0x07
+#define TWL4030_REG_ADCMICSEL		0x08
+#define TWL4030_REG_DIGMIXING		0x09
+#define TWL4030_REG_ATXL1PGA		0x0A
+#define TWL4030_REG_ATXR1PGA		0x0B
+#define TWL4030_REG_AVTXL2PGA		0x0C
+#define TWL4030_REG_AVTXR2PGA		0x0D
+#define TWL4030_REG_AUDIO_IF		0x0E
+#define TWL4030_REG_VOICE_IF		0x0F
+#define TWL4030_REG_ARXR1PGA		0x10
+#define TWL4030_REG_ARXL1PGA		0x11
+#define TWL4030_REG_ARXR2PGA		0x12
+#define TWL4030_REG_ARXL2PGA		0x13
+#define TWL4030_REG_VRXPGA		0x14
+#define TWL4030_REG_VSTPGA		0x15
+#define TWL4030_REG_VRX2ARXPGA		0x16
+#define TWL4030_REG_AVDAC_CTL		0x17
+#define TWL4030_REG_ARX2VTXPGA		0x18
+#define TWL4030_REG_ARXL1_APGA_CTL	0x19
+#define TWL4030_REG_ARXR1_APGA_CTL	0x1A
+#define TWL4030_REG_ARXL2_APGA_CTL	0x1B
+#define TWL4030_REG_ARXR2_APGA_CTL	0x1C
+#define TWL4030_REG_ATX2ARXPGA		0x1D
+#define TWL4030_REG_BT_IF		0x1E
+#define TWL4030_REG_BTPGA		0x1F
+#define TWL4030_REG_BTSTPGA		0x20
+#define TWL4030_REG_EAR_CTL		0x21
+#define TWL4030_REG_HS_SEL		0x22
+#define TWL4030_REG_HS_GAIN_SET		0x23
+#define TWL4030_REG_HS_POPN_SET		0x24
+#define TWL4030_REG_PREDL_CTL		0x25
+#define TWL4030_REG_PREDR_CTL		0x26
+#define TWL4030_REG_PRECKL_CTL		0x27
+#define TWL4030_REG_PRECKR_CTL		0x28
+#define TWL4030_REG_HFL_CTL		0x29
+#define TWL4030_REG_HFR_CTL		0x2A
+#define TWL4030_REG_ALC_CTL		0x2B
+#define TWL4030_REG_ALC_SET1		0x2C
+#define TWL4030_REG_ALC_SET2		0x2D
+#define TWL4030_REG_BOOST_CTL		0x2E
+#define TWL4030_REG_SOFTVOL_CTL		0x2F
+#define TWL4030_REG_DTMF_FREQSEL	0x30
+#define TWL4030_REG_DTMF_TONEXT1H	0x31
+#define TWL4030_REG_DTMF_TONEXT1L	0x32
+#define TWL4030_REG_DTMF_TONEXT2H	0x33
+#define TWL4030_REG_DTMF_TONEXT2L	0x34
+#define TWL4030_REG_DTMF_TONOFF		0x35
+#define TWL4030_REG_DTMF_WANONOFF	0x36
+#define TWL4030_REG_I2S_RX_SCRAMBLE_H	0x37
+#define TWL4030_REG_I2S_RX_SCRAMBLE_M	0x38
+#define TWL4030_REG_I2S_RX_SCRAMBLE_L	0x39
+#define TWL4030_REG_APLL_CTL		0x3A
+#define TWL4030_REG_DTMF_CTL		0x3B
+#define TWL4030_REG_DTMF_PGA_CTL2	0x3C
+#define TWL4030_REG_DTMF_PGA_CTL1	0x3D
+#define TWL4030_REG_MISC_SET_1		0x3E
+#define TWL4030_REG_PCMBTMUX		0x3F
+#define TWL4030_REG_RX_PATH_SEL		0x43
+#define TWL4030_REG_VDL_APGA_CTL	0x44
+#define TWL4030_REG_VIBRA_CTL		0x45
+#define TWL4030_REG_VIBRA_SET		0x46
+#define TWL4030_REG_VIBRA_PWM_SET	0x47
+#define TWL4030_REG_ANAMIC_GAIN		0x48
+#define TWL4030_REG_MISC_SET_2		0x49
+
+/* Bitfield Definitions */
+
+/* TWL4030_CODEC_MODE (0x01) Fields */
+#define TWL4030_APLL_RATE		0xF0
+#define TWL4030_APLL_RATE_8000		0x00
+#define TWL4030_APLL_RATE_11025		0x10
+#define TWL4030_APLL_RATE_12000		0x20
+#define TWL4030_APLL_RATE_16000		0x40
+#define TWL4030_APLL_RATE_22050		0x50
+#define TWL4030_APLL_RATE_24000		0x60
+#define TWL4030_APLL_RATE_32000		0x80
+#define TWL4030_APLL_RATE_44100		0x90
+#define TWL4030_APLL_RATE_48000		0xA0
+#define TWL4030_APLL_RATE_96000		0xE0
+#define TWL4030_SEL_16K			0x08
+#define TWL4030_CODECPDZ		0x02
+#define TWL4030_OPT_MODE		0x01
+#define TWL4030_OPTION_1		(1 << 0)
+#define TWL4030_OPTION_2		(0 << 0)
+
+/* TWL4030_OPTION (0x02) Fields */
+#define TWL4030_ATXL1_EN		(1 << 0)
+#define TWL4030_ATXR1_EN		(1 << 1)
+#define TWL4030_ATXL2_VTXL_EN		(1 << 2)
+#define TWL4030_ATXR2_VTXR_EN		(1 << 3)
+#define TWL4030_ARXL1_VRX_EN		(1 << 4)
+#define TWL4030_ARXR1_EN		(1 << 5)
+#define TWL4030_ARXL2_EN		(1 << 6)
+#define TWL4030_ARXR2_EN		(1 << 7)
+
+/* TWL4030_REG_MICBIAS_CTL (0x04) Fields */
+#define TWL4030_MICBIAS2_CTL		0x40
+#define TWL4030_MICBIAS1_CTL		0x20
+#define TWL4030_HSMICBIAS_EN		0x04
+#define TWL4030_MICBIAS2_EN		0x02
+#define TWL4030_MICBIAS1_EN		0x01
+
+/* ANAMICL (0x05) Fields */
+#define TWL4030_CNCL_OFFSET_START	0x80
+#define TWL4030_OFFSET_CNCL_SEL		0x60
+#define TWL4030_OFFSET_CNCL_SEL_ARX1	0x00
+#define TWL4030_OFFSET_CNCL_SEL_ARX2	0x20
+#define TWL4030_OFFSET_CNCL_SEL_VRX	0x40
+#define TWL4030_OFFSET_CNCL_SEL_ALL	0x60
+#define TWL4030_MICAMPL_EN		0x10
+#define TWL4030_CKMIC_EN		0x08
+#define TWL4030_AUXL_EN			0x04
+#define TWL4030_HSMIC_EN		0x02
+#define TWL4030_MAINMIC_EN		0x01
+
+/* ANAMICR (0x06) Fields */
+#define TWL4030_MICAMPR_EN		0x10
+#define TWL4030_AUXR_EN			0x04
+#define TWL4030_SUBMIC_EN		0x01
+
+/* AVADC_CTL (0x07) Fields */
+#define TWL4030_ADCL_EN			0x08
+#define TWL4030_AVADC_CLK_PRIORITY	0x04
+#define TWL4030_ADCR_EN			0x02
+
+/* TWL4030_REG_ADCMICSEL (0x08) Fields */
+#define TWL4030_DIGMIC1_EN		0x08
+#define TWL4030_TX2IN_SEL		0x04
+#define TWL4030_DIGMIC0_EN		0x02
+#define TWL4030_TX1IN_SEL		0x01
+
+/* AUDIO_IF (0x0E) Fields */
+#define TWL4030_AIF_SLAVE_EN		0x80
+#define TWL4030_DATA_WIDTH		0x60
+#define TWL4030_DATA_WIDTH_16S_16W	0x00
+#define TWL4030_DATA_WIDTH_32S_16W	0x40
+#define TWL4030_DATA_WIDTH_32S_24W	0x60
+#define TWL4030_AIF_FORMAT		0x18
+#define TWL4030_AIF_FORMAT_CODEC	0x00
+#define TWL4030_AIF_FORMAT_LEFT		0x08
+#define TWL4030_AIF_FORMAT_RIGHT	0x10
+#define TWL4030_AIF_FORMAT_TDM		0x18
+#define TWL4030_AIF_TRI_EN		0x04
+#define TWL4030_CLK256FS_EN		0x02
+#define TWL4030_AIF_EN			0x01
+
+/* VOICE_IF (0x0F) Fields */
+#define TWL4030_VIF_SLAVE_EN		0x80
+#define TWL4030_VIF_DIN_EN		0x40
+#define TWL4030_VIF_DOUT_EN		0x20
+#define TWL4030_VIF_SWAP		0x10
+#define TWL4030_VIF_FORMAT		0x08
+#define TWL4030_VIF_TRI_EN		0x04
+#define TWL4030_VIF_SUB_EN		0x02
+#define TWL4030_VIF_EN			0x01
+
+/* EAR_CTL (0x21) */
+#define TWL4030_EAR_GAIN		0x30
+
+/* HS_GAIN_SET (0x23) Fields */
+#define TWL4030_HSR_GAIN		0x0C
+#define TWL4030_HSR_GAIN_PWR_DOWN	0x00
+#define TWL4030_HSR_GAIN_PLUS_6DB	0x04
+#define TWL4030_HSR_GAIN_0DB		0x08
+#define TWL4030_HSR_GAIN_MINUS_6DB	0x0C
+#define TWL4030_HSL_GAIN		0x03
+#define TWL4030_HSL_GAIN_PWR_DOWN	0x00
+#define TWL4030_HSL_GAIN_PLUS_6DB	0x01
+#define TWL4030_HSL_GAIN_0DB		0x02
+#define TWL4030_HSL_GAIN_MINUS_6DB	0x03
+
+/* HS_POPN_SET (0x24) Fields */
+#define TWL4030_VMID_EN			0x40
+#define	TWL4030_EXTMUTE			0x20
+#define TWL4030_RAMP_DELAY		0x1C
+#define TWL4030_RAMP_DELAY_20MS		0x00
+#define TWL4030_RAMP_DELAY_40MS		0x04
+#define TWL4030_RAMP_DELAY_81MS		0x08
+#define TWL4030_RAMP_DELAY_161MS	0x0C
+#define TWL4030_RAMP_DELAY_323MS	0x10
+#define TWL4030_RAMP_DELAY_645MS	0x14
+#define TWL4030_RAMP_DELAY_1291MS	0x18
+#define TWL4030_RAMP_DELAY_2581MS	0x1C
+#define TWL4030_RAMP_EN			0x02
+
+/* PREDL_CTL (0x25) */
+#define TWL4030_PREDL_GAIN		0x30
+
+/* PREDR_CTL (0x26) */
+#define TWL4030_PREDR_GAIN		0x30
+
+/* PRECKL_CTL (0x27) */
+#define TWL4030_PRECKL_GAIN		0x30
+
+/* PRECKR_CTL (0x28) */
+#define TWL4030_PRECKR_GAIN		0x30
+
+/* HFL_CTL (0x29, 0x2A) Fields */
+#define TWL4030_HF_CTL_HB_EN		0x04
+#define TWL4030_HF_CTL_LOOP_EN		0x08
+#define TWL4030_HF_CTL_RAMP_EN		0x10
+#define TWL4030_HF_CTL_REF_EN		0x20
+
+/* APLL_CTL (0x3A) Fields */
+#define TWL4030_APLL_EN			0x10
+#define TWL4030_APLL_INFREQ		0x0F
+#define TWL4030_APLL_INFREQ_19200KHZ	0x05
+#define TWL4030_APLL_INFREQ_26000KHZ	0x06
+#define TWL4030_APLL_INFREQ_38400KHZ	0x0F
+
+/* REG_MISC_SET_1 (0x3E) Fields */
+#define TWL4030_CLK64_EN		0x80
+#define TWL4030_SCRAMBLE_EN		0x40
+#define TWL4030_FMLOOP_EN		0x20
+#define TWL4030_SMOOTH_ANAVOL_EN	0x02
+#define TWL4030_DIGMIC_LR_SWAP_EN	0x01
+
+/* VIBRA_CTL (0x45) */
+#define TWL4030_VIBRA_EN		0x01
+#define TWL4030_VIBRA_DIR		0x02
+#define TWL4030_VIBRA_AUDIO_SEL_L1	(0x00 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_R1	(0x01 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_L2	(0x02 << 2)
+#define TWL4030_VIBRA_AUDIO_SEL_R2	(0x03 << 2)
+#define TWL4030_VIBRA_SEL		0x10
+#define TWL4030_VIBRA_DIR_SEL		0x20
+
+/* TWL4030 codec resource IDs */
+enum twl4030_codec_res {
+	TWL4030_CODEC_RES_POWER = 0,
+	TWL4030_CODEC_RES_APLL,
+	TWL4030_CODEC_RES_MAX,
+};
+
+int twl4030_codec_disable_resource(enum twl4030_codec_res id);
+int twl4030_codec_enable_resource(enum twl4030_codec_res id);
+
+#endif	/* End of __TWL4030_CODEC_H__ */
-- 
cgit v1.2.3


From 26276069d2f51955cf549faab5d3a71a4b37ba23 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 4 Nov 2009 09:58:17 +0200
Subject: MFD: TWL4030: Add audio_mclk to the codec platform data

Add audio_mclk to the platform data struct for the
twl4030-codec MFD driver.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/i2c/twl4030.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index ba61add3e05a..5306a759cbde 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -414,6 +414,7 @@ struct twl4030_codec_vibra_data {
 };
 
 struct twl4030_codec_data {
+	unsigned int	audio_mclk;
 	struct twl4030_codec_audio_data		*audio;
 	struct twl4030_codec_vibra_data		*vibra;
 };
-- 
cgit v1.2.3


From cfaf6d2c1cb231f77e24109cb1460db429608bd4 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 4 Nov 2009 09:58:19 +0200
Subject: MFD: twl4030-codec: APLL_INFREQ handling in the MFD driver

Configure the APLL_INFREQ field in the APLL_CTL register
based on the platform data.
Provide also a function for childs to query the audio_mclk
frequency.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/twl4030-codec.c       | 35 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/twl4030-codec.h |  1 +
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c
index 97103078dc2a..77b914907d7c 100644
--- a/drivers/mfd/twl4030-codec.c
+++ b/drivers/mfd/twl4030-codec.c
@@ -41,6 +41,7 @@ struct twl4030_codec_resource {
 };
 
 struct twl4030_codec {
+	unsigned int audio_mclk;
 	struct mutex mutex;
 	struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX];
 	struct mfd_cell cells[TWL4030_CODEC_CELLS];
@@ -145,12 +146,45 @@ int twl4030_codec_disable_resource(unsigned id)
 }
 EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource);
 
+unsigned int twl4030_codec_get_mclk(void)
+{
+	struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+
+	return codec->audio_mclk;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_get_mclk);
+
 static int __devinit twl4030_codec_probe(struct platform_device *pdev)
 {
 	struct twl4030_codec *codec;
 	struct twl4030_codec_data *pdata = pdev->dev.platform_data;
 	struct mfd_cell *cell = NULL;
 	int ret, childs = 0;
+	u8 val;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Platform data is missing\n");
+		return -EINVAL;
+	}
+
+	/* Configure APLL_INFREQ and disable APLL if enabled */
+	val = 0;
+	switch (pdata->audio_mclk) {
+	case 19200000:
+		val |= TWL4030_APLL_INFREQ_19200KHZ;
+		break;
+	case 26000000:
+		val |= TWL4030_APLL_INFREQ_26000KHZ;
+		break;
+	case 38400000:
+		val |= TWL4030_APLL_INFREQ_38400KHZ;
+		break;
+	default:
+		dev_err(&pdev->dev, "Invalid audio_mclk\n");
+		return -EINVAL;
+	}
+	twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+					val, TWL4030_REG_APLL_CTL);
 
 	codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL);
 	if (!codec)
@@ -160,6 +194,7 @@ static int __devinit twl4030_codec_probe(struct platform_device *pdev)
 
 	twl4030_codec_dev = pdev;
 	mutex_init(&codec->mutex);
+	codec->audio_mclk = pdata->audio_mclk;
 
 	/* Codec power */
 	codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE;
diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h
index ef0a3041d759..2ec317c68e59 100644
--- a/include/linux/mfd/twl4030-codec.h
+++ b/include/linux/mfd/twl4030-codec.h
@@ -267,5 +267,6 @@ enum twl4030_codec_res {
 
 int twl4030_codec_disable_resource(enum twl4030_codec_res id);
 int twl4030_codec_enable_resource(enum twl4030_codec_res id);
+unsigned int twl4030_codec_get_mclk(void);
 
 #endif	/* End of __TWL4030_CODEC_H__ */
-- 
cgit v1.2.3


From 76b5c84f77c3abc92a3c4e185e7b78f17a0ed204 Mon Sep 17 00:00:00 2001
From: Jani Nikula <ext-jani.1.nikula@nokia.com>
Date: Thu, 5 Nov 2009 22:59:46 -0800
Subject: Input: add new keycodes useful in mobile devices

Add new codes for camera focus key, and camera lens cover, keypad slide,
front proximity switches.

Signed-off-by: Jani Nikula <ext-jani.1.nikula@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index c2b1a7d244d9..84b501ab0d8f 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -595,6 +595,8 @@ struct input_absinfo {
 #define KEY_NUMERIC_STAR	0x20a
 #define KEY_NUMERIC_POUND	0x20b
 
+#define KEY_CAMERA_FOCUS	0x210
+
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x2ff
@@ -677,6 +679,9 @@ struct input_absinfo {
 #define SW_LINEOUT_INSERT	0x06  /* set = inserted */
 #define SW_JACK_PHYSICAL_INSERT 0x07  /* set = mechanical switch set */
 #define SW_VIDEOOUT_INSERT	0x08  /* set = inserted */
+#define SW_CAMERA_LENS_COVER	0x09  /* set = lens covered */
+#define SW_KEYPAD_SLIDE		0x0a  /* set = keypad slide out */
+#define SW_FRONT_PROXIMITY	0x0b  /* set = front proximity sensor active */
 #define SW_MAX			0x0f
 #define SW_CNT			(SW_MAX+1)
 
-- 
cgit v1.2.3


From c4832c7bbc3f7a4813347e871d7238651bf437d3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 23 Nov 2009 10:34:39 +0100
Subject: netfilter: nf_ct_tcp: improve out-of-sync situation in TCP tracking

Without this patch, if we receive a SYN packet from the client while
the firewall is out-of-sync, we let it go through. Then, if we see
the SYN/ACK reply coming from the server, we destroy the conntrack
entry and drop the packet to trigger a new retransmission. Then,
the retransmision from the client is used to start a new clean
session.

This patch improves the current handling. Basically, if we see an
unexpected SYN packet, we annotate the TCP options. Then, if we
see the reply SYN/ACK, this means that the firewall was indeed
out-of-sync. Therefore, we set a clean new session from the existing
entry based on the annotated values.

This patch adds two new 8-bits fields that fit in a 16-bits gap of
the ip_ct_tcp structure.

This patch is particularly useful for conntrackd since the
asynchronous nature of the state-synchronization allows to have
backup nodes that are not perfect copies of the master. This helps
to improve the recovery under some worst-case scenarios.

I have tested this by creating lots of conntrack entries in wrong
state:

for ((i=1024;i<65535;i++)); do conntrack -I -p tcp -s 192.168.2.101 -d 192.168.2.2 --sport $i --dport 80 -t 800 --state ESTABLISHED -u ASSURED,SEEN_REPLY; done

Then, I make some TCP connections:

$ echo GET / | nc 192.168.2.2 80

The events show the result:

 [UPDATE] tcp      6 60 SYN_RECV src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED]
 [UPDATE] tcp      6 432000 ESTABLISHED src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED]
 [UPDATE] tcp      6 120 FIN_WAIT src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED]
 [UPDATE] tcp      6 30 LAST_ACK src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED]
 [UPDATE] tcp      6 120 TIME_WAIT src=192.168.2.101 dst=192.168.2.2 sport=33220 dport=80 src=192.168.2.2 dst=192.168.2.101 sport=80 dport=33220 [ASSURED]

and tcpdump shows no retransmissions:

20:47:57.271951 IP 192.168.2.101.33221 > 192.168.2.2.www: S 435402517:435402517(0) win 5840 <mss 1460,sackOK,timestamp 4294961827 0,nop,wscale 6>
20:47:57.273538 IP 192.168.2.2.www > 192.168.2.101.33221: S 3509927945:3509927945(0) ack 435402518 win 5792 <mss 1460,sackOK,timestamp 235681024 4294961827,nop,wscale 4>
20:47:57.273608 IP 192.168.2.101.33221 > 192.168.2.2.www: . ack 3509927946 win 92 <nop,nop,timestamp 4294961827 235681024>
20:47:57.273693 IP 192.168.2.101.33221 > 192.168.2.2.www: P 435402518:435402524(6) ack 3509927946 win 92 <nop,nop,timestamp 4294961827 235681024>
20:47:57.275492 IP 192.168.2.2.www > 192.168.2.101.33221: . ack 435402524 win 362 <nop,nop,timestamp 235681024 4294961827>
20:47:57.276492 IP 192.168.2.2.www > 192.168.2.101.33221: P 3509927946:3509928082(136) ack 435402524 win 362 <nop,nop,timestamp 235681025 4294961827>
20:47:57.276515 IP 192.168.2.101.33221 > 192.168.2.2.www: . ack 3509928082 win 108 <nop,nop,timestamp 4294961828 235681025>
20:47:57.276521 IP 192.168.2.2.www > 192.168.2.101.33221: F 3509928082:3509928082(0) ack 435402524 win 362 <nop,nop,timestamp 235681025 4294961827>
20:47:57.277369 IP 192.168.2.101.33221 > 192.168.2.2.www: F 435402524:435402524(0) ack 3509928083 win 108 <nop,nop,timestamp 4294961828 235681025>
20:47:57.279491 IP 192.168.2.2.www > 192.168.2.101.33221: . ack 435402525 win 362 <nop,nop,timestamp 235681025 4294961828>

I also added a rule to log invalid packets, with no occurrences  :-) .

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h |  3 ++
 net/netfilter/nf_conntrack_proto_tcp.c     | 51 ++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 4352feed2377..ece22e94dcbf 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -67,6 +67,9 @@ struct ip_ct_tcp
 	u_int32_t	last_ack;	/* Last sequence number seen in opposite dir */
 	u_int32_t	last_end;	/* Last seq + len */
 	u_int16_t	last_win;	/* Last window advertisement seen in dir */
+	/* For SYN packets while we may be out-of-sync */
+	u_int8_t	last_wscale;	/* Last window scaling factor seen */
+	u_int8_t	last_flags;	/* Last flags set */
 };
 
 #endif /* __KERNEL__ */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 97a82ba75376..9cc6b5cb06af 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -908,23 +908,54 @@ static int tcp_packet(struct nf_conn *ct,
 			/* b) This SYN/ACK acknowledges a SYN that we earlier
 			 * ignored as invalid. This means that the client and
 			 * the server are both in sync, while the firewall is
-			 * not. We kill this session and block the SYN/ACK so
-			 * that the client cannot but retransmit its SYN and
-			 * thus initiate a clean new session.
+			 * not. We get in sync from the previously annotated
+			 * values.
 			 */
-			spin_unlock_bh(&ct->lock);
-			if (LOG_INVALID(net, IPPROTO_TCP))
-				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-					  "nf_ct_tcp: killing out of sync session ");
-			nf_ct_kill(ct);
-			return NF_DROP;
+			old_state = TCP_CONNTRACK_SYN_SENT;
+			new_state = TCP_CONNTRACK_SYN_RECV;
+			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
+				ct->proto.tcp.last_end;
+			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
+				ct->proto.tcp.last_end;
+			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
+				ct->proto.tcp.last_win == 0 ?
+					1 : ct->proto.tcp.last_win;
+			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
+				ct->proto.tcp.last_wscale;
+			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
+				ct->proto.tcp.last_flags;
+			memset(&ct->proto.tcp.seen[dir], 0,
+			       sizeof(struct ip_ct_tcp_state));
+			break;
 		}
 		ct->proto.tcp.last_index = index;
 		ct->proto.tcp.last_dir = dir;
 		ct->proto.tcp.last_seq = ntohl(th->seq);
 		ct->proto.tcp.last_end =
 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
-
+		ct->proto.tcp.last_win = ntohs(th->window);
+
+		/* a) This is a SYN in ORIGINAL. The client and the server
+		 * may be in sync but we are not. In that case, we annotate
+		 * the TCP options and let the packet go through. If it is a
+		 * valid SYN packet, the server will reply with a SYN/ACK, and
+		 * then we'll get in sync. Otherwise, the server ignores it. */
+		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
+			struct ip_ct_tcp_state seen = {};
+
+			ct->proto.tcp.last_flags =
+			ct->proto.tcp.last_wscale = 0;
+			tcp_options(skb, dataoff, th, &seen);
+			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+				ct->proto.tcp.last_flags |=
+					IP_CT_TCP_FLAG_WINDOW_SCALE;
+				ct->proto.tcp.last_wscale = seen.td_scale;
+			}
+			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
+				ct->proto.tcp.last_flags |=
+					IP_CT_TCP_FLAG_SACK_PERM;
+			}
+		}
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-- 
cgit v1.2.3


From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Nov 2009 11:37:29 +0100
Subject: perf_events: Undo some recursion damage

Make perf_swevent_get_recursion_context return a context number
and disable preemption.

This could be used to remove the IRQ disable from the trace bit
and index the per-cpu buffer with.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091123103819.993226816@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h    |  8 ++---
 include/trace/ftrace.h        | 17 ++++++-----
 kernel/perf_event.c           | 71 +++++++++++++++++++------------------------
 kernel/trace/trace_kprobe.c   | 14 +++++----
 kernel/trace/trace_syscalls.c | 14 +++++----
 5 files changed, 61 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 74e98b1d3391..43adbd7f0010 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
-extern int perf_swevent_get_recursion_context(int **recursion);
-extern void perf_swevent_put_recursion_context(int *recursion);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
-static int perf_swevent_get_recursion_context(int **recursion)	{ return -1; }
-static void perf_swevent_put_recursion_context(int *recursion)		{ }
+static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 
 #endif
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c222ef5238bf..c3417c13e3ed 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	extern int perf_swevent_get_recursion_context(int **recursion); \
-	extern void perf_swevent_put_recursion_context(int *recursion); \
+	extern int perf_swevent_get_recursion_context(void);		\
+	extern void perf_swevent_put_recursion_context(int rctx);	\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
@@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto)				\
 	int __data_size;						\
 	char *trace_buf;						\
 	char *raw_data;							\
-	int *recursion;							\
 	int __cpu;							\
+	int rctx;							\
 	int pc;								\
 									\
 	pc = preempt_count();						\
@@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto)				\
 									\
 	local_irq_save(irq_flags);					\
 									\
-	if (perf_swevent_get_recursion_context(&recursion))		\
-		goto end_recursion;						\
+	rctx = perf_swevent_get_recursion_context();			\
+	if (rctx < 0)							\
+		goto end_recursion;					\
 									\
 	__cpu = smp_processor_id();					\
 									\
@@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
-end:								\
-	perf_swevent_put_recursion_context(recursion);			\
-end_recursion:									\
+end:									\
+	perf_swevent_put_recursion_context(rctx);			\
+end_recursion:								\
 	local_irq_restore(irq_flags);					\
 									\
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 50f11b5f8c3d..0b0d5f72fe7d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 	}
 }
 
-/*
- * Must be called with preemption disabled
- */
-int perf_swevent_get_recursion_context(int **recursion)
+int perf_swevent_get_recursion_context(void)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int rctx;
 
 	if (in_nmi())
-		*recursion = &cpuctx->recursion[3];
+		rctx = 3;
 	else if (in_irq())
-		*recursion = &cpuctx->recursion[2];
+		rctx = 2;
 	else if (in_softirq())
-		*recursion = &cpuctx->recursion[1];
+		rctx = 1;
 	else
-		*recursion = &cpuctx->recursion[0];
+		rctx = 0;
 
-	if (**recursion)
+	if (cpuctx->recursion[rctx]) {
+		put_cpu_var(perf_cpu_context);
 		return -1;
+	}
 
-	(**recursion)++;
+	cpuctx->recursion[rctx]++;
+	barrier();
 
-	return 0;
+	return rctx;
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int *recursion)
+void perf_swevent_put_recursion_context(int rctx)
 {
-	(*recursion)--;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	barrier();
+	cpuctx->recursion[rctx]++;
+	put_cpu_var(perf_cpu_context);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
-			       u64 nr, int nmi,
-			       struct perf_sample_data *data,
-			       struct pt_regs *regs)
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 	rcu_read_lock();
 	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
 				 nr, nmi, data, regs);
@@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
 	rcu_read_unlock();
 }
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
-{
-	int *recursion;
-
-	preempt_disable();
-
-	if (perf_swevent_get_recursion_context(&recursion))
-		goto out;
-
-	__do_perf_sw_event(type, event_id, nr, nmi, data, regs);
-
-	perf_swevent_put_recursion_context(recursion);
-out:
-	preempt_enable();
-}
-
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data;
+	int rctx;
+
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		return;
 
 	data.addr = addr;
 	data.raw  = NULL;
 
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+
+	perf_swevent_put_recursion_context(rctx);
 }
 
 static void perf_swevent_read(struct perf_event *event)
@@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 		regs = task_pt_regs(current);
 
 	/* Trace events already protected against recursion */
-	__do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
 				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 22e6f68b05b3..79ce6a2bd74f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	unsigned long irq_flags;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
+	int rctx;
 
 	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	 */
 	local_irq_save(irq_flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	__cpu = smp_processor_id();
@@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(irq_flags);
 
@@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
 	char *trace_buf;
-	int *recursion;
 	char *raw_data;
+	int rctx;
 
 	pc = preempt_count();
 	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1293,7 +1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	 */
 	local_irq_save(irq_flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	__cpu = smp_processor_id();
@@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(irq_flags);
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 41b6dd963daa..9189cbe86079 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	unsigned long flags;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
 	int syscall_nr;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	cpu = smp_processor_id();
@@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(flags);
 }
@@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	int syscall_nr;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	cpu = smp_processor_id();
@@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3


From e6db4876575f3fdd5b1df2cbff826df95ab9af6a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 23 Nov 2009 15:42:32 +0100
Subject: hw-breakpoints: Include only linux/perf_event.h from kernel part of
 bp headers

As userspace only needs the breakpoints enum types from the
breakpoints headers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258987355-8751-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 4659e0c55ea6..76a48ab9a81e 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_HW_BREAKPOINT_H
 #define _LINUX_HW_BREAKPOINT_H
 
-#include <linux/perf_event.h>
-
 enum {
 	HW_BREAKPOINT_LEN_1 = 1,
 	HW_BREAKPOINT_LEN_2 = 2,
@@ -19,6 +17,8 @@ enum {
 #ifdef __KERNEL__
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+#include <linux/perf_event.h>
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
-- 
cgit v1.2.3


From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 6 Aug 2009 15:41:34 -0400
Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load
 averages"

This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad.

This helps in an entirely cached workload but not necessarily in
workloads that require waiting on disk.

Conflicts:

	include/linux/sunrpc/svc.h
	net/sunrpc/svc_xprt.c

Reported-by: Simon Kirby <sim@hostway.ca>
Tested-by: Jesper Krogh <jesper@krogh.cc>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h |  3 ---
 net/sunrpc/svc_xprt.c      | 31 +++++++++----------------------
 2 files changed, 9 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 52e8cb0a7569..d1567d627557 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -29,7 +29,6 @@ struct svc_pool_stats {
 	unsigned long	packets;
 	unsigned long	sockets_queued;
 	unsigned long	threads_woken;
-	unsigned long	overloads_avoided;
 	unsigned long	threads_timedout;
 };
 
@@ -50,7 +49,6 @@ struct svc_pool {
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
-	int			sp_nwaking;	/* number of threads woken but not yet active */
 	struct svc_pool_stats	sp_stats;	/* statistics on pool operation */
 } ____cacheline_aligned_in_smp;
 
@@ -284,7 +282,6 @@ struct svc_rqst {
 						 * cache pages */
 	wait_queue_head_t	rq_wait;	/* synchronization */
 	struct task_struct	*rq_task;	/* service thread */
-	int			rq_waking;	/* 1 if thread is being woken */
 };
 
 /*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index df124f78ee48..2c58b75a236f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -16,8 +16,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-#define SVC_MAX_WAKING 5
-
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
-	int thread_avail;
 
 	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -318,6 +315,12 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 
 	spin_lock_bh(&pool->sp_lock);
 
+	if (!list_empty(&pool->sp_threads) &&
+	    !list_empty(&pool->sp_sockets))
+		printk(KERN_ERR
+		       "svc_xprt_enqueue: "
+		       "threads and transports both waiting??\n");
+
 	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		/* Don't enqueue dead transports */
 		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
@@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	}
 
  process:
-	/* Work out whether threads are available */
-	thread_avail = !list_empty(&pool->sp_threads);	/* threads are asleep */
-	if (pool->sp_nwaking >= SVC_MAX_WAKING) {
-		/* too many threads are runnable and trying to wake up */
-		thread_avail = 0;
-		pool->sp_stats.overloads_avoided++;
-	}
-
-	if (thread_avail) {
+	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
 				   rq_list);
@@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-		rqstp->rq_waking = 1;
-		pool->sp_nwaking++;
 		pool->sp_stats.threads_woken++;
 		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
@@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		return -EINTR;
 
 	spin_lock_bh(&pool->sp_lock);
-	if (rqstp->rq_waking) {
-		rqstp->rq_waking = 0;
-		pool->sp_nwaking--;
-		BUG_ON(pool->sp_nwaking < 0);
-	}
 	xprt = svc_xprt_dequeue(pool);
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
@@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
 	struct svc_pool *pool = p;
 
 	if (p == SEQ_START_TOKEN) {
-		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n");
 		return 0;
 	}
 
-	seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+	seq_printf(m, "%u %lu %lu %lu %lu\n",
 		pool->sp_id,
 		pool->sp_stats.packets,
 		pool->sp_stats.sockets_queued,
 		pool->sp_stats.threads_woken,
-		pool->sp_stats.overloads_avoided,
 		pool->sp_stats.threads_timedout);
 
 	return 0;
-- 
cgit v1.2.3


From 475cba4ec8ee6b427cc3567692e6f48dd483c069 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 23 Nov 2009 15:53:52 -0500
Subject: sctp: implement definition for SACK-IMMEDIATELY extension

This patch implement the definition for SACK-IMMEDIATELY
extension.

Section 3.  The I-bit in the DATA Chunk Header

   The following Figure 1 shows the extended DATA chunk.

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |   Type = 0    |  Res  |I|U|B|E|           Length              |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                              TSN                              |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |        Stream Identifier      |     Stream Sequence Number    |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                  Payload Protocol Identifier                  |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   \                                                               \
   /                           User Data                           /
   \                                                               \
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

                                 Figure 1

   The only difference between the DATA chunk in Figure 1 and the DATA
   chunk defined in [RFC4960] is the addition of the I-bit in the flags
   field of the chunk header.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/linux/sctp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index b464b9d3d242..c20d3ce673c0 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -242,6 +242,7 @@ enum {
 	SCTP_DATA_FIRST_FRAG	= 0x02,
 	SCTP_DATA_NOT_FRAG	= 0x03,
 	SCTP_DATA_UNORDERED	= 0x04,
+	SCTP_DATA_SACK_IMM	= 0x08,
 };
 enum { SCTP_DATA_FRAG_MASK = 0x03, };
 
-- 
cgit v1.2.3


From fa7c27ee9394fc0d52404b2a89882e95868a60b9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 23 Nov 2009 22:30:12 +0100
Subject: hw-breakpoints: Fix misordered ifdef

Fix a misplaced ifdef. We need the perf event headers also in
off-case to avoid the following build error:

 include/linux/hw_breakpoint.h:94: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:102: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:109: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:116: error: expected declaration specifiers or '...' before 'perf_callback_t'

Reported-by: Kisskb-bot by Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1259011812-8093-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 76a48ab9a81e..c9f7f7c7b0e0 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -15,10 +15,11 @@ enum {
 };
 
 #ifdef __KERNEL__
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 #include <linux/perf_event.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
-- 
cgit v1.2.3


From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 23 Nov 2009 16:21:30 -0600
Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option

As far as I know, all distros currently ship kernels with default
CONFIG_SECURITY_FILE_CAPABILITIES=y.  Since having the option on
leaves a 'no_file_caps' option to boot without file capabilities,
the main reason to keep the option is that turning it off saves
you (on my s390x partition) 5k.  In particular, vmlinux sizes
came to:

without patch fscaps=n:		 	53598392
without patch fscaps=y:		 	53603406
with this patch applied:		53603342

with the security-next tree.

Against this we must weigh the fact that there is no simple way for
userspace to figure out whether file capabilities are supported,
while things like per-process securebits, capability bounding
sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported
with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for
applications wanting to know whether they can use them and/or why
something failed.

It also adds another subtly different set of semantics which we must
maintain at the risk of severe security regressions.

So this patch removes the SECURITY_FILE_CAPABILITIES compile
option.  It drops the kernel size by about 50k over the stock
SECURITY_FILE_CAPABILITIES=y kernel, by removing the
cap_limit_ptraced_target() function.

Changelog:
	Nov 20: remove cap_limit_ptraced_target() as it's logic
		was ifndef'ed.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan" <morgan@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |  2 --
 include/linux/init_task.h  |  4 ---
 kernel/capability.c        |  2 --
 security/Kconfig           |  9 ------
 security/commoncap.c       | 72 ++--------------------------------------------
 5 files changed, 2 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c8f2a5f70ed5..39e5ff512fbe 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -92,9 +92,7 @@ struct vfs_cap_data {
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
 #define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 extern int file_caps_enabled;
-#endif
 
 typedef struct kernel_cap_struct {
 	__u32 cap[_KERNEL_CAPABILITY_U32S];
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 21a6f5d9af22..8d10aa7fd4c9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -83,16 +83,12 @@ extern struct group_info init_groups;
 #define INIT_IDS
 #endif
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
  * Because of the reduced scope of CAP_SETPCAP when filesystem
  * capabilities are in effect, it is safe to allow CAP_SETPCAP to
  * be available in the default configuration.
  */
 # define CAP_INIT_BSET  CAP_FULL_SET
-#else
-# define CAP_INIT_BSET  CAP_INIT_EFF_SET
-#endif
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 #define INIT_TASK_RCU_PREEMPT(tsk)					\
diff --git a/kernel/capability.c b/kernel/capability.c
index c450375e855f..7f876e60521f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set);
 EXPORT_SYMBOL(__cap_full_set);
 EXPORT_SYMBOL(__cap_init_eff_set);
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 int file_caps_enabled = 1;
 
 static int __init file_caps_disable(char *str)
@@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str)
 	return 1;
 }
 __setup("no_file_caps", file_caps_disable);
-#endif
 
 /*
  * More recent versions of libcap are available from:
diff --git a/security/Kconfig b/security/Kconfig
index 95cc08913ca1..226b9556b25f 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -91,15 +91,6 @@ config SECURITY_PATH
 	  implement pathname based access controls.
 	  If you are unsure how to answer this question, answer N.
 
-config SECURITY_FILE_CAPABILITIES
-	bool "File POSIX Capabilities"
-	default n
-	help
-	  This enables filesystem capabilities, allowing you to give
-	  binaries a subset of root's powers without using setuid 0.
-
-	  If in doubt, answer N.
-
 config INTEL_TXT
 	bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)"
 	depends on HAVE_INTEL_TXT
diff --git a/security/commoncap.c b/security/commoncap.c
index 45b87af4ae5d..f800fdb3de94 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -173,7 +173,6 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective,
  */
 static inline int cap_inh_is_capped(void)
 {
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
@@ -181,7 +180,6 @@ static inline int cap_inh_is_capped(void)
 	if (cap_capable(current, current_cred(), CAP_SETPCAP,
 			SECURITY_CAP_AUDIT) == 0)
 		return 0;
-#endif
 	return 1;
 }
 
@@ -239,8 +237,6 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
 	bprm->cap_effective = false;
 }
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-
 /**
  * cap_inode_need_killpriv - Determine if inode change affects privileges
  * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
@@ -421,49 +417,6 @@ out:
 	return rc;
 }
 
-#else
-int cap_inode_need_killpriv(struct dentry *dentry)
-{
-	return 0;
-}
-
-int cap_inode_killpriv(struct dentry *dentry)
-{
-	return 0;
-}
-
-int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
-{
-	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
- 	return -ENODATA;
-}
-
-static inline int get_file_caps(struct linux_binprm *bprm, bool *effective)
-{
-	bprm_clear_caps(bprm);
-	return 0;
-}
-#endif
-
-/*
- * Determine whether a exec'ing process's new permitted capabilities should be
- * limited to just what it already has.
- *
- * This prevents processes that are being ptraced from gaining access to
- * CAP_SETPCAP, unless the process they're tracing already has it, and the
- * binary they're executing has filecaps that elevate it.
- *
- *  Returns 1 if they should be limited, 0 if they are not.
- */
-static inline int cap_limit_ptraced_target(void)
-{
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
-	if (capable(CAP_SETPCAP))
-		return 0;
-#endif
-	return 1;
-}
-
 /**
  * cap_bprm_set_creds - Set up the proposed credentials for execve().
  * @bprm: The execution parameters, including the proposed creds
@@ -523,9 +476,8 @@ skip:
 			new->euid = new->uid;
 			new->egid = new->gid;
 		}
-		if (cap_limit_ptraced_target())
-			new->cap_permitted = cap_intersect(new->cap_permitted,
-							   old->cap_permitted);
+		new->cap_permitted = cap_intersect(new->cap_permitted,
+						   old->cap_permitted);
 	}
 
 	new->suid = new->fsuid = new->euid;
@@ -739,7 +691,6 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
 	return 0;
 }
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
  * Rationale: code calling task_setscheduler, task_setioprio, and
  * task_setnice, assumes that
@@ -820,22 +771,6 @@ static long cap_prctl_drop(struct cred *new, unsigned long cap)
 	return 0;
 }
 
-#else
-int cap_task_setscheduler (struct task_struct *p, int policy,
-			   struct sched_param *lp)
-{
-	return 0;
-}
-int cap_task_setioprio (struct task_struct *p, int ioprio)
-{
-	return 0;
-}
-int cap_task_setnice (struct task_struct *p, int nice)
-{
-	return 0;
-}
-#endif
-
 /**
  * cap_task_prctl - Implement process control functions for this security module
  * @option: The process control function requested
@@ -866,7 +801,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		error = !!cap_raised(new->cap_bset, arg2);
 		goto no_change;
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 	case PR_CAPBSET_DROP:
 		error = cap_prctl_drop(new, arg2);
 		if (error < 0)
@@ -917,8 +851,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		error = new->securebits;
 		goto no_change;
 
-#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
-
 	case PR_GET_KEEPCAPS:
 		if (issecure(SECURE_KEEP_CAPS))
 			error = 1;
-- 
cgit v1.2.3


From 7d6709a20866a885916214590b7c394a21be9e25 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 24 Nov 2009 16:06:48 +1100
Subject: powerpc: Fix build of some FSL platforms

Commit 87ec0e98cfdd8b68da6a7f9e70142ffc0e404fbb in kumar's next branch
broke one of my test configs since it looks like Anton forgot about
that mpc832x_rdb platform which still uses the old style probing for
the SPI stuff.

I'll let them do a cleaner fix that probably involves changing the
probing method and getting rid of the platform device but for now
this will do to fix it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +-
 drivers/spi/spi_mpc8xxx.c                 | 5 -----
 include/linux/fsl_devices.h               | 5 +++++
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 567ded7c3b9b..17f99745f0e4 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -74,7 +74,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
 
 		prop = of_get_property(np, "mode", NULL);
 		if (prop && !strcmp(prop, "cpu-qe"))
-			pdata.qe_mode = 1;
+			pdata.flags = SPI_QE_CPU_MODE;
 
 		for (j = 0; j < num_board_infos; j++) {
 			if (board_infos[j].bus_num == pdata.bus_num)
diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c
index 394b6581e17f..930135dc73ba 100644
--- a/drivers/spi/spi_mpc8xxx.c
+++ b/drivers/spi/spi_mpc8xxx.c
@@ -163,11 +163,6 @@ struct mpc8xxx_spi {
 	u32 tx_shift;		/* TX data reg shift when in qe mode */
 
 	unsigned int flags;
-#define SPI_QE_CPU_MODE		(1 << 0) /* QE CPU ("PIO") mode */
-#define SPI_CPM_MODE		(1 << 1) /* CPM/QE ("DMA") mode */
-#define SPI_CPM1		(1 << 2) /* SPI unit is in CPM1 block */
-#define SPI_CPM2		(1 << 3) /* SPI unit is in CPM2 block */
-#define SPI_QE			(1 << 4) /* SPI unit is in QE block */
 
 	struct workqueue_struct *workqueue;
 	struct work_struct work;
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 47188d512b8f..28e33fea5107 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -75,6 +75,11 @@ struct fsl_spi_platform_data {
 	u32 	initial_spmode;	/* initial SPMODE value */
 	s16	bus_num;
 	unsigned int flags;
+#define SPI_QE_CPU_MODE		(1 << 0) /* QE CPU ("PIO") mode */
+#define SPI_CPM_MODE		(1 << 1) /* CPM/QE ("DMA") mode */
+#define SPI_CPM1		(1 << 2) /* SPI unit is in CPM1 block */
+#define SPI_CPM2		(1 << 3) /* SPI unit is in CPM2 block */
+#define SPI_QE			(1 << 4) /* SPI unit is in QE block */
 	/* board specific information */
 	u16	max_chipselect;
 	void	(*cs_control)(struct spi_device *spi, bool on);
-- 
cgit v1.2.3


From b57102841846d9840dcb1b8b308f6d7369b8e5c5 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Mon, 28 Sep 2009 21:10:11 +0200
Subject: UBI: Add ubi_open_volume_path

Add an 'ubi_open_volume_path(path, mode)' function which works like
'open_bdev_exclusive(path, mode, ...)' where path is the special file
representing the UBI volume, typically /dev/ubi0_0.

This is needed to teach UBIFS being able to mount UBI character devices.

[Comments and the patch were amended a bit by Artem]

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/kapi.c  | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/ubi.h |  2 ++
 2 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 88a72e9c8beb..277786ebaa2c 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -22,6 +22,8 @@
 
 #include <linux/module.h>
 #include <linux/err.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
 #include <asm/div64.h>
 #include "ubi.h"
 
@@ -279,6 +281,44 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
 }
 EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
 
+/**
+ * ubi_open_volume_path - open UBI volume by its character device node path.
+ * @pathname: volume character device node path
+ * @mode: open mode
+ *
+ * This function is similar to 'ubi_open_volume()', but opens a volume the path
+ * to its character device node.
+ */
+struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
+{
+	int error, ubi_num, vol_id;
+	struct ubi_volume_desc *ret;
+	struct inode *inode;
+	struct path path;
+
+	dbg_gen("open volume %s, mode %d", pathname, mode);
+
+	if (!pathname || !*pathname)
+		return ERR_PTR(-EINVAL);
+
+	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	if (error)
+		return ERR_PTR(error);
+
+	inode = path.dentry->d_inode;
+	ubi_num = ubi_major2num(imajor(inode));
+	vol_id = iminor(inode) - 1;
+
+	if (vol_id >= 0 && ubi_num >= 0)
+		ret = ubi_open_volume(ubi_num, vol_id, mode);
+	else
+		ret = ERR_PTR(-ENODEV);
+
+	path_put(&path);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume_path);
+
 /**
  * ubi_close_volume - close UBI volume.
  * @desc: volume descriptor
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 6913b71d9ab2..b31bd9e9bca3 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -174,6 +174,8 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc,
 struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode);
 struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
 					   int mode);
+struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode);
+
 int ubi_register_volume_notifier(struct notifier_block *nb,
 				 int ignore_existing);
 int ubi_unregister_volume_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From c9286b7e293a1ea054e857ff3f5a23d0ad8d4f36 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 19:50:38 +0100
Subject: locking: Remove unused prototype

commit 910067d1(remove generic__raw_read_trylock()) removed the
implementation but left the prototype around. Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/spinlock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index f0ca7a7a1757..faf1482028df 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -79,8 +79,6 @@
  */
 #include <linux/spinlock_types.h>
 
-extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
-
 /*
  * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
  */
-- 
cgit v1.2.3


From a49ed0bf427a8328a3296eebedc7697fe5098dbf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 19:57:50 +0100
Subject: locking: Use __[SPIN|RW]_LOCK_UNLOCKED in [spin|rw]_lock_init()

SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. Replace them
with the __*_LOCK_UNLOCKED variants.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/spinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index faf1482028df..71dccfeb0d88 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -100,7 +100,7 @@ do {								\
 
 #else
 # define spin_lock_init(lock)					\
-	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
+	do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -114,7 +114,7 @@ do {								\
 } while (0)
 #else
 # define rwlock_init(lock)					\
-	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+	do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
-- 
cgit v1.2.3


From ad85dfe67bbf13d5fa20764e4ce801a1e6e526d8 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 18 Nov 2009 15:52:51 +0100
Subject: DRBD: Now the code is 8.3.6 + 3 fixes (without compat crap)

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 18942ad115d9..99a4d76694ed 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.5"
+#define REL_VERSION "8.3.6"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 91
-- 
cgit v1.2.3


From e2f74f355e9e2914483db10c05d70e69e0b7ae04 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Thu, 19 Nov 2009 12:31:01 +0100
Subject: [ACPI/CPUFREQ] Introduce bios_limit per cpu cpufreq sysfs interface

This interface is mainly intended (and implemented) for ACPI _PPC BIOS
frequency limitations, but other cpufreq drivers can also use it for
similar use-cases.

Why is this needed:

Currently it's not obvious why cpufreq got limited.
People see cpufreq/scaling_max_freq reduced, but this could have
happened by:
  - any userspace prog writing to scaling_max_freq
  - thermal limitations
  - hardware (_PPC in ACPI case) limitiations

Therefore export bios_limit (in kHz) to:
  - Point the user that it's the BIOS (broken or intended) which limits
    frequency
  - Export it as a sysfs interface for userspace progs.
    While this was a rarely used feature on laptops, there will appear
    more and more server implemenations providing "Green IT" features like
    allowing the service processor to limit the frequency. People want
    to know about HW/BIOS frequency limitations.

All ACPI P-state driven cpufreq drivers are covered with this patch:
  - powernow-k8
  - powernow-k7
  - acpi-cpufreq

Tested with a patched DSDT which limits the first two cores (_PPC returns 1)
via _PPC, exposed by bios_limit:
# echo 2200000 >cpu2/cpufreq/scaling_max_freq
# cat cpu*/cpufreq/scaling_max_freq
2600000
2600000
2200000
2200000
# #scaling_max_freq shows general user/thermal/BIOS limitations

# cat cpu*/cpufreq/bios_limit
2600000
2600000
2800000
2800000
# #bios_limit only shows the HW/BIOS limitation

CC: Pallipadi Venkatesh <venkatesh.pallipadi@intel.com>
CC: Len Brown <lenb@kernel.org>
CC: davej@codemonkey.org.uk
CC: linux@dominikbrodowski.net

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 Documentation/cpu-freq/user-guide.txt      | 11 +++++++++++
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 17 +++++++++--------
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c  | 19 +++++++++++--------
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c  | 17 +++++++++--------
 drivers/acpi/processor_perflib.c           | 13 +++++++++++++
 drivers/cpufreq/cpufreq.c                  | 21 +++++++++++++++++++++
 include/acpi/processor.h                   |  6 ++++++
 include/linux/cpufreq.h                    |  1 +
 8 files changed, 81 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 2a5b850847c0..04f6b32993e6 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -203,6 +203,17 @@ scaling_cur_freq :		Current frequency of the CPU as determined by
 				the frequency the kernel thinks the CPU runs
 				at.
 
+bios_limit :			If the BIOS tells the OS to limit a CPU to
+				lower frequencies, the user can read out the
+				maximum available frequency from this file.
+				This typically can happen through (often not
+				intended) BIOS settings, restrictions
+				triggered through a service processor or other
+				BIOS/HW based implementations.
+				This does not cover thermal ACPI limitations
+				which can be detected through the generic
+				thermal driver.
+
 If you have selected the "userspace" governor which allows you to
 set the CPU operating frequency to a specific value, you can read out
 the current frequency in
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b581d3905cb..d2e7c77c1ea4 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
 };
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify = acpi_cpufreq_verify,
-	.target = acpi_cpufreq_target,
-	.init = acpi_cpufreq_cpu_init,
-	.exit = acpi_cpufreq_cpu_exit,
-	.resume = acpi_cpufreq_resume,
-	.name = "acpi-cpufreq",
-	.owner = THIS_MODULE,
-	.attr = acpi_cpufreq_attr,
+	.verify		= acpi_cpufreq_verify,
+	.target		= acpi_cpufreq_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.resume		= acpi_cpufreq_resume,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr		= acpi_cpufreq_attr,
 };
 
 static int __init acpi_cpufreq_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index d47c775eb0ab..9a97116f89e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = {
 };
 
 static struct cpufreq_driver powernow_driver = {
-	.verify	= powernow_verify,
-	.target	= powernow_target,
-	.get	= powernow_get,
-	.init	= powernow_cpu_init,
-	.exit	= powernow_cpu_exit,
-	.name	= "powernow-k7",
-	.owner	= THIS_MODULE,
-	.attr	= powernow_table_attr,
+	.verify		= powernow_verify,
+	.target		= powernow_target,
+	.get		= powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	.bios_limit	= acpi_processor_get_bios_limit,
+#endif
+	.init		= powernow_cpu_init,
+	.exit		= powernow_cpu_exit,
+	.name		= "powernow-k7",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_table_attr,
 };
 
 static int __init powernow_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f30d25383940..a9df9441a9a2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1398,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = {
 };
 
 static struct cpufreq_driver cpufreq_amd64_driver = {
-	.verify = powernowk8_verify,
-	.target = powernowk8_target,
-	.init = powernowk8_cpu_init,
-	.exit = __devexit_p(powernowk8_cpu_exit),
-	.get = powernowk8_get,
-	.name = "powernow-k8",
-	.owner = THIS_MODULE,
-	.attr = powernow_k8_attr,
+	.verify		= powernowk8_verify,
+	.target		= powernowk8_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= powernowk8_cpu_init,
+	.exit		= __devexit_p(powernowk8_cpu_exit),
+	.get		= powernowk8_get,
+	.name		= "powernow-k8",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k8_attr,
 };
 
 /* driver entry point for init */
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 8ba0ed0b9ddb..01e366d2b6fb 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -167,6 +167,19 @@ int acpi_processor_ppc_has_changed(struct acpi_processor *pr)
 		return cpufreq_update_policy(pr->id);
 }
 
+int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
+{
+	struct acpi_processor *pr;
+
+	pr = per_cpu(processors, cpu);
+	if (!pr || !pr->performance || !pr->performance->state_count)
+		return -ENODEV;
+	*limit = pr->performance->states[pr->performance_platform_limit].
+		core_frequency * 1000;
+	return 0;
+}
+EXPORT_SYMBOL(acpi_processor_get_bios_limit);
+
 void acpi_processor_ppc_init(void)
 {
 	if (!cpufreq_register_notifier
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5b9b1c8c4950..f20668c09ce0 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -647,6 +647,21 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
 	return policy->governor->show_setspeed(policy, buf);
 }
 
+/**
+ * show_scaling_driver - show the current cpufreq HW/BIOS limitation
+ */
+static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
+{
+	unsigned int limit;
+	int ret;
+	if (cpufreq_driver->bios_limit) {
+		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+		if (!ret)
+			return sprintf(buf, "%u\n", limit);
+	}
+	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
+}
+
 #define define_one_ro(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0444, show_##_name, NULL)
@@ -666,6 +681,7 @@ define_one_ro(cpuinfo_transition_latency);
 define_one_ro(scaling_available_governors);
 define_one_ro(scaling_driver);
 define_one_ro(scaling_cur_freq);
+define_one_ro(bios_limit);
 define_one_ro(related_cpus);
 define_one_ro(affected_cpus);
 define_one_rw(scaling_min_freq);
@@ -905,6 +921,11 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 		if (ret)
 			goto err_out_kobj_put;
 	}
+	if (cpufreq_driver->bios_limit) {
+		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
+		if (ret)
+			goto err_out_kobj_put;
+	}
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	for_each_cpu(j, policy->cpus) {
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 740ac3ad8fd0..8b668ead6d6e 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -295,6 +295,7 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
 void acpi_processor_ppc_init(void);
 void acpi_processor_ppc_exit(void);
 int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
+extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit);
 #else
 static inline void acpi_processor_ppc_init(void)
 {
@@ -316,6 +317,11 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr)
 	}
 	return 0;
 }
+static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
+{
+	return -ENODEV;
+}
+
 #endif				/* CONFIG_CPU_FREQ */
 
 /* in processor_throttling.c */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 79a2340d83cd..4de02b10007f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -232,6 +232,7 @@ struct cpufreq_driver {
 	/* optional */
 	unsigned int (*getavg)	(struct cpufreq_policy *policy,
 				 unsigned int cpu);
+	int	(*bios_limit)	(int cpu, unsigned int *limit);
 
 	int	(*exit)		(struct cpufreq_policy *policy);
 	int	(*suspend)	(struct cpufreq_policy *policy, pm_message_t pmsg);
-- 
cgit v1.2.3


From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 18 Nov 2009 20:27:27 -0500
Subject: tracing: Create new TRACE_EVENT_TEMPLATE

There are some places in the kernel that define several tracepoints and
they are all identical besides the name. The code to enable, disable and
record is created for every trace point even if most of the code is
identical.

This patch adds TRACE_EVENT_TEMPLATE that lets the developer create
a template TRACE_EVENT and create trace points with DEFINE_EVENT, which
is based off of a given template. Each trace point used by this
will share most of the code, and bring down the size of the kernel
when there are several duplicate events.

Usage is:

TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print);

Which would be the same as defining a normal TRACE_EVENT.

To create the trace events that the trace points will use:

DEFINE_EVENT(template, name, proto, args) is done. The template
is the name of the TRACE_EVENT_TEMPLATE to use. The name is the
name of the trace point. The parameters proto and args must be the same
as the proto and args of the template. If they are not the same,
then a compile error will result. I tried hard removing this duplication
but the C preprocessor is not powerful enough (or my CPP magic
experience points is not at a high enough level) to not need them.

A lot of trace events are coming in with new XFS development. Most of
the trace points are identical except for the name. The following shows
the advantage of having TRACE_EVENT_TEMPLATE:

$ size fs/xfs/xfs.o.*
    text          data     bss     dec     hex filename
  452114          2788    3520  458422   6feb6 fs/xfs/xfs.o.old
  638482         38116    3744  680342   a6196 fs/xfs/xfs.o.template
  996954         38116    4480 1039550   fdcbe fs/xfs/xfs.o.trace

xfs.o.old is without any tracepoints.
xfs.o.template uses the new TRACE_EVENT_TEMPLATE.
xfs.o.trace uses the current TRACE_EVENT macros.

Requested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |   4 ++
 include/trace/define_trace.h |   6 ++
 include/trace/ftrace.h       | 149 +++++++++++++++++++++++++++++++------------
 3 files changed, 117 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2aac8a83e89b..88a5b5a809ec 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
+#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)		\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 2a4b3bf74033..244985814a43 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -31,6 +31,10 @@
 		assign, print, reg, unreg)			\
 	DEFINE_TRACE_FN(name, reg, unreg)
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -63,6 +67,8 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_TEMPLATE
+#undef DEFINE_EVENT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c3417c13e3ed..2969f65d8002 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,6 +18,26 @@
 
 #include <linux/ftrace_event.h>
 
+/*
+ * TRACE_EVENT_TEMPLATE can be used to add a generic function
+ * handlers for events. That is, if all events have the same
+ * parameters and just have distinct trace points.
+ * Each tracepoint can be defined with DEFINE_EVENT and that
+ * will map the TRACE_EVENT_TEMPLATE to the tracepoint.
+ *
+ * TRACE_EVENT is a one to one mapping between tracepoint and template.
+ */
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+	TRACE_EVENT_TEMPLATE(name,			       \
+			     PARAMS(proto),		       \
+			     PARAMS(args),		       \
+			     PARAMS(tstruct),		       \
+			     PARAMS(assign),		       \
+			     PARAMS(print));		       \
+	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
+
 #undef __field
 #define __field(type, item)		type	item;
 
@@ -36,13 +56,15 @@
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-		char			__data[0];		\
-	};							\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {					\
+		struct trace_entry	ent;				\
+		tstruct							\
+		char			__data[0];			\
+	};
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)	\
 	static struct ftrace_event_call event_##name
 
 #undef __cpparg
@@ -89,12 +111,15 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -170,8 +195,8 @@
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_format_##call(struct ftrace_event_call *unused,			\
 		      struct trace_seq *s)				\
@@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 	return ret;							\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 static enum print_line_t						\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+ftrace_raw_output_id_##call(int event_id, const char *name,		\
+			    struct trace_iterator *iter, int flags)	\
 {									\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
@@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	entry = iter->ent;						\
 									\
-	if (entry->type != event_##call.id) {				\
+	if (entry->type != event_id) {					\
 		WARN_ON_ONCE(1);					\
 		return TRACE_TYPE_UNHANDLED;				\
 	}								\
@@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
-	ret = trace_seq_printf(s, #call ": " print);			\
+	ret = trace_seq_printf(s, "%s: ", name);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
 	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
 	return TRACE_TYPE_HANDLED;					\
 }
-	
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
+static enum print_line_t						\
+ftrace_raw_output_##name(struct trace_iterator *iter, int flags)	\
+{									\
+	return ftrace_raw_output_id_##template(event_##name.id,		\
+					       #name, iter, flags);	\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 {									\
@@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	return ret;							\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	__data_size += (len) * sizeof(type);
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 static inline int ftrace_get_offsets_##call(				\
 	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {									\
@@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call(				\
 	return __data_size;						\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call(				\
  *
  */
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
 									\
-static void ftrace_profile_##call(proto);				\
+static void ftrace_profile_##name(proto);				\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
+static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\
 {									\
-	return register_trace_##call(ftrace_profile_##call);		\
+	return register_trace_##name(ftrace_profile_##name);		\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
+static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 {									\
-	unregister_trace_##call(ftrace_profile_##call);			\
+	unregister_trace_##name(ftrace_profile_##name);			\
 }
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-									\
-static struct ftrace_event_call event_##call;				\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 									\
-static void ftrace_raw_event_##call(proto)				\
+static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
+				       proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	struct ring_buffer *buffer;					\
@@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto)				\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 									\
 	event = trace_current_buffer_lock_reserve(&buffer,		\
-				 event_##call.id,			\
+				 event_call->id,			\
 				 sizeof(*entry) + __data_size,		\
 				 irq_flags, pc);			\
 	if (!event)							\
@@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto)				\
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
 		trace_nowake_buffer_unlock_commit(buffer,		\
 						  event, irq_flags, pc); \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	ftrace_raw_event_id_##template(&event_##call, args);		\
 }									\
 									\
 static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
@@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.raw_init		= ftrace_raw_init_event_##call,		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
+	.show_format		= ftrace_format_##template,		\
+	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
@@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-static void ftrace_profile_##call(proto)				\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+static void								\
+ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
+			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	extern int perf_swevent_get_recursion_context(void);		\
 	extern void perf_swevent_put_recursion_context(int rctx);	\
-	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
@@ -789,6 +845,15 @@ end_recursion:								\
 									\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)		\
+static void ftrace_profile_##call(proto)			\
+{								\
+	struct ftrace_event_call *event_call = &event_##call;	\
+								\
+	ftrace_profile_templ_##template(event_call, args);	\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
-- 
cgit v1.2.3


From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 18 Nov 2009 20:36:26 -0500
Subject: tracing: Create new DEFINE_EVENT_PRINT

After creating the TRACE_EVENT_TEMPLATE I started to look at other
trace points to see what duplication was made. I noticed that there
are several trace points where they are almost identical except for
the name and the output format. Since TRACE_EVENT_TEMPLATE was successful
in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT.

DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the
DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the
developer to overwrite the print format. If there are two or more
TRACE_EVENTS that are identical except for the name and print, then
they can be converted to use a TRACE_EVENT_TEMPLATE. Since the
TRACE_EVENT_TEMPLATE already does the print output, the first trace event
would have its print format held in the TRACE_EVENT_TEMPLATE and
be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT
and override the print format.

Converting the sched trace points to both DEFINE_EVENT and
DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were
converted to DEFINE_EVENT_PRINT.

I was able to get the following:

$ size kernel/sched.o-*
   text	   data	    bss	    dec	    hex	filename
  79299	   6776	   2520	  88595	  15a13	kernel/sched.o-notrace
 101941	  11896	   2584	 116421	  1c6c5	kernel/sched.o-templ
 104779	  11896	   2584	 119259	  1d1db	kernel/sched.o-trace

sched.o-notrace is the scheduler compiled with no trace points.
sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT
sched.o-trace is the current trace events.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |   2 +
 include/trace/define_trace.h |   5 ++
 include/trace/ftrace.h       | 123 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 126 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 88a5b5a809ec..7063383cca13 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void)
 #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
 #define DEFINE_EVENT(template, name, proto, args)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 244985814a43..5d7d855ae21e 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -35,6 +35,10 @@
 #define DEFINE_EVENT(template, name, proto, args) \
 	DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -69,6 +73,7 @@
 #undef TRACE_EVENT_FN
 #undef TRACE_EVENT_TEMPLATE
 #undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 2969f65d8002..b0461772bc8d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -67,6 +67,10 @@
 #define DEFINE_EVENT(template, name, proto, args)	\
 	static struct ftrace_event_call event_##name
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef __cpparg
 #define __cpparg(arg...) arg
 
@@ -120,6 +124,10 @@
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -198,15 +206,28 @@
 #undef TRACE_EVENT_TEMPLATE
 #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
-ftrace_format_##call(struct ftrace_event_call *unused,			\
-		      struct trace_seq *s)				\
+ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
+			   struct trace_seq *s)				\
 {									\
 	struct ftrace_raw_##call field __attribute__((unused));		\
 	int ret = 0;							\
 									\
 	tstruct;							\
 									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
+	return ret;							\
+}									\
+									\
+static int								\
+ftrace_format_##call(struct ftrace_event_call *unused,			\
+		     struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##call(unused, s);			\
+	if (!ret)							\
+		return ret;						\
+									\
+	ret = trace_seq_printf(s, "\nprint fmt: " print);		\
 									\
 	return ret;							\
 }
@@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
+static int								\
+ftrace_format_##name(struct ftrace_event_call *unused,			\
+		      struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##template(unused, s);		\
+	if (!ret)							\
+		return ret;						\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags)	\
 					       #name, iter, flags);	\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
+static enum print_line_t						\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##template *field;				\
+	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
+	ret = trace_seq_printf(s, "%s: ", #call);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
+	put_cpu();							\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call(				\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 	unregister_trace_##name(ftrace_profile_##name);			\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #endif
@@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 	event_##call.id = id;						\
 	INIT_LIST_HEAD(&event_##call.fields);				\
 	return 0;							\
-}									\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
 									\
 static struct ftrace_event_call __used					\
 __attribute__((__aligned__(4)))						\
@@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##template,	\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto)			\
 	ftrace_profile_templ_##template(event_call, args);	\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
-- 
cgit v1.2.3


From d7b7e60526d54da4c94afe5f137714cee7d05c41 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 11 Nov 2009 14:29:54 +0900
Subject: PCI: introduce pci_pcie_cap()

Introduce pci_pcie_cap() API that returns saved PCIe capability offset
(currently it is saved in 'pcie_cap' field in the struct PCI dev).
Using pci_pcie_cap() instead of pci_find_capability() avoids
unnecessary search in PCI configuration space.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 233b3a092035..15f37f102dd3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1301,5 +1301,21 @@ extern void pci_hp_create_module_link(struct pci_slot *pci_slot);
 extern void pci_hp_remove_module_link(struct pci_slot *pci_slot);
 #endif
 
+/**
+ * pci_pcie_cap - get the saved PCIe capability offset
+ * @dev: PCI device
+ *
+ * PCIe capability offset is calculated at PCI device initialization
+ * time and saved in the data structure. This function returns saved
+ * PCIe capability offset. Using this instead of pci_find_capability()
+ * reduces unnecessary search in the PCI configuration space. If you
+ * need to calculate PCIe capability offset from raw device for some
+ * reasons, please use pci_find_capability() instead.
+ */
+static inline int pci_pcie_cap(struct pci_dev *dev)
+{
+	return dev->pcie_cap;
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 7eb776c42e75d17bd8107a1359068d8c742639d1 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 11 Nov 2009 14:35:22 +0900
Subject: PCI: introduce pci_is_pcie()

Introduce pci_is_pcie() which returns true if the specified PCI device
is PCI Express capable, false otherwise.

The purpose of pci_is_pcie() is removing 'is_pcie' flag in the struct
pci_dev, which is not needed because we can check it using 'pcie_cap'
field. To remove 'is_pcie', we need to update user of 'is_pcie' to use
pci_is_pcie() instead first.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15f37f102dd3..2891c3d3e51a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1317,5 +1317,16 @@ static inline int pci_pcie_cap(struct pci_dev *dev)
 	return dev->pcie_cap;
 }
 
+/**
+ * pci_is_pcie - check if the PCI device is PCI Express capable
+ * @dev: PCI device
+ *
+ * Retrun true if the PCI device is PCI Express capable, false otherwise.
+ */
+static inline bool pci_is_pcie(struct pci_dev *dev)
+{
+	return !!pci_pcie_cap(dev);
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 3b034b0d084221596bf35c8d893e1d4d5477b9cc Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Tue, 24 Nov 2009 15:50:03 +0900
Subject: percpu: Fix kdump failure if booted with percpu_alloc=page

o kdump functionality reserves a per cpu area at boot time and exports the
  physical address of that area to user space through sys interface. This
  area stores some dump related information like cpu register states etc
  at the time of crash.

o We were assuming that per cpu area always come from linearly mapped meory
  region and using __pa() to determine physical address.
  With percpu_alloc=page, per cpu area can come from vmalloc region also and
  __pa() breaks.

o This patch implments a new function to convert per cpu address to
  physical address.

Before the patch, crash_notes addresses looked as follows.

cpu0 60fffff49800
cpu1 60fffff60800
cpu2 60fffff77800

These are bogus phsyical addresses.

After the patch, address are following.

cpu0 13eb44000
cpu1 13eb43000
cpu2 13eb42000
cpu3 13eb41000

These look fine. I got 4G of memory and /proc/iomem tell me following.

100000000-13fffffff : System RAM

tj: * added missing asm/io.h include reported by Stephen Rothwell
    * repositioned per_cpu_ptr_phys() in percpu.c and added comment.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/base/cpu.c     |  2 +-
 include/linux/percpu.h |  1 +
 mm/percpu.c            | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index e62a4ccea54d..69ee5b7517ec 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -97,7 +97,7 @@ static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute
 	 * boot up and this data does not change there after. Hence this
 	 * operation should be safe. No locking required.
 	 */
-	addr = __pa(per_cpu_ptr(crash_notes, cpunum));
+	addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));
 	rc = sprintf(buf, "%Lx\n", addr);
 	return rc;
 }
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 878836ca999c..6ac984fa34f8 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -154,6 +154,7 @@ struct percpu_data {
 
 extern void *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void *__pdata);
+extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
diff --git a/mm/percpu.c b/mm/percpu.c
index 5adfc268b408..008fbd9e6fa4 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -74,6 +74,7 @@
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
+#include <asm/io.h>
 
 #define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
@@ -1302,6 +1303,27 @@ void free_percpu(void *ptr)
 }
 EXPORT_SYMBOL_GPL(free_percpu);
 
+/**
+ * per_cpu_ptr_to_phys - convert translated percpu address to physical address
+ * @addr: the address to be converted to physical address
+ *
+ * Given @addr which is dereferenceable address obtained via one of
+ * percpu access macros, this function translates it into its physical
+ * address.  The caller is responsible for ensuring @addr stays valid
+ * until this function finishes.
+ *
+ * RETURNS:
+ * The physical address for @addr.
+ */
+phys_addr_t per_cpu_ptr_to_phys(void *addr)
+{
+	if ((unsigned long)addr < VMALLOC_START ||
+			(unsigned long)addr >= VMALLOC_END)
+		return __pa(addr);
+	else
+		return page_to_phys(vmalloc_to_page(addr));
+}
+
 static inline size_t pcpu_calc_fc_sizes(size_t static_size,
 					size_t reserved_size,
 					ssize_t *dyn_sizep)
-- 
cgit v1.2.3


From 35a8a3fdcd4f973a5430e868f2f2a5c363803a5b Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 25 Nov 2009 17:50:00 +0100
Subject: drbd: moved CN_IDX_DRBD and CN_VAL_DRBD to the right file

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/connector.h | 2 ++
 include/linux/drbd.h      | 7 -------
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/connector.h b/include/linux/connector.h
index 3a14615fd35c..72ba63eb83c5 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -43,6 +43,8 @@
 #define CN_DST_VAL			0x1
 #define CN_IDX_DM			0x7	/* Device Mapper */
 #define CN_VAL_DM_USERSPACE_LOG		0x1
+#define CN_IDX_DRBD			0x8
+#define CN_VAL_DRBD			0x1
 
 #define CN_NETLINK_USERS		8
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 99a4d76694ed..e84f4733cb55 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -322,13 +322,6 @@ enum drbd_timeout_flag {
 #define DRBD_NL_CREATE_DEVICE 0x01
 #define DRBD_NL_SET_DEFAULTS  0x02
 
-/* The following line should be moved over to linux/connector.h
- * when the time comes */
-#ifndef CN_IDX_DRBD
-# define CN_IDX_DRBD			0x4
-/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */
-#endif
-#define CN_VAL_DRBD			0x1
 
 /* For searching a vacant cn_idx value */
 #define CN_IDX_STEP			6977
-- 
cgit v1.2.3


From 4e242d1616781f9f1f0b01abf878700b259cd8b5 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 25 Nov 2009 00:29:51 +0000
Subject: xfrm: Define new XFRM netlink auth attribute with specified
 truncation bits

The new XFRMA_ALG_AUTH_TRUNC attribute taking a xfrm_algo_auth as
argument allows the installation of authentication algorithms with
a truncation length specified in userspace, i.e. SHA256 with 128 bit
instead of 96 bit truncation.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 3246f0e66bcc..29e04beb1fc9 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -90,6 +90,13 @@ struct xfrm_algo {
 	char		alg_key[0];
 };
 
+struct xfrm_algo_auth {
+	char		alg_name[64];
+	unsigned int	alg_key_len;    /* in bits */
+	unsigned int	alg_trunc_len;  /* in bits */
+	char		alg_key[0];
+};
+
 struct xfrm_algo_aead {
 	char		alg_name[64];
 	unsigned int	alg_key_len;	/* in bits */
@@ -274,6 +281,7 @@ enum xfrm_attr_type_t {
 	XFRMA_MIGRATE,
 	XFRMA_ALG_AEAD,		/* struct xfrm_algo_aead */
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
+	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
-- 
cgit v1.2.3


From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 26 Nov 2009 09:04:55 +0100
Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS()

It is not quite obvious at first sight what TRACE_EVENT_TEMPLATE
does: does it define an event as well beyond defining a template?

To clarify this, rename it to DECLARE_EVENT_CLASS, which follows
the various 'DECLARE_*()' idioms we already have in the kernel:

  DECLARE_EVENT_CLASS(class)

    DEFINE_EVENT(class, event1)
    DEFINE_EVENT(class, event2)
    DEFINE_EVENT(class, event3)

To complete this logic we should also rename TRACE_EVENT() to:

  DEFINE_SINGLE_EVENT(single_event)

... but in a more quiet moment of the kernel cycle.

Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h   |  2 +-
 include/trace/define_trace.h |  2 +-
 include/trace/events/sched.h |  6 +++---
 include/trace/ftrace.h       | 46 ++++++++++++++++++++++----------------------
 4 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7063383cca13..f59604ed0ec6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
-#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
 #define DEFINE_EVENT(template, name, proto, args)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 5d7d855ae21e..5acfb1eb4df9 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -71,7 +71,7 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
-#undef TRACE_EVENT_TEMPLATE
+#undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 238f74b58486..5ce795021851 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task,
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT_TEMPLATE(sched_wakeup_template,
+DECLARE_EVENT_CLASS(sched_wakeup_template,
 
 	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
 
@@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task,
 		  __entry->orig_cpu, __entry->dest_cpu)
 );
 
-TRACE_EVENT_TEMPLATE(sched_process_template,
+DECLARE_EVENT_CLASS(sched_process_template,
 
 	TP_PROTO(struct task_struct *p),
 
@@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send,
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-TRACE_EVENT_TEMPLATE(sched_stat_template,
+DECLARE_EVENT_CLASS(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b0461772bc8d..2c9c073e45ad 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -19,17 +19,17 @@
 #include <linux/ftrace_event.h>
 
 /*
- * TRACE_EVENT_TEMPLATE can be used to add a generic function
+ * DECLARE_EVENT_CLASS can be used to add a generic function
  * handlers for events. That is, if all events have the same
  * parameters and just have distinct trace points.
  * Each tracepoint can be defined with DEFINE_EVENT and that
- * will map the TRACE_EVENT_TEMPLATE to the tracepoint.
+ * will map the DECLARE_EVENT_CLASS to the tracepoint.
  *
  * TRACE_EVENT is a one to one mapping between tracepoint and template.
  */
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
-	TRACE_EVENT_TEMPLATE(name,			       \
+	DECLARE_EVENT_CLASS(name,			       \
 			     PARAMS(proto),		       \
 			     PARAMS(args),		       \
 			     PARAMS(tstruct),		       \
@@ -56,8 +56,8 @@
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)	\
 	struct ftrace_raw_##name {					\
 		struct trace_entry	ent;				\
 		tstruct							\
@@ -115,8 +115,8 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
@@ -203,8 +203,8 @@
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
 			   struct trace_seq *s)				\
@@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static enum print_line_t						\
 ftrace_raw_output_id_##call(int event_id, const char *name,		\
 			    struct trace_iterator *iter, int flags)	\
@@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 {									\
@@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline int ftrace_get_offsets_##call(				\
 	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {									\
@@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call(				\
  *
  */
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)			\
@@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 									\
 static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
 				       proto)				\
@@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
@@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static void								\
 ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 			    proto)					\
-- 
cgit v1.2.3


From 2d4dc890b5c8fabd818a8586607e6843c4375e62 Mon Sep 17 00:00:00 2001
From: Ilya Loginov <isloginov@gmail.com>
Date: Thu, 26 Nov 2009 09:16:19 +0100
Subject: block: add helpers to run flush_dcache_page() against a bio and a
 request's pages

Mtdblock driver doesn't call flush_dcache_page for pages in request.  So,
this causes problems on architectures where the icache doesn't fill from
the dcache or with dcache aliases.  The patch fixes this.

The ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE symbol was introduced to avoid
pointless empty cache-thrashing loops on architectures for which
flush_dcache_page() is a no-op.  Every architecture was provided with this
flush pages on architectires where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is
equal 1 or do nothing otherwise.

See "fix mtd_blkdevs problem with caches on some architectures" discussion
on LKML for more information.

Signed-off-by: Ilya Loginov <isloginov@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Peter Horton <phorton@bitbox.co.uk>
Cc: "Ed L. Cashin" <ecashin@coraid.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/alpha/include/asm/cacheflush.h      |  1 +
 arch/arm/include/asm/cacheflush.h        |  1 +
 arch/avr32/include/asm/cacheflush.h      |  1 +
 arch/blackfin/include/asm/cacheflush.h   |  2 ++
 arch/cris/include/asm/cacheflush.h       |  1 +
 arch/frv/include/asm/cacheflush.h        |  1 +
 arch/h8300/include/asm/cacheflush.h      |  1 +
 arch/ia64/include/asm/cacheflush.h       |  1 +
 arch/m32r/include/asm/cacheflush.h       |  3 +++
 arch/m68k/include/asm/cacheflush_mm.h    |  1 +
 arch/m68k/include/asm/cacheflush_no.h    |  1 +
 arch/microblaze/include/asm/cacheflush.h |  1 +
 arch/mips/include/asm/cacheflush.h       |  1 +
 arch/mn10300/include/asm/cacheflush.h    |  1 +
 arch/parisc/include/asm/cacheflush.h     |  1 +
 arch/powerpc/include/asm/cacheflush.h    |  1 +
 arch/s390/include/asm/cacheflush.h       |  1 +
 arch/score/include/asm/cacheflush.h      |  1 +
 arch/sh/include/asm/cacheflush.h         |  1 +
 arch/sparc/include/asm/cacheflush_32.h   |  1 +
 arch/sparc/include/asm/cacheflush_64.h   |  1 +
 arch/x86/include/asm/cacheflush.h        |  1 +
 arch/xtensa/include/asm/cacheflush.h     |  1 +
 block/blk-core.c                         | 19 +++++++++++++++++++
 drivers/mtd/mtd_blkdevs.c                |  2 ++
 fs/bio.c                                 | 12 ++++++++++++
 include/asm-generic/cacheflush.h         |  1 +
 include/linux/bio.h                      | 12 ++++++++++++
 include/linux/blkdev.h                   | 11 +++++++++++
 29 files changed, 83 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h
index b686cc7fc44e..01d71e1c8a9e 100644
--- a/arch/alpha/include/asm/cacheflush.h
+++ b/arch/alpha/include/asm/cacheflush.h
@@ -9,6 +9,7 @@
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index fd03fb63a332..247b7b0adc2a 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -408,6 +408,7 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
  * about to change to user space.  This is the same method as used on SPARC64.
  * See update_mmu_cache for the user space part.
  */
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
 extern void __flush_dcache_page(struct address_space *mapping, struct page *page);
diff --git a/arch/avr32/include/asm/cacheflush.h b/arch/avr32/include/asm/cacheflush.h
index 670674749b20..96e53820bbbd 100644
--- a/arch/avr32/include/asm/cacheflush.h
+++ b/arch/avr32/include/asm/cacheflush.h
@@ -107,6 +107,7 @@ extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
  * do something here, but only for certain configurations.  No such
  * configurations exist at this time.
  */
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(page)		do { } while (0)
 #define flush_dcache_mmap_unlock(page)		do { } while (0)
diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h
index af03a36c7a4e..417eaac7fe99 100644
--- a/arch/blackfin/include/asm/cacheflush.h
+++ b/arch/blackfin/include/asm/cacheflush.h
@@ -68,9 +68,11 @@ do { memcpy(dst, src, len);						\
 #endif
 #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK)
 # define flush_dcache_range(start,end)		blackfin_dcache_flush_range((start), (end))
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 # define flush_dcache_page(page)		blackfin_dflush_page(page_address(page))
 #else
 # define flush_dcache_range(start,end)		do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 # define flush_dcache_page(page)		do { } while (0)
 #endif
 
diff --git a/arch/cris/include/asm/cacheflush.h b/arch/cris/include/asm/cacheflush.h
index cf60e3f69f8d..36795bca605e 100644
--- a/arch/cris/include/asm/cacheflush.h
+++ b/arch/cris/include/asm/cacheflush.h
@@ -12,6 +12,7 @@
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/frv/include/asm/cacheflush.h b/arch/frv/include/asm/cacheflush.h
index 432a69e7f3d4..edbac54ae015 100644
--- a/arch/frv/include/asm/cacheflush.h
+++ b/arch/frv/include/asm/cacheflush.h
@@ -47,6 +47,7 @@ static inline void __flush_cache_all(void)
 }
 
 /* dcache/icache coherency... */
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #ifdef CONFIG_MMU
 extern void flush_dcache_page(struct page *page);
 #else
diff --git a/arch/h8300/include/asm/cacheflush.h b/arch/h8300/include/asm/cacheflush.h
index 5ffdca217b95..4cf2df20c1ce 100644
--- a/arch/h8300/include/asm/cacheflush.h
+++ b/arch/h8300/include/asm/cacheflush.h
@@ -15,6 +15,7 @@
 #define	flush_cache_dup_mm(mm)		do { } while (0)
 #define	flush_cache_range(vma,a,b)
 #define	flush_cache_page(vma,p,pfn)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define	flush_dcache_page(page)
 #define	flush_dcache_mmap_lock(mapping)
 #define	flush_dcache_mmap_unlock(mapping)
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
index c8ce2719fee8..429eefc93ee7 100644
--- a/arch/ia64/include/asm/cacheflush.h
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -25,6 +25,7 @@
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)			\
 do {						\
 	clear_bit(PG_arch_1, &(page)->flags);	\
diff --git a/arch/m32r/include/asm/cacheflush.h b/arch/m32r/include/asm/cacheflush.h
index 78587c958146..8e8e04516c39 100644
--- a/arch/m32r/include/asm/cacheflush.h
+++ b/arch/m32r/include/asm/cacheflush.h
@@ -12,6 +12,7 @@ extern void _flush_cache_copyback_all(void);
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
@@ -33,6 +34,7 @@ extern void smp_flush_cache_all(void);
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
@@ -46,6 +48,7 @@ extern void smp_flush_cache_all(void);
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 16bf375fdbe1..73de7c89d8e0 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -128,6 +128,7 @@ static inline void __flush_page_to_ram(void *vaddr)
 	}
 }
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/m68k/include/asm/cacheflush_no.h b/arch/m68k/include/asm/cacheflush_no.h
index c65f00a94553..89f195656be7 100644
--- a/arch/m68k/include/asm/cacheflush_no.h
+++ b/arch/m68k/include/asm/cacheflush_no.h
@@ -12,6 +12,7 @@
 #define flush_cache_range(vma, start, end)	__flush_cache_all()
 #define flush_cache_page(vma, vmaddr)		do { } while (0)
 #define flush_dcache_range(start,len)		__flush_cache_all()
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index f989d6aad648..088076e657b3 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -37,6 +37,7 @@
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 
 #define flush_dcache_range(start, end)	__invalidate_dcache_range(start, end)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index 03b1d69b142f..40bb9fde205f 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -38,6 +38,7 @@ extern void (*flush_cache_range)(struct vm_area_struct *vma,
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
 extern void __flush_dcache_page(struct page *page);
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 static inline void flush_dcache_page(struct page *page)
 {
 	if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc)
diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h
index 1a55d61f0d06..29e692f7f030 100644
--- a/arch/mn10300/include/asm/cacheflush.h
+++ b/arch/mn10300/include/asm/cacheflush.h
@@ -26,6 +26,7 @@
 #define flush_cache_page(vma, vmaddr, pfn)	do {} while (0)
 #define flush_cache_vmap(start, end)		do {} while (0)
 #define flush_cache_vunmap(start, end)		do {} while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do {} while (0)
 #define flush_dcache_mmap_lock(mapping)		do {} while (0)
 #define flush_dcache_mmap_unlock(mapping)	do {} while (0)
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 724395143f26..7a73b615c23d 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -42,6 +42,7 @@ void flush_cache_mm(struct mm_struct *mm);
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 
 #define flush_dcache_mmap_lock(mapping) \
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index ba667a383b8c..ab9e402518e8 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -25,6 +25,7 @@
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
index 49d5af916d01..405cc97c6249 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/cacheflush.h
@@ -10,6 +10,7 @@
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/score/include/asm/cacheflush.h b/arch/score/include/asm/cacheflush.h
index 07cc8fc457cd..caaba24036e3 100644
--- a/arch/score/include/asm/cacheflush.h
+++ b/arch/score/include/asm/cacheflush.h
@@ -16,6 +16,7 @@ extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 
 #define flush_cache_dup_mm(mm)			do {} while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do {} while (0)
 #define flush_dcache_mmap_lock(mapping)		do {} while (0)
 #define flush_dcache_mmap_unlock(mapping)	do {} while (0)
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index c29918f3c819..dda96eb3e7c0 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -42,6 +42,7 @@ extern void flush_cache_page(struct vm_area_struct *vma,
 				unsigned long addr, unsigned long pfn);
 extern void flush_cache_range(struct vm_area_struct *vma,
 				 unsigned long start, unsigned long end);
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_icache_page(struct vm_area_struct *vma,
diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h
index 68ac10910271..2e468773f250 100644
--- a/arch/sparc/include/asm/cacheflush_32.h
+++ b/arch/sparc/include/asm/cacheflush_32.h
@@ -75,6 +75,7 @@ BTFIXUPDEF_CALL(void, flush_sig_insns, struct mm_struct *, unsigned long)
 
 extern void sparc_flush_page_to_ram(struct page *page);
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)			sparc_flush_page_to_ram(page)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h
index c43321729b3b..b95384033e89 100644
--- a/arch/sparc/include/asm/cacheflush_64.h
+++ b/arch/sparc/include/asm/cacheflush_64.h
@@ -37,6 +37,7 @@ extern void flush_dcache_page_all(struct mm_struct *mm, struct page *page);
 #endif
 
 extern void __flush_dcache_range(unsigned long start, unsigned long end);
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 
 #define flush_icache_page(vma, pg)	do { } while(0)
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b54f6afe7ec4..9076add593a8 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end) { }
 static inline void flush_cache_page(struct vm_area_struct *vma,
 				    unsigned long vmaddr, unsigned long pfn) { }
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 static inline void flush_dcache_page(struct page *page) { }
 static inline void flush_dcache_mmap_lock(struct address_space *mapping) { }
 static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { }
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index b7b8fbe47c77..a508f2f73bd7 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -101,6 +101,7 @@ static inline void __invalidate_icache_page_alias(unsigned long virt,
 #define flush_cache_vmap(start,end)	flush_cache_all()
 #define flush_cache_vunmap(start,end)	flush_cache_all()
 
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page*);
 extern void flush_cache_range(struct vm_area_struct*, ulong, ulong);
 extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long);
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da5111120c..718897e6d37f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2358,6 +2358,25 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+/**
+ * rq_flush_dcache_pages - Helper function to flush all pages in a request
+ * @rq: the request to be flushed
+ *
+ * Description:
+ *     Flush all pages in @rq.
+ */
+void rq_flush_dcache_pages(struct request *rq)
+{
+	struct req_iterator iter;
+	struct bio_vec *bvec;
+
+	rq_for_each_segment(bvec, rq, iter)
+		flush_dcache_page(bvec->bv_page);
+}
+EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
+#endif
+
 /**
  * blk_lld_busy - Check if underlying low-level drivers of a device are busy
  * @q : the queue of the device being checked
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 8ca17a3e96ea..64e2b379a350 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -59,12 +59,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
 				return -EIO;
+		rq_flush_dcache_pages(req);
 		return 0;
 
 	case WRITE:
 		if (!tr->writesect)
 			return -EIO;
 
+		rq_flush_dcache_pages(req);
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
 				return -EIO;
diff --git a/fs/bio.c b/fs/bio.c
index 12da5db8682c..e23a63f4f7de 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio)
 	}
 }
 
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+void bio_flush_dcache_pages(struct bio *bi)
+{
+	int i;
+	struct bio_vec *bvec;
+
+	bio_for_each_segment(bvec, bi, i)
+		flush_dcache_page(bvec->bv_page);
+}
+EXPORT_SYMBOL(bio_flush_dcache_pages);
+#endif
+
 /**
  * bio_endio - end I/O on a bio
  * @bio:	bio
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index ba4ec39a1131..57b5c3c82e86 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -13,6 +13,7 @@
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
 #define flush_dcache_page(page)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 474792b825d0..7fc5606e6ea5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -391,6 +391,18 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 				 gfp_t, int);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
+
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
+#endif
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+extern void bio_flush_dcache_pages(struct bio *bi);
+#else
+static inline void bio_flush_dcache_pages(struct bio *bi)
+{
+}
+#endif
+
 extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
 				 unsigned long, unsigned int, int, gfp_t);
 extern struct bio *bio_copy_user_iov(struct request_queue *,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1cc02972fbe2..e727f6c44c44 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -752,6 +752,17 @@ struct req_iterator {
 #define rq_iter_last(rq, _iter)					\
 		(_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
 
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
+#endif
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+extern void rq_flush_dcache_pages(struct request *rq);
+#else
+static inline void rq_flush_dcache_pages(struct request *rq)
+{
+}
+#endif
+
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
-- 
cgit v1.2.3


From d9449ce35a1e8fb58dd2d419f9215562a14ecca0 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Thu, 26 Nov 2009 09:45:40 +0100
Subject: Fix regression in direct writes performance due to WRITE_ODIRECT flag
 removal

There seems to be a regression in direct write path due to following
commit in for-2.6.33 branch of block tree.

commit 1af60fbd759d31f565552fea315c2033947cfbe6
Author: Jeff Moyer <jmoyer@redhat.com>
Date:   Fri Oct 2 18:56:53 2009 -0400

    block: get rid of the WRITE_ODIRECT flag

Marking direct writes as WRITE_SYNC_PLUG instead of WRITE_ODIRECT, sets
the NOIDLE flag in bio and hence in request. This tells CFQ to not expect
more request from the queue and not idle on it (despite the fact that
queue's think time is less and it is not seeky).

So direct writers lose big time when competing with sequential readers.

Using fio, I have run one direct writer and two sequential readers and
following are the results with 2.6.32-rc7 kernel and with for-2.6.33
branch.

Test
====
1 direct writer and 2 sequential reader running simultaneously.

[global]
directory=/mnt/sdc/fio/
runtime=10

[seqwrite]
rw=write
size=4G
direct=1

[seqread]
rw=read
size=2G
numjobs=2

2.6.32-rc7
==========
direct writes: aggrb=2,968KB/s
readers	     : aggrb=101MB/s

for-2.6.33 branch
=================
direct write: aggrb=19KB/s
readers	      aggrb=137MB/s

This patch brings back the WRITE_ODIRECT flag, with the difference that we
don't set the BIO_RW_UNPLUG flag so that device is not unplugged after
submission of request and an explicit unplug from submitter is required.

That way we fix the jeff's issue of not enough merging taking place in aio
path as well as make sure direct writes get their fair share.

After the fix
=============
for-2.6.33 + fix
----------------
direct writes: aggrb=2,728KB/s
reads: aggrb=103MB/s

Thanks
Vivek

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/direct-io.c     | 2 +-
 include/linux/fs.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3af761c8c5cc..b912270942fa 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
-		rw = WRITE_SYNC_PLUG;
+		rw = WRITE_ODIRECT_PLUG;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2f5fca4147c2..79cea8051736 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -129,6 +129,7 @@ struct inodes_stat_t {
  * WRITE_SYNC		Like WRITE_SYNC_PLUG, but also unplugs the device
  *			immediately after submission. The write equivalent
  *			of READ_SYNC.
+ * WRITE_ODIRECT_PLUG	Special case write for O_DIRECT only.
  * SWRITE_SYNC
  * SWRITE_SYNC_PLUG	Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
  *			See SWRITE.
@@ -150,6 +151,7 @@ struct inodes_stat_t {
 #define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC_PLUG	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC	(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
+#define WRITE_ODIRECT_PLUG	(WRITE | (1 << BIO_RW_SYNCIO))
 #define SWRITE_SYNC_PLUG	\
 			(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC	(SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-- 
cgit v1.2.3


From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:48:30 +0900
Subject: sched: Introduce task_times() to replace task_{u,s}time() pair

Functions task_{u,s}time() are called in pair in almost all
cases.  However task_stime() is implemented to call task_utime()
from its inside, so such paired calls run task_utime() twice.

It means we do heavy divisions (div_u64 + do_div) twice to get
utime and stime which can be obtained at same time by one set
of divisions.

This patch introduces a function task_times(*tsk, *utime,
*stime) to retrieve utime and stime at once in better, optimized
way.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
LKML-Reference: <4B0E16AE.906@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  3 +--
 include/linux/sched.h |  1 +
 kernel/exit.c         |  7 +++++--
 kernel/sched.c        | 55 ++++++++++++++++++++++++++++++++-------------------
 kernel/sys.c          |  3 +--
 5 files changed, 43 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index e209f64ab27b..330deda70d08 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		utime = task_utime(task);
-		stime = task_stime(task);
+		task_times(task, &utime, &stime);
 		gtime = task_gtime(task);
 	}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78ba664474f3..fe6ae1516640 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t)
 extern cputime_t task_utime(struct task_struct *p);
 extern cputime_t task_stime(struct task_struct *p);
 extern cputime_t task_gtime(struct task_struct *p);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..29068ab2670a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk)
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
 	else {
+		cputime_t utime, stime;
+
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
@@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		task_times(tsk, &utime, &stime);
+		sig->utime = cputime_add(sig->utime, utime);
+		sig->stime = cputime_add(sig->stime, stime);
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
diff --git a/kernel/sched.c b/kernel/sched.c
index 315ba4059f93..475a6f2b7158 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5191,6 +5191,14 @@ cputime_t task_stime(struct task_struct *p)
 {
 	return p->stime;
 }
+
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	if (ut)
+		*ut = task_utime(p);
+	if (st)
+		*st = task_stime(p);
+}
 #else
 
 #ifndef nsecs_to_cputime
@@ -5198,41 +5206,48 @@ cputime_t task_stime(struct task_struct *p)
 	msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
 #endif
 
-cputime_t task_utime(struct task_struct *p)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	cputime_t utime = p->utime, total = utime + p->stime;
-	u64 temp;
+	cputime_t rtime, utime = p->utime, total = utime + p->stime;
 
 	/*
 	 * Use CFS's precise accounting:
 	 */
-	temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime);
+	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
 	if (total) {
-		temp *= utime;
+		u64 temp;
+
+		temp = (u64)(rtime * utime);
 		do_div(temp, total);
-	}
-	utime = (cputime_t)temp;
+		utime = (cputime_t)temp;
+	} else
+		utime = rtime;
 
+	/*
+	 * Compare with previous values, to keep monotonicity:
+	 */
 	p->prev_utime = max(p->prev_utime, utime);
-	return p->prev_utime;
+	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+
+	if (ut)
+		*ut = p->prev_utime;
+	if (st)
+		*st = p->prev_stime;
+}
+
+cputime_t task_utime(struct task_struct *p)
+{
+	cputime_t utime;
+	task_times(p, &utime, NULL);
+	return utime;
 }
 
 cputime_t task_stime(struct task_struct *p)
 {
 	cputime_t stime;
-
-	/*
-	 * Use CFS's precise accounting. (we subtract utime from
-	 * the total, to make sure the total observed by userspace
-	 * grows monotonically - apps rely on that):
-	 */
-	stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p);
-
-	if (stime >= 0)
-		p->prev_stime = max(p->prev_stime, stime);
-
-	return p->prev_stime;
+	task_times(p, NULL, &stime);
+	return stime;
 }
 #endif
 
diff --git a/kernel/sys.c b/kernel/sys.c
index ce17760d9c51..bbdfce0d4347 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1346,8 +1346,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		utime = task_utime(current);
-		stime = task_stime(current);
+		task_times(current, &utime, &stime);
 		accumulate_thread_rusage(p, r);
 		maxrss = p->signal->maxrss;
 		goto out;
-- 
cgit v1.2.3


From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:49:05 +0900
Subject: sched: Remove task_{u,s,g}time()

Now all task_{u,s}time() pairs are replaced by task_times().
And task_gtime() is too simple to be an inline function.

Cleanup them all.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  4 ++--
 include/linux/sched.h |  3 ---
 kernel/exit.c         |  2 +-
 kernel/sched.c        | 33 ++-------------------------------
 4 files changed, 5 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 330deda70d08..ca61a88aed66 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime = cputime_add(gtime, task_gtime(t));
+				gtime = cputime_add(gtime, t->gtime);
 				t = next_thread(t);
 			} while (t != task);
 
@@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_times(task, &utime, &stime);
-		gtime = task_gtime(task);
+		gtime = task->gtime;
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae1516640..0395b0f4df3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-extern cputime_t task_utime(struct task_struct *p);
-extern cputime_t task_stime(struct task_struct *p);
-extern cputime_t task_gtime(struct task_struct *p);
 extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
diff --git a/kernel/exit.c b/kernel/exit.c
index 29068ab2670a..2eaf68b634e3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk)
 		task_times(tsk, &utime, &stime);
 		sig->utime = cputime_add(sig->utime, utime);
 		sig->stime = cputime_add(sig->stime, stime);
-		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 475a6f2b7158..82251c21f785 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5182,22 +5182,12 @@ void account_idle_ticks(unsigned long ticks)
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cputime_t task_utime(struct task_struct *p)
-{
-	return p->utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
-	return p->stime;
-}
-
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	if (ut)
-		*ut = task_utime(p);
+		*ut = p->utime;
 	if (st)
-		*st = task_stime(p);
+		*st = p->stime;
 }
 #else
 
@@ -5235,27 +5225,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	if (st)
 		*st = p->prev_stime;
 }
-
-cputime_t task_utime(struct task_struct *p)
-{
-	cputime_t utime;
-	task_times(p, &utime, NULL);
-	return utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
-	cputime_t stime;
-	task_times(p, NULL, &stime);
-	return stime;
-}
 #endif
 
-inline cputime_t task_gtime(struct task_struct *p)
-{
-	return p->gtime;
-}
-
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
-- 
cgit v1.2.3


From b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:49:27 +0900
Subject: sched, time: Define nsecs_to_jiffies()

Use of msecs_to_jiffies() for nsecs_to_cputime() have some
problems:

 - The type of msecs_to_jiffies()'s argument is unsigned int, so
   it cannot convert msecs greater than UINT_MAX = about 49.7 days.

 - msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument
   is set, assuming that input was negative value.  So it cannot
   convert msecs greater than INT_MAX = about 24.8 days too.

This patch defines a new function nsecs_to_jiffies() that can
deal greater values, and that can deal all incoming values as
unsigned.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Amrico Wang <xiyou.wangcong@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@linux.vnet.ibm.com>
LKML-Reference: <4B0E16E7.5070307@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/jiffies.h |  1 +
 kernel/sched.c          |  3 +--
 kernel/time.c           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1a9cf78bfce5..6811f4bfc6e7 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
+extern unsigned long nsecs_to_jiffies(u64 n);
 
 #define TIMESTAMP_SIZE	30
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 82251c21f785..b3d4e2be95aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5192,8 +5192,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 #else
 
 #ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) \
-	msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
+# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
diff --git a/kernel/time.c b/kernel/time.c
index 2e2e469a7fec..804798005d19 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x)
 #endif
 }
 
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n:	nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+#if (NSEC_PER_SEC % HZ) == 0
+	/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+	return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+	/* overflow after 292 years if HZ = 1024 */
+	return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+	/*
+	 * Generic case - optimized for cases where HZ is a multiple of 3.
+	 * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+	 */
+	return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
+#endif
+}
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
-- 
cgit v1.2.3


From 445409602c09219767c06497c0dc2285eac244ed Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Nov 2009 06:07:08 +0000
Subject: veth: move loopback logic to common location

The veth driver contains code to forward an skb
from the start_xmit function of one network
device into the receive path of another device.

Moving that code into a common location lets us
reuse the code for direct forwarding of data
between macvlan ports, and possibly in other
drivers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c        | 17 +++--------------
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 2d657f2314cb..6c4b5a2d7877 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -155,8 +155,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct veth_net_stats *stats, *rcv_stats;
 	int length, cpu;
 
-	skb_orphan(skb);
-
 	priv = netdev_priv(dev);
 	rcv = priv->peer;
 	rcv_priv = netdev_priv(rcv);
@@ -168,20 +166,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!(rcv->flags & IFF_UP))
 		goto tx_drop;
 
-	if (skb->len > (rcv->mtu + MTU_PAD))
-		goto rx_drop;
-
-        skb->tstamp.tv64 = 0;
-	skb->pkt_type = PACKET_HOST;
-	skb->protocol = eth_type_trans(skb, rcv);
 	if (dev->features & NETIF_F_NO_CSUM)
 		skb->ip_summed = rcv_priv->ip_summed;
 
-	skb->mark = 0;
-	secpath_reset(skb);
-	nf_reset(skb);
-
-	length = skb->len;
+	length = skb->len + ETH_HLEN;
+	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
+		goto rx_drop;
 
 	stats->tx_bytes += length;
 	stats->tx_packets++;
@@ -189,7 +179,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	rcv_stats->rx_bytes += length;
 	rcv_stats->rx_packets++;
 
-	netif_rx(skb);
 	return NETDEV_TX_OK;
 
 tx_drop:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97873e31661c..9428793775a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1562,6 +1562,8 @@ extern int		dev_set_mac_address(struct net_device *,
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev,
 					    struct netdev_queue *txq);
+extern int		dev_forward_skb(struct net_device *dev,
+					struct sk_buff *skb);
 
 extern int		netdev_budget;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e65af6041415..7775e8b48deb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -1419,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP     (packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	skb_orphan(skb);
+
+	if (!(dev->flags & IFF_UP))
+		return NET_RX_DROP;
+
+	if (skb->len > (dev->mtu + dev->hard_header_len))
+		return NET_RX_DROP;
+
+	skb_dst_drop(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
-- 
cgit v1.2.3


From 27c0b1a850cdea6298f573d835782f3337be913c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Nov 2009 06:07:11 +0000
Subject: macvlan: export macvlan mode through netlink

In order to support all three modes of macvlan at
runtime, extend the existing netlink protocol
to allow choosing the mode per macvlan slave
interface.

This depends on a matching patch to iproute2
in order to become accessible in user land.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c   | 56 +++++++++++++++++++++++++++++++++++++++++++------
 include/linux/if_link.h | 15 +++++++++++++
 2 files changed, 65 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d6bd84353f44..322112c7358a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -33,12 +33,6 @@
 
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
-enum macvlan_mode {
-	MACVLAN_MODE_PRIVATE	= 1,
-	MACVLAN_MODE_VEPA	= 2,
-	MACVLAN_MODE_BRIDGE	= 4,
-};
-
 struct macvlan_port {
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
@@ -614,6 +608,17 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 			return -EADDRNOTAVAIL;
 	}
+
+	if (data && data[IFLA_MACVLAN_MODE]) {
+		switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) {
+		case MACVLAN_MODE_PRIVATE:
+		case MACVLAN_MODE_VEPA:
+		case MACVLAN_MODE_BRIDGE:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
 	return 0;
 }
 
@@ -678,6 +683,10 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 	vlan->dev      = dev;
 	vlan->port     = port;
 
+	vlan->mode     = MACVLAN_MODE_VEPA;
+	if (data && data[IFLA_MACVLAN_MODE])
+		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		return err;
@@ -699,6 +708,36 @@ static void macvlan_dellink(struct net_device *dev, struct list_head *head)
 		macvlan_port_destroy(port->dev);
 }
 
+static int macvlan_changelink(struct net_device *dev,
+		struct nlattr *tb[], struct nlattr *data[])
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	if (data && data[IFLA_MACVLAN_MODE])
+		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
+	return 0;
+}
+
+static size_t macvlan_get_size(const struct net_device *dev)
+{
+	return nla_total_size(4);
+}
+
+static int macvlan_fill_info(struct sk_buff *skb,
+				const struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	NLA_PUT_U32(skb, IFLA_MACVLAN_MODE, vlan->mode);
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
+	[IFLA_MACVLAN_MODE] = { .type = NLA_U32 },
+};
+
 static struct rtnl_link_ops macvlan_link_ops __read_mostly = {
 	.kind		= "macvlan",
 	.priv_size	= sizeof(struct macvlan_dev),
@@ -707,6 +746,11 @@ static struct rtnl_link_ops macvlan_link_ops __read_mostly = {
 	.validate	= macvlan_validate,
 	.newlink	= macvlan_newlink,
 	.dellink	= macvlan_dellink,
+	.maxtype	= IFLA_MACVLAN_MAX,
+	.policy		= macvlan_policy,
+	.changelink	= macvlan_changelink,
+	.get_size	= macvlan_get_size,
+	.fill_info	= macvlan_fill_info,
 };
 
 static int macvlan_device_event(struct notifier_block *unused,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 1d3b2425f661..6674791622ca 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -181,4 +181,19 @@ struct ifla_vlan_qos_mapping {
 	__u32 to;
 };
 
+/* MACVLAN section */
+enum {
+	IFLA_MACVLAN_UNSPEC,
+	IFLA_MACVLAN_MODE,
+	__IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+	MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
+	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
+};
+
 #endif /* _LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From 5e7565930524410f097f5b04f8aba663089a6ffc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Nov 2009 07:54:54 +0000
Subject: vlan: support "loose binding" to the underlying network device

Currently the UP/DOWN state of VLANs is synchronized to the state of the
underlying device, meaning all VLANs are set down once the underlying
device is set down. This causes all routes to the VLAN devices to vanish.

Add a flag to specify a "loose binding" mode, in which only the operstate
is transfered, but the VLAN device state is independant.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h  | 1 +
 net/8021q/vlan.c         | 9 +++++++--
 net/8021q/vlan_dev.c     | 6 ++++--
 net/8021q/vlan_netlink.c | 3 ++-
 4 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 153f6b9e722c..3d870fda8c4f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -339,6 +339,7 @@ enum vlan_ioctl_cmds {
 enum vlan_flags {
 	VLAN_FLAG_REORDER_HDR	= 0x1,
 	VLAN_FLAG_GVRP		= 0x2,
+	VLAN_FLAG_LOOSE_BINDING	= 0x4,
 };
 
 enum vlan_name_types {
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1483243edf14..225aa2fac0e3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -431,6 +431,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	struct vlan_group *grp;
 	int i, flgs;
 	struct net_device *vlandev;
+	struct vlan_dev_info *vlan;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
@@ -507,7 +508,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!(flgs & IFF_UP))
 				continue;
 
-			dev_change_flags(vlandev, flgs & ~IFF_UP);
+			vlan = vlan_dev_info(vlandev);
+			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
+				dev_change_flags(vlandev, flgs & ~IFF_UP);
 			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
@@ -523,7 +526,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (flgs & IFF_UP)
 				continue;
 
-			dev_change_flags(vlandev, flgs | IFF_UP);
+			vlan = vlan_dev_info(vlandev);
+			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
+				dev_change_flags(vlandev, flgs | IFF_UP);
 			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index de0dc6bacbe8..b7889782047e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -431,7 +431,8 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	u32 old_flags = vlan->flags;
 
-	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
+	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
+		     VLAN_FLAG_LOOSE_BINDING))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
@@ -456,7 +457,8 @@ static int vlan_dev_open(struct net_device *dev)
 	struct net_device *real_dev = vlan->real_dev;
 	int err;
 
-	if (!(real_dev->flags & IFF_UP))
+	if (!(real_dev->flags & IFF_UP) &&
+	    !(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 3c9cf6a8e7fb..ddc105734af7 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -60,7 +60,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
-		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
+		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
+		      VLAN_FLAG_LOOSE_BINDING))
 			return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 27 Nov 2009 04:55:53 +0100
Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints

In-kernel user breakpoints are created using functions in which
we pass breakpoint parameters as individual variables: address,
length and type.

Although it fits well for x86, this just does not scale across
archictectures that may support this api later as these may have
more or different needs. Pass in a perf_event_attr structure
instead because it is meant to evolve as much as possible into
a generic hardware breakpoint parameter structure.

Reported-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c      | 74 ++++++++++++++++++++----------------
 include/linux/hw_breakpoint.h | 36 ++++++++----------
 kernel/hw_breakpoint.c        | 87 +++++++++----------------------------------
 3 files changed, 75 insertions(+), 122 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 75e0cd847bd6..2941b32ea666 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -593,6 +593,34 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 	return dr7;
 }
 
+static struct perf_event *
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+			 struct task_struct *tsk)
+{
+	int err;
+	int gen_len, gen_type;
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	/*
+	 * We shoud have at least an inactive breakpoint at this
+	 * slot. It means the user is writing dr7 without having
+	 * written the address register first
+	 */
+	if (!bp)
+		return ERR_PTR(-EINVAL);
+
+	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+	if (err)
+		return ERR_PTR(err);
+
+	attr = bp->attr;
+	attr.bp_len = gen_len;
+	attr.bp_type = gen_type;
+	attr.disabled = 0;
+
+	return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+}
+
 /*
  * Handle ptrace writes to debug register 7.
  */
@@ -603,7 +631,6 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
-	int gen_len, gen_type;
 	struct perf_event *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
@@ -634,33 +661,12 @@ restore:
 			continue;
 		}
 
-		/*
-		 * We shoud have at least an inactive breakpoint at this
-		 * slot. It means the user is writing dr7 without having
-		 * written the address register first
-		 */
-		if (!bp) {
-			rc = -EINVAL;
-			break;
-		}
-
-		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
-		if (rc)
-			break;
-
-		/*
-		 * This is a temporary thing as bp is unregistered/registered
-		 * to simulate modification
-		 */
-		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
-					       gen_type, bp->callback,
-					       tsk, true);
-		thread->ptrace_bps[i] = NULL;
+		bp = ptrace_modify_breakpoint(bp, len, type, tsk);
 
 		/* Incorrect bp, or we have a bug in bp API */
 		if (IS_ERR(bp)) {
 			rc = PTR_ERR(bp);
-			bp = NULL;
+			thread->ptrace_bps[i] = NULL;
 			break;
 		}
 		thread->ptrace_bps[i] = bp;
@@ -707,24 +713,26 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 {
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
+	DEFINE_BREAKPOINT_ATTR(attr);
 
 	if (!t->ptrace_bps[nr]) {
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
 		 */
-		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
-						 HW_BREAKPOINT_W,
-						 ptrace_triggered, tsk,
-						 false);
+		attr.bp_addr = addr;
+		attr.bp_len = HW_BREAKPOINT_LEN_1;
+		attr.bp_type = HW_BREAKPOINT_W;
+		attr.disabled = 1;
+
+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
 	} else {
 		bp = t->ptrace_bps[nr];
 		t->ptrace_bps[nr] = NULL;
-		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
-					       bp->attr.bp_type,
-					       bp->callback,
-					       tsk,
-					       bp->attr.disabled);
+
+		attr = bp->attr;
+		attr.bp_addr = addr;
+		bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
 	}
 	/*
 	 * CHECKME: the previous code returned -EIO if the addr wasn't a
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index c9f7f7c7b0e0..5da472e434b7 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -20,6 +20,14 @@ enum {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+/* As it's for in-kernel or ptrace use, we want it to be pinned */
+#define DEFINE_BREAKPOINT_ATTR(name)	\
+struct perf_event_attr name = {		\
+	.type = PERF_TYPE_BREAKPOINT,	\
+	.size = sizeof(name),		\
+	.pinned = 1,			\
+};
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 }
 
 extern struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active);
+			    struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
 extern struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+			  struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active);
+			  struct task_struct *tsk);
 
 /*
  * Kernel breakpoints are not associated with any particular thread.
@@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
 static inline struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active)		{ return NULL; }
+			    struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+			  struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active)			{ return NULL; }
+			  struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(unsigned long addr,
 				int len,
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 32e1018191be..2a47514f12fd 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -289,90 +289,32 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	return __register_perf_hw_breakpoint(bp);
 }
 
-/*
- * Register a breakpoint bound to a task and a given cpu.
- * If cpu is -1, the breakpoint is active for the task in every cpu
- * If the task is -1, the breakpoint is active for every tasks in the given
- * cpu.
- */
-static struct perf_event *
-register_user_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
-				perf_callback_t triggered,
-				pid_t pid,
-				int cpu,
-				bool active)
-{
-	struct perf_event_attr *attr;
-	struct perf_event *bp;
-
-	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
-	if (!attr)
-		return ERR_PTR(-ENOMEM);
-
-	attr->type = PERF_TYPE_BREAKPOINT;
-	attr->size = sizeof(*attr);
-	attr->bp_addr = addr;
-	attr->bp_len = len;
-	attr->bp_type = type;
-	/*
-	 * Such breakpoints are used by debuggers to trigger signals when
-	 * we hit the excepted memory op. We can't miss such events, they
-	 * must be pinned.
-	 */
-	attr->pinned = 1;
-
-	if (!active)
-		attr->disabled = 1;
-
-	bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
-	kfree(attr);
-
-	return bp;
-}
-
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @active: should we activate it while registering it
- *
  */
 struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active)
+			    struct task_struct *tsk)
 {
-	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
-					       tsk->pid, -1, active);
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
 /**
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
  * @bp: the breakpoint structure to modify
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: new breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @active: should we activate it while registering it
  */
 struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active)
+			  struct task_struct *tsk)
 {
 	/*
 	 * FIXME: do it without unregistering
@@ -381,8 +323,7 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 	 */
 	unregister_hw_breakpoint(bp);
 
-	return register_user_hw_breakpoint(addr, len, type, triggered,
-					   tsk, active);
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -406,8 +347,16 @@ register_kernel_hw_breakpoint_cpu(unsigned long addr,
 				  int cpu,
 				  bool active)
 {
-	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
-					       -1, cpu, active);
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	attr.bp_addr = addr;
+	attr.bp_len = len;
+	attr.bp_type = type;
+
+	if (!active)
+		attr.disabled = 1;
+
+	return perf_event_create_kernel_counter(&attr, cpu, -1, triggered);
 }
 
 /**
-- 
cgit v1.2.3


From dd1853c3f493f6d22d9e5390b192a07b73d2ac0a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 27 Nov 2009 04:55:54 +0100
Subject: hw-breakpoints: Use struct perf_event_attr to define kernel
 breakpoints

Kernel breakpoints are created using functions in which we pass
breakpoint parameters as individual variables: address, length
and type.

Although it fits well for x86, this just does not scale across
architectures that may support this api later as these may have
more or different needs. Pass in a perf_event_attr structure
instead because it is meant to evolve as much as possible into
a generic hardware breakpoint parameter structure.

Reported-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1259294154-5197-2-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h           | 35 ++++++++++++---------------
 kernel/hw_breakpoint.c                  | 35 ++++-----------------------
 kernel/trace/trace_ksym.c               | 42 ++++++++++++++++-----------------
 samples/hw_breakpoint/data_breakpoint.c | 10 ++++----
 4 files changed, 44 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 5da472e434b7..a03daed08c59 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -28,6 +28,13 @@ struct perf_event_attr name = {		\
 	.pinned = 1,			\
 };
 
+static inline void hw_breakpoint_init(struct perf_event_attr *attr)
+{
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->pinned = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -59,19 +66,13 @@ modify_user_hw_breakpoint(struct perf_event *bp,
  * Kernel breakpoints are not associated with any particular thread.
  */
 extern struct perf_event *
-register_wide_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_callback_t triggered,
-				int cpu,
-				bool active);
+				int cpu);
 
 extern struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active);
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -100,18 +101,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 			  perf_callback_t triggered,
 			  struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
-register_wide_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_callback_t triggered,
-				int cpu,
-				bool active)		{ return NULL; }
+				int cpu)		{ return NULL; }
 static inline struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active)		{ return NULL; }
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)	{ return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
 static inline int
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2a47514f12fd..cf5ee1628411 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -339,42 +339,16 @@ void unregister_hw_breakpoint(struct perf_event *bp)
 }
 EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 
-static struct perf_event *
-register_kernel_hw_breakpoint_cpu(unsigned long addr,
-				  int len,
-				  int type,
-				  perf_callback_t triggered,
-				  int cpu,
-				  bool active)
-{
-	DEFINE_BREAKPOINT_ATTR(attr);
-
-	attr.bp_addr = addr;
-	attr.bp_len = len;
-	attr.bp_type = type;
-
-	if (!active)
-		attr.disabled = 1;
-
-	return perf_event_create_kernel_counter(&attr, cpu, -1, triggered);
-}
-
 /**
  * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
- * @active: should we activate it while registering it
  *
  * @return a set of per_cpu pointers to perf events
  */
 struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active)
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
@@ -386,8 +360,7 @@ register_wide_hw_breakpoint(unsigned long addr,
 
 	for_each_possible_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
-		bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
-					triggered, cpu, active);
+		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
 
 		*pevent = bp;
 
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index c538b15b95d6..ddfa0fd43bc0 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -42,9 +42,7 @@
 
 struct trace_ksym {
 	struct perf_event	**ksym_hbp;
-	unsigned long		ksym_addr;
-	int			type;
-	int			len;
+	struct perf_event_attr	attr;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
 	unsigned long		counter;
 #endif
@@ -71,7 +69,7 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
-		if ((entry->ksym_addr == hbp_hit_addr) &&
+		if ((entry->attr.bp_addr == hbp_hit_addr) &&
 		    (entry->counter <= MAX_UL_INT)) {
 			entry->counter++;
 			break;
@@ -192,14 +190,15 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->type = op;
-	entry->ksym_addr = addr;
-	entry->len = HW_BREAKPOINT_LEN_4;
+	hw_breakpoint_init(&entry->attr);
+
+	entry->attr.bp_type = op;
+	entry->attr.bp_addr = addr;
+	entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
 
 	ret = -EAGAIN;
-	entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
-					entry->len, entry->type,
-					ksym_hbp_handler, true);
+	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
@@ -236,12 +235,12 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
 	mutex_lock(&ksym_tracer_mutex);
 
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
-		if (entry->type == HW_BREAKPOINT_R)
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
+		if (entry->attr.bp_type == HW_BREAKPOINT_R)
 			ret = trace_seq_puts(s, "r--\n");
-		else if (entry->type == HW_BREAKPOINT_W)
+		else if (entry->attr.bp_type == HW_BREAKPOINT_W)
 			ret = trace_seq_puts(s, "-w-\n");
-		else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
+		else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
 			ret = trace_seq_puts(s, "rw-\n");
 		WARN_ON_ONCE(!ret);
 	}
@@ -317,9 +316,9 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 
 	ret = -EINVAL;
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		if (entry->ksym_addr == ksym_addr) {
+		if (entry->attr.bp_addr == ksym_addr) {
 			/* Check for malformed request: (6) */
-			if (entry->type != op)
+			if (entry->attr.bp_type != op)
 				changed = 1;
 			else
 				goto out;
@@ -328,13 +327,12 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	}
 	if (changed) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
-		entry->type = op;
+		entry->attr.bp_type = op;
 		ret = 0;
 		if (op > 0) {
 			entry->ksym_hbp =
-				register_wide_hw_breakpoint(entry->ksym_addr,
-					entry->len, entry->type,
-					ksym_hbp_handler, true);
+				register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
 			if (IS_ERR(entry->ksym_hbp))
 				ret = PTR_ERR(entry->ksym_hbp);
 			else
@@ -489,7 +487,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 
 	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
 
-	access_type = entry->type;
+	access_type = entry->attr.bp_type;
 
 	switch (access_type) {
 	case HW_BREAKPOINT_R:
@@ -505,7 +503,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 		seq_puts(m, "  NA          ");
 	}
 
-	if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+	if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
 		seq_printf(m, "  %-36s", fn_name);
 	else
 		seq_printf(m, "  %-36s", "<NA>");
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index ee7f9fbaffbd..29525500df00 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -51,13 +51,13 @@ static void sample_hbp_handler(struct perf_event *temp, void *data)
 static int __init hw_break_module_init(void)
 {
 	int ret;
-	unsigned long addr;
+	DEFINE_BREAKPOINT_ATTR(attr);
 
-	addr = kallsyms_lookup_name(ksym_name);
+	attr.bp_addr = kallsyms_lookup_name(ksym_name);
+	attr.bp_len = HW_BREAKPOINT_LEN_4;
+	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 
-	sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
-						 HW_BREAKPOINT_W | HW_BREAKPOINT_R,
-						 sample_hbp_handler, true);
+	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
 	if (IS_ERR(sample_hbp)) {
 		ret = PTR_ERR(sample_hbp);
 		goto fail;
-- 
cgit v1.2.3


From c8602edf3f9471466755329b78d309f2a01dd449 Mon Sep 17 00:00:00 2001
From: Thomas Kunze <thommycheck@gmx.de>
Date: Tue, 10 Feb 2009 14:54:57 +0100
Subject: move drivers/mfd/*.h to include/linux/mfd

So drivers like collie_battery driver can use
those files easier.
---
 drivers/mfd/mcp-core.c        |   2 +-
 drivers/mfd/mcp-sa11x0.c      |   2 +-
 drivers/mfd/mcp.h             |  66 -----------
 drivers/mfd/ucb1x00-assabet.c |   2 +-
 drivers/mfd/ucb1x00-core.c    |   2 +-
 drivers/mfd/ucb1x00-ts.c      |   2 +-
 drivers/mfd/ucb1x00.h         | 255 ------------------------------------------
 include/linux/mfd/mcp.h       |  68 +++++++++++
 include/linux/mfd/ucb1x00.h   | 255 ++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 328 insertions(+), 326 deletions(-)
 delete mode 100644 drivers/mfd/mcp.h
 delete mode 100644 drivers/mfd/ucb1x00.h
 create mode 100644 include/linux/mfd/mcp.h
 create mode 100644 include/linux/mfd/ucb1x00.h

(limited to 'include/linux')

diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 57271cb3b316..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -17,11 +17,11 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/mfd/mcp.h>
 
 #include <mach/dma.h>
 #include <asm/system.h>
 
-#include "mcp.h"
 
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 62b32dabf629..212189815c8e 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
+#include <linux/mfd/mcp.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
@@ -28,7 +29,6 @@
 
 #include <mach/assabet.h>
 
-#include "mcp.h"
 
 struct mcp_sa11x0 {
 	u32	mccr0;
diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h
deleted file mode 100644
index c093a93b8808..000000000000
--- a/drivers/mfd/mcp.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  linux/drivers/mfd/mcp.h
- *
- *  Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef MCP_H
-#define MCP_H
-
-struct mcp_ops;
-
-struct mcp {
-	struct module	*owner;
-	struct mcp_ops	*ops;
-	spinlock_t	lock;
-	int		use_count;
-	unsigned int	sclk_rate;
-	unsigned int	rw_timeout;
-	dma_device_t	dma_audio_rd;
-	dma_device_t	dma_audio_wr;
-	dma_device_t	dma_telco_rd;
-	dma_device_t	dma_telco_wr;
-	struct device	attached_device;
-};
-
-struct mcp_ops {
-	void		(*set_telecom_divisor)(struct mcp *, unsigned int);
-	void		(*set_audio_divisor)(struct mcp *, unsigned int);
-	void		(*reg_write)(struct mcp *, unsigned int, unsigned int);
-	unsigned int	(*reg_read)(struct mcp *, unsigned int);
-	void		(*enable)(struct mcp *);
-	void		(*disable)(struct mcp *);
-};
-
-void mcp_set_telecom_divisor(struct mcp *, unsigned int);
-void mcp_set_audio_divisor(struct mcp *, unsigned int);
-void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
-unsigned int mcp_reg_read(struct mcp *, unsigned int);
-void mcp_enable(struct mcp *);
-void mcp_disable(struct mcp *);
-#define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
-
-struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
-void mcp_host_unregister(struct mcp *);
-
-struct mcp_driver {
-	struct device_driver drv;
-	int (*probe)(struct mcp *);
-	void (*remove)(struct mcp *);
-	int (*suspend)(struct mcp *, pm_message_t);
-	int (*resume)(struct mcp *);
-};
-
-int mcp_driver_register(struct mcp_driver *);
-void mcp_driver_unregister(struct mcp_driver *);
-
-#define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
-#define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
-
-#define mcp_priv(mcp)		((void *)((mcp)+1))
-
-#endif
diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c
index 86fed4870f93..cea9da60850d 100644
--- a/drivers/mfd/ucb1x00-assabet.c
+++ b/drivers/mfd/ucb1x00-assabet.c
@@ -14,10 +14,10 @@
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/device.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 
-#include "ucb1x00.h"
 
 #define UCB1X00_ATTR(name,input)\
 static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 60c3988f3cf3..f9de7891e57f 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -25,11 +25,11 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
-#include "ucb1x00.h"
 
 static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 61b7d3eb9a2f..000cb414a78a 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -30,12 +30,12 @@
 #include <linux/freezer.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 #include <mach/collie.h>
 #include <asm/mach-types.h>
 
-#include "ucb1x00.h"
 
 
 struct ucb1x00_ts {
diff --git a/drivers/mfd/ucb1x00.h b/drivers/mfd/ucb1x00.h
deleted file mode 100644
index a8ad8a0ed5db..000000000000
--- a/drivers/mfd/ucb1x00.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- *  linux/drivers/mfd/ucb1x00.h
- *
- *  Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef UCB1200_H
-#define UCB1200_H
-
-#define UCB_IO_DATA	0x00
-#define UCB_IO_DIR	0x01
-
-#define UCB_IO_0		(1 << 0)
-#define UCB_IO_1		(1 << 1)
-#define UCB_IO_2		(1 << 2)
-#define UCB_IO_3		(1 << 3)
-#define UCB_IO_4		(1 << 4)
-#define UCB_IO_5		(1 << 5)
-#define UCB_IO_6		(1 << 6)
-#define UCB_IO_7		(1 << 7)
-#define UCB_IO_8		(1 << 8)
-#define UCB_IO_9		(1 << 9)
-
-#define UCB_IE_RIS	0x02
-#define UCB_IE_FAL	0x03
-#define UCB_IE_STATUS	0x04
-#define UCB_IE_CLEAR	0x04
-#define UCB_IE_ADC		(1 << 11)
-#define UCB_IE_TSPX		(1 << 12)
-#define UCB_IE_TSMX		(1 << 13)
-#define UCB_IE_TCLIP		(1 << 14)
-#define UCB_IE_ACLIP		(1 << 15)
-
-#define UCB_IRQ_TSPX		12
-
-#define UCB_TC_A	0x05
-#define UCB_TC_A_LOOP		(1 << 7)	/* UCB1200 */
-#define UCB_TC_A_AMPL		(1 << 7)	/* UCB1300 */
-
-#define UCB_TC_B	0x06
-#define UCB_TC_B_VOICE_ENA	(1 << 3)
-#define UCB_TC_B_CLIP		(1 << 4)
-#define UCB_TC_B_ATT		(1 << 6)
-#define UCB_TC_B_SIDE_ENA	(1 << 11)
-#define UCB_TC_B_MUTE		(1 << 13)
-#define UCB_TC_B_IN_ENA		(1 << 14)
-#define UCB_TC_B_OUT_ENA	(1 << 15)
-
-#define UCB_AC_A	0x07
-#define UCB_AC_B	0x08
-#define UCB_AC_B_LOOP		(1 << 8)
-#define UCB_AC_B_MUTE		(1 << 13)
-#define UCB_AC_B_IN_ENA		(1 << 14)
-#define UCB_AC_B_OUT_ENA	(1 << 15)
-
-#define UCB_TS_CR	0x09
-#define UCB_TS_CR_TSMX_POW	(1 << 0)
-#define UCB_TS_CR_TSPX_POW	(1 << 1)
-#define UCB_TS_CR_TSMY_POW	(1 << 2)
-#define UCB_TS_CR_TSPY_POW	(1 << 3)
-#define UCB_TS_CR_TSMX_GND	(1 << 4)
-#define UCB_TS_CR_TSPX_GND	(1 << 5)
-#define UCB_TS_CR_TSMY_GND	(1 << 6)
-#define UCB_TS_CR_TSPY_GND	(1 << 7)
-#define UCB_TS_CR_MODE_INT	(0 << 8)
-#define UCB_TS_CR_MODE_PRES	(1 << 8)
-#define UCB_TS_CR_MODE_POS	(2 << 8)
-#define UCB_TS_CR_BIAS_ENA	(1 << 11)
-#define UCB_TS_CR_TSPX_LOW	(1 << 12)
-#define UCB_TS_CR_TSMX_LOW	(1 << 13)
-
-#define UCB_ADC_CR	0x0a
-#define UCB_ADC_SYNC_ENA	(1 << 0)
-#define UCB_ADC_VREFBYP_CON	(1 << 1)
-#define UCB_ADC_INP_TSPX	(0 << 2)
-#define UCB_ADC_INP_TSMX	(1 << 2)
-#define UCB_ADC_INP_TSPY	(2 << 2)
-#define UCB_ADC_INP_TSMY	(3 << 2)
-#define UCB_ADC_INP_AD0		(4 << 2)
-#define UCB_ADC_INP_AD1		(5 << 2)
-#define UCB_ADC_INP_AD2		(6 << 2)
-#define UCB_ADC_INP_AD3		(7 << 2)
-#define UCB_ADC_EXT_REF		(1 << 5)
-#define UCB_ADC_START		(1 << 7)
-#define UCB_ADC_ENA		(1 << 15)
-
-#define UCB_ADC_DATA	0x0b
-#define UCB_ADC_DAT_VAL		(1 << 15)
-#define UCB_ADC_DAT(x)		(((x) & 0x7fe0) >> 5)
-
-#define UCB_ID		0x0c
-#define UCB_ID_1200		0x1004
-#define UCB_ID_1300		0x1005
-#define UCB_ID_TC35143          0x9712
-
-#define UCB_MODE	0x0d
-#define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
-#define UCB_MODE_AUD_OFF_CAN	(1 << 13)
-
-#include "mcp.h"
-
-struct ucb1x00_irq {
-	void *devid;
-	void (*fn)(int, void *);
-};
-
-struct ucb1x00 {
-	spinlock_t		lock;
-	struct mcp		*mcp;
-	unsigned int		irq;
-	struct semaphore	adc_sem;
-	spinlock_t		io_lock;
-	u16			id;
-	u16			io_dir;
-	u16			io_out;
-	u16			adc_cr;
-	u16			irq_fal_enbl;
-	u16			irq_ris_enbl;
-	struct ucb1x00_irq	irq_handler[16];
-	struct device		dev;
-	struct list_head	node;
-	struct list_head	devs;
-};
-
-struct ucb1x00_driver;
-
-struct ucb1x00_dev {
-	struct list_head	dev_node;
-	struct list_head	drv_node;
-	struct ucb1x00		*ucb;
-	struct ucb1x00_driver	*drv;
-	void			*priv;
-};
-
-struct ucb1x00_driver {
-	struct list_head	node;
-	struct list_head	devs;
-	int	(*add)(struct ucb1x00_dev *dev);
-	void	(*remove)(struct ucb1x00_dev *dev);
-	int	(*suspend)(struct ucb1x00_dev *dev, pm_message_t state);
-	int	(*resume)(struct ucb1x00_dev *dev);
-};
-
-#define classdev_to_ucb1x00(cd)	container_of(cd, struct ucb1x00, dev)
-
-int ucb1x00_register_driver(struct ucb1x00_driver *);
-void ucb1x00_unregister_driver(struct ucb1x00_driver *);
-
-/**
- *	ucb1x00_clkrate - return the UCB1x00 SIB clock rate
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Return the SIB clock rate in Hz.
- */
-static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb)
-{
-	return mcp_get_sclk_rate(ucb->mcp);
-}
-
-/**
- *	ucb1x00_enable - enable the UCB1x00 SIB clock
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Enable the SIB clock.  This can be called multiple times.
- */
-static inline void ucb1x00_enable(struct ucb1x00 *ucb)
-{
-	mcp_enable(ucb->mcp);
-}
-
-/**
- *	ucb1x00_disable - disable the UCB1x00 SIB clock
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Disable the SIB clock.  The SIB clock will only be disabled
- *	when the number of ucb1x00_enable calls match the number of
- *	ucb1x00_disable calls.
- */
-static inline void ucb1x00_disable(struct ucb1x00 *ucb)
-{
-	mcp_disable(ucb->mcp);
-}
-
-/**
- *	ucb1x00_reg_write - write a UCB1x00 register
- *	@ucb: UCB1x00 structure describing chip
- *	@reg: UCB1x00 4-bit register index to write
- *	@val: UCB1x00 16-bit value to write
- *
- *	Write the UCB1x00 register @reg with value @val.  The SIB
- *	clock must be running for this function to return.
- */
-static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val)
-{
-	mcp_reg_write(ucb->mcp, reg, val);
-}
-
-/**
- *	ucb1x00_reg_read - read a UCB1x00 register
- *	@ucb: UCB1x00 structure describing chip
- *	@reg: UCB1x00 4-bit register index to write
- *
- *	Read the UCB1x00 register @reg and return its value.  The SIB
- *	clock must be running for this function to return.
- */
-static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg)
-{
-	return mcp_reg_read(ucb->mcp, reg);
-}
-/**
- *	ucb1x00_set_audio_divisor - 
- *	@ucb: UCB1x00 structure describing chip
- *	@div: SIB clock divisor
- */
-static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
-	mcp_set_audio_divisor(ucb->mcp, div);
-}
-
-/**
- *	ucb1x00_set_telecom_divisor -
- *	@ucb: UCB1x00 structure describing chip
- *	@div: SIB clock divisor
- */
-static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
-	mcp_set_telecom_divisor(ucb->mcp, div);
-}
-
-void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int);
-void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int);
-unsigned int ucb1x00_io_read(struct ucb1x00 *ucb);
-
-#define UCB_NOSYNC	(0)
-#define UCB_SYNC	(1)
-
-unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync);
-void ucb1x00_adc_enable(struct ucb1x00 *ucb);
-void ucb1x00_adc_disable(struct ucb1x00 *ucb);
-
-/*
- * Which edges of the IRQ do you want to control today?
- */
-#define UCB_RISING	(1 << 0)
-#define UCB_FALLING	(1 << 1)
-
-int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid);
-void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid);
-
-#endif
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
new file mode 100644
index 000000000000..be95e09fd746
--- /dev/null
+++ b/include/linux/mfd/mcp.h
@@ -0,0 +1,68 @@
+/*
+ *  linux/drivers/mfd/mcp.h
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef MCP_H
+#define MCP_H
+
+#include <mach/dma.h>
+
+struct mcp_ops;
+
+struct mcp {
+	struct module	*owner;
+	struct mcp_ops	*ops;
+	spinlock_t	lock;
+	int		use_count;
+	unsigned int	sclk_rate;
+	unsigned int	rw_timeout;
+	dma_device_t	dma_audio_rd;
+	dma_device_t	dma_audio_wr;
+	dma_device_t	dma_telco_rd;
+	dma_device_t	dma_telco_wr;
+	struct device	attached_device;
+};
+
+struct mcp_ops {
+	void		(*set_telecom_divisor)(struct mcp *, unsigned int);
+	void		(*set_audio_divisor)(struct mcp *, unsigned int);
+	void		(*reg_write)(struct mcp *, unsigned int, unsigned int);
+	unsigned int	(*reg_read)(struct mcp *, unsigned int);
+	void		(*enable)(struct mcp *);
+	void		(*disable)(struct mcp *);
+};
+
+void mcp_set_telecom_divisor(struct mcp *, unsigned int);
+void mcp_set_audio_divisor(struct mcp *, unsigned int);
+void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
+unsigned int mcp_reg_read(struct mcp *, unsigned int);
+void mcp_enable(struct mcp *);
+void mcp_disable(struct mcp *);
+#define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
+
+struct mcp *mcp_host_alloc(struct device *, size_t);
+int mcp_host_register(struct mcp *);
+void mcp_host_unregister(struct mcp *);
+
+struct mcp_driver {
+	struct device_driver drv;
+	int (*probe)(struct mcp *);
+	void (*remove)(struct mcp *);
+	int (*suspend)(struct mcp *, pm_message_t);
+	int (*resume)(struct mcp *);
+};
+
+int mcp_driver_register(struct mcp_driver *);
+void mcp_driver_unregister(struct mcp_driver *);
+
+#define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
+#define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
+
+#define mcp_priv(mcp)		((void *)((mcp)+1))
+
+#endif
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
new file mode 100644
index 000000000000..eac346336382
--- /dev/null
+++ b/include/linux/mfd/ucb1x00.h
@@ -0,0 +1,255 @@
+/*
+ *  linux/include/mfd/ucb1x00.h
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef UCB1200_H
+#define UCB1200_H
+
+#include <linux/mfd/mcp.h>
+#define UCB_IO_DATA	0x00
+#define UCB_IO_DIR	0x01
+
+#define UCB_IO_0		(1 << 0)
+#define UCB_IO_1		(1 << 1)
+#define UCB_IO_2		(1 << 2)
+#define UCB_IO_3		(1 << 3)
+#define UCB_IO_4		(1 << 4)
+#define UCB_IO_5		(1 << 5)
+#define UCB_IO_6		(1 << 6)
+#define UCB_IO_7		(1 << 7)
+#define UCB_IO_8		(1 << 8)
+#define UCB_IO_9		(1 << 9)
+
+#define UCB_IE_RIS	0x02
+#define UCB_IE_FAL	0x03
+#define UCB_IE_STATUS	0x04
+#define UCB_IE_CLEAR	0x04
+#define UCB_IE_ADC		(1 << 11)
+#define UCB_IE_TSPX		(1 << 12)
+#define UCB_IE_TSMX		(1 << 13)
+#define UCB_IE_TCLIP		(1 << 14)
+#define UCB_IE_ACLIP		(1 << 15)
+
+#define UCB_IRQ_TSPX		12
+
+#define UCB_TC_A	0x05
+#define UCB_TC_A_LOOP		(1 << 7)	/* UCB1200 */
+#define UCB_TC_A_AMPL		(1 << 7)	/* UCB1300 */
+
+#define UCB_TC_B	0x06
+#define UCB_TC_B_VOICE_ENA	(1 << 3)
+#define UCB_TC_B_CLIP		(1 << 4)
+#define UCB_TC_B_ATT		(1 << 6)
+#define UCB_TC_B_SIDE_ENA	(1 << 11)
+#define UCB_TC_B_MUTE		(1 << 13)
+#define UCB_TC_B_IN_ENA		(1 << 14)
+#define UCB_TC_B_OUT_ENA	(1 << 15)
+
+#define UCB_AC_A	0x07
+#define UCB_AC_B	0x08
+#define UCB_AC_B_LOOP		(1 << 8)
+#define UCB_AC_B_MUTE		(1 << 13)
+#define UCB_AC_B_IN_ENA		(1 << 14)
+#define UCB_AC_B_OUT_ENA	(1 << 15)
+
+#define UCB_TS_CR	0x09
+#define UCB_TS_CR_TSMX_POW	(1 << 0)
+#define UCB_TS_CR_TSPX_POW	(1 << 1)
+#define UCB_TS_CR_TSMY_POW	(1 << 2)
+#define UCB_TS_CR_TSPY_POW	(1 << 3)
+#define UCB_TS_CR_TSMX_GND	(1 << 4)
+#define UCB_TS_CR_TSPX_GND	(1 << 5)
+#define UCB_TS_CR_TSMY_GND	(1 << 6)
+#define UCB_TS_CR_TSPY_GND	(1 << 7)
+#define UCB_TS_CR_MODE_INT	(0 << 8)
+#define UCB_TS_CR_MODE_PRES	(1 << 8)
+#define UCB_TS_CR_MODE_POS	(2 << 8)
+#define UCB_TS_CR_BIAS_ENA	(1 << 11)
+#define UCB_TS_CR_TSPX_LOW	(1 << 12)
+#define UCB_TS_CR_TSMX_LOW	(1 << 13)
+
+#define UCB_ADC_CR	0x0a
+#define UCB_ADC_SYNC_ENA	(1 << 0)
+#define UCB_ADC_VREFBYP_CON	(1 << 1)
+#define UCB_ADC_INP_TSPX	(0 << 2)
+#define UCB_ADC_INP_TSMX	(1 << 2)
+#define UCB_ADC_INP_TSPY	(2 << 2)
+#define UCB_ADC_INP_TSMY	(3 << 2)
+#define UCB_ADC_INP_AD0		(4 << 2)
+#define UCB_ADC_INP_AD1		(5 << 2)
+#define UCB_ADC_INP_AD2		(6 << 2)
+#define UCB_ADC_INP_AD3		(7 << 2)
+#define UCB_ADC_EXT_REF		(1 << 5)
+#define UCB_ADC_START		(1 << 7)
+#define UCB_ADC_ENA		(1 << 15)
+
+#define UCB_ADC_DATA	0x0b
+#define UCB_ADC_DAT_VAL		(1 << 15)
+#define UCB_ADC_DAT(x)		(((x) & 0x7fe0) >> 5)
+
+#define UCB_ID		0x0c
+#define UCB_ID_1200		0x1004
+#define UCB_ID_1300		0x1005
+#define UCB_ID_TC35143          0x9712
+
+#define UCB_MODE	0x0d
+#define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
+#define UCB_MODE_AUD_OFF_CAN	(1 << 13)
+
+
+struct ucb1x00_irq {
+	void *devid;
+	void (*fn)(int, void *);
+};
+
+struct ucb1x00 {
+	spinlock_t		lock;
+	struct mcp		*mcp;
+	unsigned int		irq;
+	struct semaphore	adc_sem;
+	spinlock_t		io_lock;
+	u16			id;
+	u16			io_dir;
+	u16			io_out;
+	u16			adc_cr;
+	u16			irq_fal_enbl;
+	u16			irq_ris_enbl;
+	struct ucb1x00_irq	irq_handler[16];
+	struct device		dev;
+	struct list_head	node;
+	struct list_head	devs;
+};
+
+struct ucb1x00_driver;
+
+struct ucb1x00_dev {
+	struct list_head	dev_node;
+	struct list_head	drv_node;
+	struct ucb1x00		*ucb;
+	struct ucb1x00_driver	*drv;
+	void			*priv;
+};
+
+struct ucb1x00_driver {
+	struct list_head	node;
+	struct list_head	devs;
+	int	(*add)(struct ucb1x00_dev *dev);
+	void	(*remove)(struct ucb1x00_dev *dev);
+	int	(*suspend)(struct ucb1x00_dev *dev, pm_message_t state);
+	int	(*resume)(struct ucb1x00_dev *dev);
+};
+
+#define classdev_to_ucb1x00(cd)	container_of(cd, struct ucb1x00, dev)
+
+int ucb1x00_register_driver(struct ucb1x00_driver *);
+void ucb1x00_unregister_driver(struct ucb1x00_driver *);
+
+/**
+ *	ucb1x00_clkrate - return the UCB1x00 SIB clock rate
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Return the SIB clock rate in Hz.
+ */
+static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb)
+{
+	return mcp_get_sclk_rate(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_enable - enable the UCB1x00 SIB clock
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Enable the SIB clock.  This can be called multiple times.
+ */
+static inline void ucb1x00_enable(struct ucb1x00 *ucb)
+{
+	mcp_enable(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_disable - disable the UCB1x00 SIB clock
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Disable the SIB clock.  The SIB clock will only be disabled
+ *	when the number of ucb1x00_enable calls match the number of
+ *	ucb1x00_disable calls.
+ */
+static inline void ucb1x00_disable(struct ucb1x00 *ucb)
+{
+	mcp_disable(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_reg_write - write a UCB1x00 register
+ *	@ucb: UCB1x00 structure describing chip
+ *	@reg: UCB1x00 4-bit register index to write
+ *	@val: UCB1x00 16-bit value to write
+ *
+ *	Write the UCB1x00 register @reg with value @val.  The SIB
+ *	clock must be running for this function to return.
+ */
+static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val)
+{
+	mcp_reg_write(ucb->mcp, reg, val);
+}
+
+/**
+ *	ucb1x00_reg_read - read a UCB1x00 register
+ *	@ucb: UCB1x00 structure describing chip
+ *	@reg: UCB1x00 4-bit register index to write
+ *
+ *	Read the UCB1x00 register @reg and return its value.  The SIB
+ *	clock must be running for this function to return.
+ */
+static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg)
+{
+	return mcp_reg_read(ucb->mcp, reg);
+}
+/**
+ *	ucb1x00_set_audio_divisor - 
+ *	@ucb: UCB1x00 structure describing chip
+ *	@div: SIB clock divisor
+ */
+static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div)
+{
+	mcp_set_audio_divisor(ucb->mcp, div);
+}
+
+/**
+ *	ucb1x00_set_telecom_divisor -
+ *	@ucb: UCB1x00 structure describing chip
+ *	@div: SIB clock divisor
+ */
+static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div)
+{
+	mcp_set_telecom_divisor(ucb->mcp, div);
+}
+
+void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int);
+void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int);
+unsigned int ucb1x00_io_read(struct ucb1x00 *ucb);
+
+#define UCB_NOSYNC	(0)
+#define UCB_SYNC	(1)
+
+unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync);
+void ucb1x00_adc_enable(struct ucb1x00 *ucb);
+void ucb1x00_adc_disable(struct ucb1x00 *ucb);
+
+/*
+ * Which edges of the IRQ do you want to control today?
+ */
+#define UCB_RISING	(1 << 0)
+#define UCB_FALLING	(1 << 1)
+
+int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid);
+void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
+void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
+int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid);
+
+#endif
-- 
cgit v1.2.3


From 9ca3dc805cd0d89c44f88b9a399061946781323a Mon Sep 17 00:00:00 2001
From: Thomas Kunze <thommycheck@gmx.de>
Date: Tue, 10 Feb 2009 14:50:56 +0100
Subject: add gpiolib support to ucb1x00

The old access methods to the gpios will be removed when
all users has been converted. (mainly ucb1x00-ts)
---
 arch/arm/mach-sa1100/include/mach/mcp.h |  1 +
 drivers/mfd/mcp-sa11x0.c                |  1 +
 drivers/mfd/ucb1x00-core.c              | 87 ++++++++++++++++++++++++++++++++-
 include/linux/mfd/mcp.h                 |  1 +
 include/linux/mfd/ucb1x00.h             |  3 ++
 5 files changed, 91 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index fb8b09a57ad7..ed1a331508a7 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -16,6 +16,7 @@ struct mcp_plat_data {
 	u32 mccr0;
 	u32 mccr1;
 	unsigned int sclk_rate;
+	int gpio_base;
 };
 
 #endif
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 212189815c8e..258427232728 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
+	mcp->gpio_base		= data->gpio_base;
 
 	platform_set_drvdata(pdev, mcp);
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index f9de7891e57f..252b74188ec2 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -26,11 +26,11 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/mfd/ucb1x00.h>
+#include <linux/gpio.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
-
 static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
@@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb)
 	return ucb1x00_reg_read(ucb, UCB_IO_DATA);
 }
 
+static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	if (value)
+		ucb->io_out |= 1 << offset;
+	else
+		ucb->io_out &= ~(1 << offset);
+
+	ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+}
+
+static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset);
+}
+
+static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	ucb->io_dir &= ~(1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+	return 0;
+}
+
+static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset
+		, int value)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	ucb->io_dir |= (1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+
+	if (value)
+		ucb->io_out |= 1 << offset;
+	else
+		ucb->io_out &= ~(1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+	return 0;
+}
+
 /*
  * UCB1300 data sheet says we must:
  *  1. enable ADC	=> 5us (including reference startup time)
@@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp)
 	struct ucb1x00_driver *drv;
 	unsigned int id;
 	int ret = -ENODEV;
+	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
@@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp)
 		goto err_free;
 	}
 
+	ucb->gpio.base = -1;
+	if (mcp->gpio_base != 0) {
+		ucb->gpio.label = dev_name(&ucb->dev);
+		ucb->gpio.base = mcp->gpio_base;
+		ucb->gpio.ngpio = 10;
+		ucb->gpio.set = ucb1x00_gpio_set;
+		ucb->gpio.get = ucb1x00_gpio_get;
+		ucb->gpio.direction_input = ucb1x00_gpio_direction_input;
+		ucb->gpio.direction_output = ucb1x00_gpio_direction_output;
+		ret = gpiochip_add(&ucb->gpio);
+		if (ret)
+			goto err_free;
+	} else
+		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
+
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
 			  "UCB1x00", ucb);
 	if (ret) {
 		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
 			ucb->irq, ret);
-		goto err_free;
+		goto err_gpio;
 	}
 
 	mcp_set_drvdata(mcp, ucb);
@@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (ret)
 		goto err_irq;
 
+
 	INIT_LIST_HEAD(&ucb->devs);
 	mutex_lock(&ucb1x00_mutex);
 	list_add(&ucb->node, &ucb1x00_devices);
@@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp)
 		ucb1x00_add_dev(ucb, drv);
 	}
 	mutex_unlock(&ucb1x00_mutex);
+
 	goto out;
 
  err_irq:
 	free_irq(ucb->irq, ucb);
+ err_gpio:
+	if (ucb->gpio.base != -1)
+		temp = gpiochip_remove(&ucb->gpio);
  err_free:
 	kfree(ucb);
  err_disable:
@@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp)
 {
 	struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
 	struct list_head *l, *n;
+	int ret;
 
 	mutex_lock(&ucb1x00_mutex);
 	list_del(&ucb->node);
@@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp)
 	}
 	mutex_unlock(&ucb1x00_mutex);
 
+	if (ucb->gpio.base != -1) {
+		ret = gpiochip_remove(&ucb->gpio);
+		if (ret)
+			dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret);
+	}
+
 	free_irq(ucb->irq, ucb);
 	device_unregister(&ucb->dev);
 }
@@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp)
 	struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
 	struct ucb1x00_dev *dev;
 
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
 	mutex_lock(&ucb1x00_mutex);
 	list_for_each_entry(dev, &ucb->devs, dev_node) {
 		if (dev->drv->resume)
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index be95e09fd746..ee496708e38b 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -26,6 +26,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
+	int		gpio_base;
 };
 
 struct mcp_ops {
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index eac346336382..aa9c3789bed4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -11,6 +11,8 @@
 #define UCB1200_H
 
 #include <linux/mfd/mcp.h>
+#include <linux/gpio.h>
+
 #define UCB_IO_DATA	0x00
 #define UCB_IO_DIR	0x01
 
@@ -123,6 +125,7 @@ struct ucb1x00 {
 	struct device		dev;
 	struct list_head	node;
 	struct list_head	devs;
+	struct gpio_chip 	gpio;
 };
 
 struct ucb1x00_driver;
-- 
cgit v1.2.3


From 67fbb16be69d138a3b6645ec5395b487cb915c58 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Tue, 24 Nov 2009 23:59:15 +0100
Subject: nl80211: PMKSA caching support

This is an interface to set, delete and flush PMKIDs through nl80211.
Main users would be fullmac devices which firmwares are capable of
generating the RSN IEs for the re-association requests, e.g. iwmc3200wifi.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |   2 +
 include/linux/nl80211.h   |  11 +++++
 include/net/cfg80211.h    |  28 +++++++++++
 net/wireless/nl80211.c    | 120 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 161 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index afa8e0ac27a7..d9724a28c0c2 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1266,6 +1266,8 @@ enum ieee80211_sa_query_action {
 
 #define WLAN_MAX_KEY_LEN		32
 
+#define WLAN_PMKID_LEN			16
+
 /**
  * ieee80211_get_qos_ctl - get pointer to qos control bytes
  * @hdr: the frame
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 45db17f81aa3..da8ea2e19273 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -349,6 +349,10 @@ enum nl80211_commands {
 	NL80211_CMD_GET_SURVEY,
 	NL80211_CMD_NEW_SURVEY_RESULTS,
 
+	NL80211_CMD_SET_PMKSA,
+	NL80211_CMD_DEL_PMKSA,
+	NL80211_CMD_FLUSH_PMKSA,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -598,6 +602,10 @@ enum nl80211_commands {
  *      the survey response for %NL80211_CMD_GET_SURVEY, nested attribute
  *      containing info as possible, see &enum survey_info.
  *
+ * @NL80211_ATTR_PMKID: PMK material for PMKSA caching.
+ * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can
+ *	cache, a wiphy attribute.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -732,6 +740,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SURVEY_INFO,
 
+	NL80211_ATTR_PMKID,
+	NL80211_ATTR_MAX_NUM_PMKIDS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a6492e9bca97..0884b9a0f778 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -871,6 +871,19 @@ struct cfg80211_bitrate_mask {
 	u32 fixed;   /* fixed bitrate, 0 == not fixed */
 	u32 maxrate; /* in kbps, 0 == no limit */
 };
+/**
+ * struct cfg80211_pmksa - PMK Security Association
+ *
+ * This structure is passed to the set/del_pmksa() method for PMKSA
+ * caching.
+ *
+ * @bssid: The AP's BSSID.
+ * @pmkid: The PMK material itself.
+ */
+struct cfg80211_pmksa {
+	u8 *bssid;
+	u8 *pmkid;
+};
 
 /**
  * struct cfg80211_ops - backend description for wireless configuration
@@ -976,6 +989,13 @@ struct cfg80211_bitrate_mask {
  * @dump_survey: get site survey information.
  *
  * @testmode_cmd: run a test mode command
+ *
+ * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac
+ *	devices running firmwares capable of generating the (re) association
+ *	RSN IE. It allows for faster roaming between WPA2 BSSIDs.
+ * @del_pmksa: Delete a cached PMKID.
+ * @flush_pmksa: Flush all cached PMKIDs.
+ *
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -1097,6 +1117,12 @@ struct cfg80211_ops {
 	int	(*dump_survey)(struct wiphy *wiphy, struct net_device *netdev,
 			int idx, struct survey_info *info);
 
+	int	(*set_pmksa)(struct wiphy *wiphy, struct net_device *netdev,
+			     struct cfg80211_pmksa *pmksa);
+	int	(*del_pmksa)(struct wiphy *wiphy, struct net_device *netdev,
+			     struct cfg80211_pmksa *pmksa);
+	int	(*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev);
+
 	/* some temporary stuff to finish wext */
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
@@ -1195,6 +1221,8 @@ struct wiphy {
 	char fw_version[ETHTOOL_BUSINFO_LEN];
 	u32 hw_version;
 
+	u8 max_num_pmkids;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 149539ade15e..a6028433e3a0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -139,6 +139,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
 	[NL80211_ATTR_PID] = { .type = NLA_U32 },
 	[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
+	[NL80211_ATTR_PMKID] = { .type = NLA_BINARY,
+				 .len = WLAN_PMKID_LEN },
 };
 
 /* policy for the attributes */
@@ -450,6 +452,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		sizeof(u32) * dev->wiphy.n_cipher_suites,
 		dev->wiphy.cipher_suites);
 
+	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
+		   dev->wiphy.max_num_pmkids);
+
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
 		goto nla_put_failure;
@@ -561,6 +566,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(deauth, DEAUTHENTICATE);
 	CMD(disassoc, DISASSOCIATE);
 	CMD(join_ibss, JOIN_IBSS);
+	CMD(set_pmksa, SET_PMKSA);
+	CMD(del_pmksa, DEL_PMKSA);
+	CMD(flush_pmksa, FLUSH_PMKSA);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -4221,6 +4229,99 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev,
+			struct cfg80211_pmksa *pmksa) = NULL;
+	int err;
+	struct net_device *dev;
+	struct cfg80211_pmksa pmksa;
+
+	memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_PMKID])
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto out_rtnl;
+
+	pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
+	pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	switch (info->genlhdr->cmd) {
+	case NL80211_CMD_SET_PMKSA:
+		rdev_ops = rdev->ops->set_pmksa;
+		break;
+	case NL80211_CMD_DEL_PMKSA:
+		rdev_ops = rdev->ops->del_pmksa;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	if (!rdev_ops) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = rdev_ops(&rdev->wiphy, dev, &pmksa);
+
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+ out_rtnl:
+	rtnl_unlock();
+
+	return err;
+}
+
+static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	int err;
+	struct net_device *dev;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto out_rtnl;
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!rdev->ops->flush_pmksa) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = rdev->ops->flush_pmksa(&rdev->wiphy, dev);
+
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+ out_rtnl:
+	rtnl_unlock();
+
+	return err;
+
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -4465,6 +4566,25 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.dumpit = nl80211_dump_survey,
 	},
+	{
+		.cmd = NL80211_CMD_SET_PMKSA,
+		.doit = nl80211_setdel_pmksa,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_PMKSA,
+		.doit = nl80211_setdel_pmksa,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_FLUSH_PMKSA,
+		.doit = nl80211_flush_pmksa,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+
 };
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
 	.name = "mlme",
-- 
cgit v1.2.3


From 5789d290cda6854b03986031df02b965572279df Mon Sep 17 00:00:00 2001
From: PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>
Date: Wed, 25 Nov 2009 00:11:30 +0000
Subject: ethtool: Add Direct Attach support to connector port reporting

This patch allows a base driver to specify Direct Attach as the
type of port through the ethtool interface.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index edd03b79e8f8..bcaa0e0a54d3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -631,6 +631,8 @@ struct ethtool_ops {
 #define PORT_MII		0x02
 #define PORT_FIBRE		0x03
 #define PORT_BNC		0x04
+#define PORT_DA			0x05
+#define PORT_NONE		0xef
 #define PORT_OTHER		0xff
 
 /* Which transceiver to use. */
-- 
cgit v1.2.3


From 8e7cac79808b62f242069a6ac88d364d35621371 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 29 Nov 2009 16:34:48 +0200
Subject: core: Fix user return notifier on fork()

fork() clones all thread_info flags, including
TIF_USER_RETURN_NOTIFY; if the new task is first scheduled on a cpu
which doesn't have user return notifiers set, this causes user
return notifiers to trigger without any way of clearing itself.

This is easy to trigger with a forky workload on the host in
parallel with kvm, resulting in a cpu in an endless loop on the
verge of returning to userspace.

Fix by dropping the TIF_USER_RETURN_NOTIFY immediately after fork.

Signed-off-by: Avi Kivity <avi@redhat.com>
LKML-Reference: <1259505288-16559-1-git-send-email-avi@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/user-return-notifier.h | 7 +++++++
 kernel/fork.c                        | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h
index b6ac056291d7..9c4a445bb43c 100644
--- a/include/linux/user-return-notifier.h
+++ b/include/linux/user-return-notifier.h
@@ -26,6 +26,11 @@ static inline void propagate_user_return_notify(struct task_struct *prev,
 
 void fire_user_return_notifiers(void);
 
+static inline void clear_user_return_notifier(struct task_struct *p)
+{
+	clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY);
+}
+
 #else
 
 struct user_return_notifier {};
@@ -37,6 +42,8 @@ static inline void propagate_user_return_notify(struct task_struct *prev,
 
 static inline void fire_user_return_notifiers(void) {}
 
+static inline void clear_user_return_notifier(struct task_struct *p) {}
+
 #endif
 
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 266c6af6ef1b..1b7512d5a64a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
-- 
cgit v1.2.3


From be9cd7b6f84fd0cc59c8770771073b5c66f958ac Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 27 Nov 2009 04:31:27 +0000
Subject: mfd: Add power control platform data to SDHI driver

This patch adds platform data with a function for power
control to the SDHI driver. The idea is that board specific
code can provide their own functions so power can be enabled
and disabled for the sd-cards.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/mfd/sh_mobile_sdhi.c       | 11 +++++++++++
 include/linux/mfd/sh_mobile_sdhi.h |  8 ++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 include/linux/mfd/sh_mobile_sdhi.h

(limited to 'include/linux')

diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c
index 56f72cc1d569..03efae8041ab 100644
--- a/drivers/mfd/sh_mobile_sdhi.c
+++ b/drivers/mfd/sh_mobile_sdhi.c
@@ -24,6 +24,7 @@
 
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
+#include <linux/mfd/sh_mobile_sdhi.h>
 
 struct sh_mobile_sdhi {
 	struct clk *clk;
@@ -50,6 +51,15 @@ static struct mfd_cell sh_mobile_sdhi_cell = {
 	.resources     = sh_mobile_sdhi_resources,
 };
 
+static void sh_mobile_sdhi_set_pwr(struct platform_device *tmio, int state)
+{
+	struct platform_device *pdev = to_platform_device(tmio->dev.parent);
+	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
+
+	if (p && p->set_pwr)
+		p->set_pwr(pdev, state);
+}
+
 static int __init sh_mobile_sdhi_probe(struct platform_device *pdev)
 {
 	struct sh_mobile_sdhi *priv;
@@ -87,6 +97,7 @@ static int __init sh_mobile_sdhi_probe(struct platform_device *pdev)
 
 	/* FIXME: silly const unsigned int hclk */
 	*(unsigned int *)&priv->mmc_data.hclk = clk_get_rate(priv->clk);
+	priv->mmc_data.set_pwr = sh_mobile_sdhi_set_pwr;
 
 	memcpy(&priv->cell_mmc, &sh_mobile_sdhi_cell, sizeof(priv->cell_mmc));
 	priv->cell_mmc.driver_data = &priv->mmc_data;
diff --git a/include/linux/mfd/sh_mobile_sdhi.h b/include/linux/mfd/sh_mobile_sdhi.h
new file mode 100644
index 000000000000..3bcd7163485c
--- /dev/null
+++ b/include/linux/mfd/sh_mobile_sdhi.h
@@ -0,0 +1,8 @@
+#ifndef __SH_MOBILE_SDHI_H__
+#define __SH_MOBILE_SDHI_H__
+
+struct sh_mobile_sdhi_info {
+	void (*set_pwr)(struct platform_device *pdev, int state);
+};
+
+#endif /* __SH_MOBILE_SDHI_H__ */
-- 
cgit v1.2.3


From fc1d003de39c306a44abce97c346921de31277cd Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 27 Nov 2009 07:32:24 +0000
Subject: sh: Move KEYSC header file

This patch moves the KEYSC header file from the
SuperH specific asm directory to a place where
it can be shared by multiple architectures.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/boards/mach-ecovec24/setup.c |  2 +-
 arch/sh/boards/mach-kfr2r09/setup.c  |  2 +-
 arch/sh/boards/mach-migor/setup.c    |  2 +-
 arch/sh/boards/mach-se/7722/setup.c  |  2 +-
 arch/sh/boards/mach-se/7724/setup.c  |  2 +-
 arch/sh/include/asm/sh_keysc.h       | 14 --------------
 drivers/input/keyboard/sh_keysc.c    |  2 +-
 include/linux/input/sh_keysc.h       | 14 ++++++++++++++
 8 files changed, 20 insertions(+), 20 deletions(-)
 delete mode 100644 arch/sh/include/asm/sh_keysc.h
 create mode 100644 include/linux/input/sh_keysc.h

(limited to 'include/linux')

diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 5932f049e782..0dd98ed5f7a8 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -20,12 +20,12 @@
 #include <linux/i2c.h>
 #include <linux/i2c/tsc2007.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/mfd/sh_mobile_sdhi.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
 #include <asm/heartbeat.h>
 #include <asm/sh_eth.h>
-#include <asm/sh_keysc.h>
 #include <asm/clock.h>
 #include <asm/suspend.h>
 #include <cpu/sh7724.h>
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 85fa8a3b7f73..e755bad6dc15 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -16,6 +16,7 @@
 #include <linux/clk.h>
 #include <linux/gpio.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/i2c.h>
 #include <linux/usb/r8a66597.h>
 #include <media/soc_camera.h>
@@ -25,7 +26,6 @@
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
-#include <asm/sh_keysc.h>
 #include <cpu/sh7724.h>
 #include <mach/kfr2r09.h>
 
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 369525701d60..9099b6da9957 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/nand.h>
 #include <linux/i2c.h>
@@ -25,7 +26,6 @@
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
-#include <asm/sh_keysc.h>
 #include <asm/suspend.h>
 #include <mach/migor.h>
 #include <cpu/sh7722.h>
diff --git a/arch/sh/boards/mach-se/7722/setup.c b/arch/sh/boards/mach-se/7722/setup.c
index d05f34f6528e..b1cb9425b600 100644
--- a/arch/sh/boards/mach-se/7722/setup.c
+++ b/arch/sh/boards/mach-se/7722/setup.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/smc91x.h>
 #include <mach-se/mach/se7722.h>
 #include <mach-se/mach/mrshpc.h>
@@ -21,7 +22,6 @@
 #include <asm/clock.h>
 #include <asm/io.h>
 #include <asm/heartbeat.h>
-#include <asm/sh_keysc.h>
 #include <cpu/sh7722.h>
 
 /* Heartbeat */
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index ae23fa970e6d..da01fc0dc881 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -19,6 +19,7 @@
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/usb/r8a66597.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
@@ -27,7 +28,6 @@
 #include <asm/heartbeat.h>
 #include <asm/sh_eth.h>
 #include <asm/clock.h>
-#include <asm/sh_keysc.h>
 #include <asm/suspend.h>
 #include <cpu/sh7724.h>
 #include <mach-se/mach/se7724.h>
diff --git a/arch/sh/include/asm/sh_keysc.h b/arch/sh/include/asm/sh_keysc.h
deleted file mode 100644
index 4a65b1e40eab..000000000000
--- a/arch/sh/include/asm/sh_keysc.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __ASM_KEYSC_H__
-#define __ASM_KEYSC_H__
-
-#define SH_KEYSC_MAXKEYS 30
-
-struct sh_keysc_info {
-	enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3 } mode;
-	int scan_timing; /* 0 -> 7, see KYCR1, SCN[2:0] */
-	int delay;
-	int kycr2_delay;
-	int keycodes[SH_KEYSC_MAXKEYS];
-};
-
-#endif /* __ASM_KEYSC_H__ */
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 887af79b7bff..076111fc72d2 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -18,9 +18,9 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <asm/sh_keysc.h>
 
 #define KYCR1_OFFS   0x00
 #define KYCR2_OFFS   0x04
diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h
new file mode 100644
index 000000000000..c211b5cf08e6
--- /dev/null
+++ b/include/linux/input/sh_keysc.h
@@ -0,0 +1,14 @@
+#ifndef __SH_KEYSC_H__
+#define __SH_KEYSC_H__
+
+#define SH_KEYSC_MAXKEYS 30
+
+struct sh_keysc_info {
+	enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3 } mode;
+	int scan_timing; /* 0 -> 7, see KYCR1, SCN[2:0] */
+	int delay;
+	int kycr2_delay;
+	int keycodes[SH_KEYSC_MAXKEYS];
+};
+
+#endif /* __SH_KEYSC_H__ */
-- 
cgit v1.2.3


From fae4339919c741f89f7e293b8c646207e1df28e1 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 27 Nov 2009 07:38:01 +0000
Subject: sh: Break out SuperH PFC code

This file breaks out the SuperH PFC code from
arch/sh/kernel/gpio.c + arch/sh/include/asm/gpio.h
to drivers/sh/pfc.c + include/linux/sh_pfc.h.

Similar to the INTC stuff. The non-SuperH specific
file location makes it possible to share the code
between multiple architectures.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/gpio.h |  82 +------
 arch/sh/kernel/Makefile    |   1 -
 arch/sh/kernel/gpio.c      | 584 ---------------------------------------------
 drivers/sh/Makefile        |   1 +
 drivers/sh/pfc.c           | 584 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sh_pfc.h     |  96 ++++++++
 6 files changed, 682 insertions(+), 666 deletions(-)
 delete mode 100644 arch/sh/kernel/gpio.c
 create mode 100644 drivers/sh/pfc.c
 create mode 100644 include/linux/sh_pfc.h

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h
index 61f93da2c62e..f8d9a731e903 100644
--- a/arch/sh/include/asm/gpio.h
+++ b/arch/sh/include/asm/gpio.h
@@ -20,7 +20,7 @@
 #endif
 
 #define ARCH_NR_GPIOS 512
-#include <asm-generic/gpio.h>
+#include <linux/sh_pfc.h>
 
 #ifdef CONFIG_GPIOLIB
 
@@ -53,84 +53,4 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* CONFIG_GPIOLIB */
 
-typedef unsigned short pinmux_enum_t;
-typedef unsigned short pinmux_flag_t;
-
-#define PINMUX_TYPE_NONE            0
-#define PINMUX_TYPE_FUNCTION        1
-#define PINMUX_TYPE_GPIO            2
-#define PINMUX_TYPE_OUTPUT          3
-#define PINMUX_TYPE_INPUT           4
-#define PINMUX_TYPE_INPUT_PULLUP    5
-#define PINMUX_TYPE_INPUT_PULLDOWN  6
-
-#define PINMUX_FLAG_TYPE            (0x7)
-#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
-#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
-
-#define PINMUX_FLAG_DBIT_SHIFT      5
-#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
-#define PINMUX_FLAG_DREG_SHIFT      10
-#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
-
-struct pinmux_gpio {
-	pinmux_enum_t enum_id;
-	pinmux_flag_t flags;
-};
-
-#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
-#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
-
-struct pinmux_cfg_reg {
-	unsigned long reg, reg_width, field_width;
-	unsigned long *cnt;
-	pinmux_enum_t *enum_ids;
-};
-
-#define PINMUX_CFG_REG(name, r, r_width, f_width) \
-	.reg = r, .reg_width = r_width, .field_width = f_width,		\
-	.cnt = (unsigned long [r_width / f_width]) {}, \
-	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
-
-struct pinmux_data_reg {
-	unsigned long reg, reg_width, reg_shadow;
-	pinmux_enum_t *enum_ids;
-};
-
-#define PINMUX_DATA_REG(name, r, r_width) \
-	.reg = r, .reg_width = r_width,	\
-	.enum_ids = (pinmux_enum_t [r_width]) \
-
-struct pinmux_range {
-	pinmux_enum_t begin;
-	pinmux_enum_t end;
-	pinmux_enum_t force;
-};
-
-struct pinmux_info {
-	char *name;
-	pinmux_enum_t reserved_id;
-	struct pinmux_range data;
-	struct pinmux_range input;
-	struct pinmux_range input_pd;
-	struct pinmux_range input_pu;
-	struct pinmux_range output;
-	struct pinmux_range mark;
-	struct pinmux_range function;
-
-	unsigned first_gpio, last_gpio;
-
-	struct pinmux_gpio *gpios;
-	struct pinmux_cfg_reg *cfg_regs;
-	struct pinmux_data_reg *data_regs;
-
-	pinmux_enum_t *gpio_data;
-	unsigned int gpio_data_size;
-
-	unsigned long *gpio_in_use;
-	struct gpio_chip chip;
-};
-
-int register_pinmux(struct pinmux_info *pip);
-
 #endif /* __ASM_SH_GPIO_H */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 8edb927a1f30..0471a3eb25ed 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -32,7 +32,6 @@ obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
diff --git a/arch/sh/kernel/gpio.c b/arch/sh/kernel/gpio.c
deleted file mode 100644
index d22e5af699f9..000000000000
--- a/arch/sh/kernel/gpio.c
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Pinmuxed GPIO support for SuperH.
- *
- * Copyright (C) 2008 Magnus Damm
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/bitops.h>
-#include <linux/gpio.h>
-
-static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
-{
-	if (enum_id < r->begin)
-		return 0;
-
-	if (enum_id > r->end)
-		return 0;
-
-	return 1;
-}
-
-static unsigned long gpio_read_raw_reg(unsigned long reg,
-				       unsigned long reg_width)
-{
-	switch (reg_width) {
-	case 8:
-		return ctrl_inb(reg);
-	case 16:
-		return ctrl_inw(reg);
-	case 32:
-		return ctrl_inl(reg);
-	}
-
-	BUG();
-	return 0;
-}
-
-static void gpio_write_raw_reg(unsigned long reg,
-			       unsigned long reg_width,
-			       unsigned long data)
-{
-	switch (reg_width) {
-	case 8:
-		ctrl_outb(data, reg);
-		return;
-	case 16:
-		ctrl_outw(data, reg);
-		return;
-	case 32:
-		ctrl_outl(data, reg);
-		return;
-	}
-
-	BUG();
-}
-
-static void gpio_write_bit(struct pinmux_data_reg *dr,
-			   unsigned long in_pos, unsigned long value)
-{
-	unsigned long pos;
-
-	pos = dr->reg_width - (in_pos + 1);
-
-#ifdef DEBUG
-	pr_info("write_bit addr = %lx, value = %ld, pos = %ld, "
-		"r_width = %ld\n",
-		dr->reg, !!value, pos, dr->reg_width);
-#endif
-
-	if (value)
-		set_bit(pos, &dr->reg_shadow);
-	else
-		clear_bit(pos, &dr->reg_shadow);
-
-	gpio_write_raw_reg(dr->reg, dr->reg_width, dr->reg_shadow);
-}
-
-static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
-			 unsigned long field_width, unsigned long in_pos)
-{
-	unsigned long data, mask, pos;
-
-	data = 0;
-	mask = (1 << field_width) - 1;
-	pos = reg_width - ((in_pos + 1) * field_width);
-
-#ifdef DEBUG
-	pr_info("read_reg: addr = %lx, pos = %ld, "
-		"r_width = %ld, f_width = %ld\n",
-		reg, pos, reg_width, field_width);
-#endif
-
-	data = gpio_read_raw_reg(reg, reg_width);
-	return (data >> pos) & mask;
-}
-
-static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
-			   unsigned long field_width, unsigned long in_pos,
-			   unsigned long value)
-{
-	unsigned long mask, pos;
-
-	mask = (1 << field_width) - 1;
-	pos = reg_width - ((in_pos + 1) * field_width);
-
-#ifdef DEBUG
-	pr_info("write_reg addr = %lx, value = %ld, pos = %ld, "
-		"r_width = %ld, f_width = %ld\n",
-		reg, value, pos, reg_width, field_width);
-#endif
-
-	mask = ~(mask << pos);
-	value = value << pos;
-
-	switch (reg_width) {
-	case 8:
-		ctrl_outb((ctrl_inb(reg) & mask) | value, reg);
-		break;
-	case 16:
-		ctrl_outw((ctrl_inw(reg) & mask) | value, reg);
-		break;
-	case 32:
-		ctrl_outl((ctrl_inl(reg) & mask) | value, reg);
-		break;
-	}
-}
-
-static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
-{
-	struct pinmux_gpio *gpiop = &gpioc->gpios[gpio];
-	struct pinmux_data_reg *data_reg;
-	int k, n;
-
-	if (!enum_in_range(gpiop->enum_id, &gpioc->data))
-		return -1;
-
-	k = 0;
-	while (1) {
-		data_reg = gpioc->data_regs + k;
-
-		if (!data_reg->reg_width)
-			break;
-
-		for (n = 0; n < data_reg->reg_width; n++) {
-			if (data_reg->enum_ids[n] == gpiop->enum_id) {
-				gpiop->flags &= ~PINMUX_FLAG_DREG;
-				gpiop->flags |= (k << PINMUX_FLAG_DREG_SHIFT);
-				gpiop->flags &= ~PINMUX_FLAG_DBIT;
-				gpiop->flags |= (n << PINMUX_FLAG_DBIT_SHIFT);
-				return 0;
-			}
-		}
-		k++;
-	}
-
-	BUG();
-
-	return -1;
-}
-
-static void setup_data_regs(struct pinmux_info *gpioc)
-{
-	struct pinmux_data_reg *drp;
-	int k;
-
-	for (k = gpioc->first_gpio; k <= gpioc->last_gpio; k++)
-		setup_data_reg(gpioc, k);
-
-	k = 0;
-	while (1) {
-		drp = gpioc->data_regs + k;
-
-		if (!drp->reg_width)
-			break;
-
-		drp->reg_shadow = gpio_read_raw_reg(drp->reg, drp->reg_width);
-		k++;
-	}
-}
-
-static int get_data_reg(struct pinmux_info *gpioc, unsigned gpio,
-			struct pinmux_data_reg **drp, int *bitp)
-{
-	struct pinmux_gpio *gpiop = &gpioc->gpios[gpio];
-	int k, n;
-
-	if (!enum_in_range(gpiop->enum_id, &gpioc->data))
-		return -1;
-
-	k = (gpiop->flags & PINMUX_FLAG_DREG) >> PINMUX_FLAG_DREG_SHIFT;
-	n = (gpiop->flags & PINMUX_FLAG_DBIT) >> PINMUX_FLAG_DBIT_SHIFT;
-	*drp = gpioc->data_regs + k;
-	*bitp = n;
-	return 0;
-}
-
-static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
-			  struct pinmux_cfg_reg **crp, int *indexp,
-			  unsigned long **cntp)
-{
-	struct pinmux_cfg_reg *config_reg;
-	unsigned long r_width, f_width;
-	int k, n;
-
-	k = 0;
-	while (1) {
-		config_reg = gpioc->cfg_regs + k;
-
-		r_width = config_reg->reg_width;
-		f_width = config_reg->field_width;
-
-		if (!r_width)
-			break;
-		for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) {
-			if (config_reg->enum_ids[n] == enum_id) {
-				*crp = config_reg;
-				*indexp = n;
-				*cntp = &config_reg->cnt[n / (1 << f_width)];
-				return 0;
-			}
-		}
-		k++;
-	}
-
-	return -1;
-}
-
-static int get_gpio_enum_id(struct pinmux_info *gpioc, unsigned gpio,
-			    int pos, pinmux_enum_t *enum_idp)
-{
-	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
-	pinmux_enum_t *data = gpioc->gpio_data;
-	int k;
-
-	if (!enum_in_range(enum_id, &gpioc->data)) {
-		if (!enum_in_range(enum_id, &gpioc->mark)) {
-			pr_err("non data/mark enum_id for gpio %d\n", gpio);
-			return -1;
-		}
-	}
-
-	if (pos) {
-		*enum_idp = data[pos + 1];
-		return pos + 1;
-	}
-
-	for (k = 0; k < gpioc->gpio_data_size; k++) {
-		if (data[k] == enum_id) {
-			*enum_idp = data[k + 1];
-			return k + 1;
-		}
-	}
-
-	pr_err("cannot locate data/mark enum_id for gpio %d\n", gpio);
-	return -1;
-}
-
-static void write_config_reg(struct pinmux_info *gpioc,
-			     struct pinmux_cfg_reg *crp,
-			     int index)
-{
-	unsigned long ncomb, pos, value;
-
-	ncomb = 1 << crp->field_width;
-	pos = index / ncomb;
-	value = index % ncomb;
-
-	gpio_write_reg(crp->reg, crp->reg_width, crp->field_width, pos, value);
-}
-
-static int check_config_reg(struct pinmux_info *gpioc,
-			    struct pinmux_cfg_reg *crp,
-			    int index)
-{
-	unsigned long ncomb, pos, value;
-
-	ncomb = 1 << crp->field_width;
-	pos = index / ncomb;
-	value = index % ncomb;
-
-	if (gpio_read_reg(crp->reg, crp->reg_width,
-			  crp->field_width, pos) == value)
-		return 0;
-
-	return -1;
-}
-
-enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
-
-static int pinmux_config_gpio(struct pinmux_info *gpioc, unsigned gpio,
-			      int pinmux_type, int cfg_mode)
-{
-	struct pinmux_cfg_reg *cr = NULL;
-	pinmux_enum_t enum_id;
-	struct pinmux_range *range;
-	int in_range, pos, index;
-	unsigned long *cntp;
-
-	switch (pinmux_type) {
-
-	case PINMUX_TYPE_FUNCTION:
-		range = NULL;
-		break;
-
-	case PINMUX_TYPE_OUTPUT:
-		range = &gpioc->output;
-		break;
-
-	case PINMUX_TYPE_INPUT:
-		range = &gpioc->input;
-		break;
-
-	case PINMUX_TYPE_INPUT_PULLUP:
-		range = &gpioc->input_pu;
-		break;
-
-	case PINMUX_TYPE_INPUT_PULLDOWN:
-		range = &gpioc->input_pd;
-		break;
-
-	default:
-		goto out_err;
-	}
-
-	pos = 0;
-	enum_id = 0;
-	index = 0;
-	while (1) {
-		pos = get_gpio_enum_id(gpioc, gpio, pos, &enum_id);
-		if (pos <= 0)
-			goto out_err;
-
-		if (!enum_id)
-			break;
-
-		in_range = enum_in_range(enum_id, &gpioc->function);
-		if (!in_range && range) {
-			in_range = enum_in_range(enum_id, range);
-
-			if (in_range && enum_id == range->force)
-				continue;
-		}
-
-		if (!in_range)
-			continue;
-
-		if (get_config_reg(gpioc, enum_id, &cr, &index, &cntp) != 0)
-			goto out_err;
-
-		switch (cfg_mode) {
-		case GPIO_CFG_DRYRUN:
-			if (!*cntp || !check_config_reg(gpioc, cr, index))
-				continue;
-			break;
-
-		case GPIO_CFG_REQ:
-			write_config_reg(gpioc, cr, index);
-			*cntp = *cntp + 1;
-			break;
-
-		case GPIO_CFG_FREE:
-			*cntp = *cntp - 1;
-			break;
-		}
-	}
-
-	return 0;
- out_err:
-	return -1;
-}
-
-static DEFINE_SPINLOCK(gpio_lock);
-
-static struct pinmux_info *chip_to_pinmux(struct gpio_chip *chip)
-{
-	return container_of(chip, struct pinmux_info, chip);
-}
-
-static int sh_gpio_request(struct gpio_chip *chip, unsigned offset)
-{
-	struct pinmux_info *gpioc = chip_to_pinmux(chip);
-	struct pinmux_data_reg *dummy;
-	unsigned long flags;
-	int i, ret, pinmux_type;
-
-	ret = -EINVAL;
-
-	if (!gpioc)
-		goto err_out;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	if ((gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE) != PINMUX_TYPE_NONE)
-		goto err_unlock;
-
-	/* setup pin function here if no data is associated with pin */
-
-	if (get_data_reg(gpioc, offset, &dummy, &i) != 0)
-		pinmux_type = PINMUX_TYPE_FUNCTION;
-	else
-		pinmux_type = PINMUX_TYPE_GPIO;
-
-	if (pinmux_type == PINMUX_TYPE_FUNCTION) {
-		if (pinmux_config_gpio(gpioc, offset,
-				       pinmux_type,
-				       GPIO_CFG_DRYRUN) != 0)
-			goto err_unlock;
-
-		if (pinmux_config_gpio(gpioc, offset,
-				       pinmux_type,
-				       GPIO_CFG_REQ) != 0)
-			BUG();
-	}
-
-	gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
-	gpioc->gpios[offset].flags |= pinmux_type;
-
-	ret = 0;
- err_unlock:
-	spin_unlock_irqrestore(&gpio_lock, flags);
- err_out:
-	return ret;
-}
-
-static void sh_gpio_free(struct gpio_chip *chip, unsigned offset)
-{
-	struct pinmux_info *gpioc = chip_to_pinmux(chip);
-	unsigned long flags;
-	int pinmux_type;
-
-	if (!gpioc)
-		return;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	pinmux_type = gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE;
-	pinmux_config_gpio(gpioc, offset, pinmux_type, GPIO_CFG_FREE);
-	gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
-	gpioc->gpios[offset].flags |= PINMUX_TYPE_NONE;
-
-	spin_unlock_irqrestore(&gpio_lock, flags);
-}
-
-static int pinmux_direction(struct pinmux_info *gpioc,
-			    unsigned gpio, int new_pinmux_type)
-{
-	int pinmux_type;
-	int ret = -EINVAL;
-
-	if (!gpioc)
-		goto err_out;
-
-	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
-
-	switch (pinmux_type) {
-	case PINMUX_TYPE_GPIO:
-		break;
-	case PINMUX_TYPE_OUTPUT:
-	case PINMUX_TYPE_INPUT:
-	case PINMUX_TYPE_INPUT_PULLUP:
-	case PINMUX_TYPE_INPUT_PULLDOWN:
-		pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
-		break;
-	default:
-		goto err_out;
-	}
-
-	if (pinmux_config_gpio(gpioc, gpio,
-			       new_pinmux_type,
-			       GPIO_CFG_DRYRUN) != 0)
-		goto err_out;
-
-	if (pinmux_config_gpio(gpioc, gpio,
-			       new_pinmux_type,
-			       GPIO_CFG_REQ) != 0)
-		BUG();
-
-	gpioc->gpios[gpio].flags &= ~PINMUX_FLAG_TYPE;
-	gpioc->gpios[gpio].flags |= new_pinmux_type;
-
-	ret = 0;
- err_out:
-	return ret;
-}
-
-static int sh_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-	struct pinmux_info *gpioc = chip_to_pinmux(chip);
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_INPUT);
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	return ret;
-}
-
-static void sh_gpio_set_value(struct pinmux_info *gpioc,
-			     unsigned gpio, int value)
-{
-	struct pinmux_data_reg *dr = NULL;
-	int bit = 0;
-
-	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0)
-		BUG();
-	else
-		gpio_write_bit(dr, bit, value);
-}
-
-static int sh_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
-				    int value)
-{
-	struct pinmux_info *gpioc = chip_to_pinmux(chip);
-	unsigned long flags;
-	int ret;
-
-	sh_gpio_set_value(gpioc, offset, value);
-	spin_lock_irqsave(&gpio_lock, flags);
-	ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_OUTPUT);
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	return ret;
-}
-
-static int sh_gpio_get_value(struct pinmux_info *gpioc, unsigned gpio)
-{
-	struct pinmux_data_reg *dr = NULL;
-	int bit = 0;
-
-	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0) {
-		BUG();
-		return 0;
-	}
-
-	return gpio_read_reg(dr->reg, dr->reg_width, 1, bit);
-}
-
-static int sh_gpio_get(struct gpio_chip *chip, unsigned offset)
-{
-	return sh_gpio_get_value(chip_to_pinmux(chip), offset);
-}
-
-static void sh_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-	sh_gpio_set_value(chip_to_pinmux(chip), offset, value);
-}
-
-int register_pinmux(struct pinmux_info *pip)
-{
-	struct gpio_chip *chip = &pip->chip;
-
-	pr_info("sh pinmux: %s handling gpio %d -> %d\n",
-		pip->name, pip->first_gpio, pip->last_gpio);
-
-	setup_data_regs(pip);
-
-	chip->request = sh_gpio_request;
-	chip->free = sh_gpio_free;
-	chip->direction_input = sh_gpio_direction_input;
-	chip->get = sh_gpio_get;
-	chip->direction_output = sh_gpio_direction_output;
-	chip->set = sh_gpio_set;
-
-	WARN_ON(pip->first_gpio != 0); /* needs testing */
-
-	chip->label = pip->name;
-	chip->owner = THIS_MODULE;
-	chip->base = pip->first_gpio;
-	chip->ngpio = (pip->last_gpio - pip->first_gpio) + 1;
-
-	return gpiochip_add(chip);
-}
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index 6a025cefe6dc..4956bf1f2134 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
 obj-$(CONFIG_MAPLE)		+= maple/
+obj-$(CONFIG_GENERIC_GPIO)	+= pfc.o
 obj-y				+= intc.o
diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
new file mode 100644
index 000000000000..d22e5af699f9
--- /dev/null
+++ b/drivers/sh/pfc.c
@@ -0,0 +1,584 @@
+/*
+ * Pinmuxed GPIO support for SuperH.
+ *
+ * Copyright (C) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+
+static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
+{
+	if (enum_id < r->begin)
+		return 0;
+
+	if (enum_id > r->end)
+		return 0;
+
+	return 1;
+}
+
+static unsigned long gpio_read_raw_reg(unsigned long reg,
+				       unsigned long reg_width)
+{
+	switch (reg_width) {
+	case 8:
+		return ctrl_inb(reg);
+	case 16:
+		return ctrl_inw(reg);
+	case 32:
+		return ctrl_inl(reg);
+	}
+
+	BUG();
+	return 0;
+}
+
+static void gpio_write_raw_reg(unsigned long reg,
+			       unsigned long reg_width,
+			       unsigned long data)
+{
+	switch (reg_width) {
+	case 8:
+		ctrl_outb(data, reg);
+		return;
+	case 16:
+		ctrl_outw(data, reg);
+		return;
+	case 32:
+		ctrl_outl(data, reg);
+		return;
+	}
+
+	BUG();
+}
+
+static void gpio_write_bit(struct pinmux_data_reg *dr,
+			   unsigned long in_pos, unsigned long value)
+{
+	unsigned long pos;
+
+	pos = dr->reg_width - (in_pos + 1);
+
+#ifdef DEBUG
+	pr_info("write_bit addr = %lx, value = %ld, pos = %ld, "
+		"r_width = %ld\n",
+		dr->reg, !!value, pos, dr->reg_width);
+#endif
+
+	if (value)
+		set_bit(pos, &dr->reg_shadow);
+	else
+		clear_bit(pos, &dr->reg_shadow);
+
+	gpio_write_raw_reg(dr->reg, dr->reg_width, dr->reg_shadow);
+}
+
+static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
+			 unsigned long field_width, unsigned long in_pos)
+{
+	unsigned long data, mask, pos;
+
+	data = 0;
+	mask = (1 << field_width) - 1;
+	pos = reg_width - ((in_pos + 1) * field_width);
+
+#ifdef DEBUG
+	pr_info("read_reg: addr = %lx, pos = %ld, "
+		"r_width = %ld, f_width = %ld\n",
+		reg, pos, reg_width, field_width);
+#endif
+
+	data = gpio_read_raw_reg(reg, reg_width);
+	return (data >> pos) & mask;
+}
+
+static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
+			   unsigned long field_width, unsigned long in_pos,
+			   unsigned long value)
+{
+	unsigned long mask, pos;
+
+	mask = (1 << field_width) - 1;
+	pos = reg_width - ((in_pos + 1) * field_width);
+
+#ifdef DEBUG
+	pr_info("write_reg addr = %lx, value = %ld, pos = %ld, "
+		"r_width = %ld, f_width = %ld\n",
+		reg, value, pos, reg_width, field_width);
+#endif
+
+	mask = ~(mask << pos);
+	value = value << pos;
+
+	switch (reg_width) {
+	case 8:
+		ctrl_outb((ctrl_inb(reg) & mask) | value, reg);
+		break;
+	case 16:
+		ctrl_outw((ctrl_inw(reg) & mask) | value, reg);
+		break;
+	case 32:
+		ctrl_outl((ctrl_inl(reg) & mask) | value, reg);
+		break;
+	}
+}
+
+static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
+{
+	struct pinmux_gpio *gpiop = &gpioc->gpios[gpio];
+	struct pinmux_data_reg *data_reg;
+	int k, n;
+
+	if (!enum_in_range(gpiop->enum_id, &gpioc->data))
+		return -1;
+
+	k = 0;
+	while (1) {
+		data_reg = gpioc->data_regs + k;
+
+		if (!data_reg->reg_width)
+			break;
+
+		for (n = 0; n < data_reg->reg_width; n++) {
+			if (data_reg->enum_ids[n] == gpiop->enum_id) {
+				gpiop->flags &= ~PINMUX_FLAG_DREG;
+				gpiop->flags |= (k << PINMUX_FLAG_DREG_SHIFT);
+				gpiop->flags &= ~PINMUX_FLAG_DBIT;
+				gpiop->flags |= (n << PINMUX_FLAG_DBIT_SHIFT);
+				return 0;
+			}
+		}
+		k++;
+	}
+
+	BUG();
+
+	return -1;
+}
+
+static void setup_data_regs(struct pinmux_info *gpioc)
+{
+	struct pinmux_data_reg *drp;
+	int k;
+
+	for (k = gpioc->first_gpio; k <= gpioc->last_gpio; k++)
+		setup_data_reg(gpioc, k);
+
+	k = 0;
+	while (1) {
+		drp = gpioc->data_regs + k;
+
+		if (!drp->reg_width)
+			break;
+
+		drp->reg_shadow = gpio_read_raw_reg(drp->reg, drp->reg_width);
+		k++;
+	}
+}
+
+static int get_data_reg(struct pinmux_info *gpioc, unsigned gpio,
+			struct pinmux_data_reg **drp, int *bitp)
+{
+	struct pinmux_gpio *gpiop = &gpioc->gpios[gpio];
+	int k, n;
+
+	if (!enum_in_range(gpiop->enum_id, &gpioc->data))
+		return -1;
+
+	k = (gpiop->flags & PINMUX_FLAG_DREG) >> PINMUX_FLAG_DREG_SHIFT;
+	n = (gpiop->flags & PINMUX_FLAG_DBIT) >> PINMUX_FLAG_DBIT_SHIFT;
+	*drp = gpioc->data_regs + k;
+	*bitp = n;
+	return 0;
+}
+
+static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
+			  struct pinmux_cfg_reg **crp, int *indexp,
+			  unsigned long **cntp)
+{
+	struct pinmux_cfg_reg *config_reg;
+	unsigned long r_width, f_width;
+	int k, n;
+
+	k = 0;
+	while (1) {
+		config_reg = gpioc->cfg_regs + k;
+
+		r_width = config_reg->reg_width;
+		f_width = config_reg->field_width;
+
+		if (!r_width)
+			break;
+		for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) {
+			if (config_reg->enum_ids[n] == enum_id) {
+				*crp = config_reg;
+				*indexp = n;
+				*cntp = &config_reg->cnt[n / (1 << f_width)];
+				return 0;
+			}
+		}
+		k++;
+	}
+
+	return -1;
+}
+
+static int get_gpio_enum_id(struct pinmux_info *gpioc, unsigned gpio,
+			    int pos, pinmux_enum_t *enum_idp)
+{
+	pinmux_enum_t enum_id = gpioc->gpios[gpio].enum_id;
+	pinmux_enum_t *data = gpioc->gpio_data;
+	int k;
+
+	if (!enum_in_range(enum_id, &gpioc->data)) {
+		if (!enum_in_range(enum_id, &gpioc->mark)) {
+			pr_err("non data/mark enum_id for gpio %d\n", gpio);
+			return -1;
+		}
+	}
+
+	if (pos) {
+		*enum_idp = data[pos + 1];
+		return pos + 1;
+	}
+
+	for (k = 0; k < gpioc->gpio_data_size; k++) {
+		if (data[k] == enum_id) {
+			*enum_idp = data[k + 1];
+			return k + 1;
+		}
+	}
+
+	pr_err("cannot locate data/mark enum_id for gpio %d\n", gpio);
+	return -1;
+}
+
+static void write_config_reg(struct pinmux_info *gpioc,
+			     struct pinmux_cfg_reg *crp,
+			     int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	gpio_write_reg(crp->reg, crp->reg_width, crp->field_width, pos, value);
+}
+
+static int check_config_reg(struct pinmux_info *gpioc,
+			    struct pinmux_cfg_reg *crp,
+			    int index)
+{
+	unsigned long ncomb, pos, value;
+
+	ncomb = 1 << crp->field_width;
+	pos = index / ncomb;
+	value = index % ncomb;
+
+	if (gpio_read_reg(crp->reg, crp->reg_width,
+			  crp->field_width, pos) == value)
+		return 0;
+
+	return -1;
+}
+
+enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
+
+static int pinmux_config_gpio(struct pinmux_info *gpioc, unsigned gpio,
+			      int pinmux_type, int cfg_mode)
+{
+	struct pinmux_cfg_reg *cr = NULL;
+	pinmux_enum_t enum_id;
+	struct pinmux_range *range;
+	int in_range, pos, index;
+	unsigned long *cntp;
+
+	switch (pinmux_type) {
+
+	case PINMUX_TYPE_FUNCTION:
+		range = NULL;
+		break;
+
+	case PINMUX_TYPE_OUTPUT:
+		range = &gpioc->output;
+		break;
+
+	case PINMUX_TYPE_INPUT:
+		range = &gpioc->input;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLUP:
+		range = &gpioc->input_pu;
+		break;
+
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		range = &gpioc->input_pd;
+		break;
+
+	default:
+		goto out_err;
+	}
+
+	pos = 0;
+	enum_id = 0;
+	index = 0;
+	while (1) {
+		pos = get_gpio_enum_id(gpioc, gpio, pos, &enum_id);
+		if (pos <= 0)
+			goto out_err;
+
+		if (!enum_id)
+			break;
+
+		in_range = enum_in_range(enum_id, &gpioc->function);
+		if (!in_range && range) {
+			in_range = enum_in_range(enum_id, range);
+
+			if (in_range && enum_id == range->force)
+				continue;
+		}
+
+		if (!in_range)
+			continue;
+
+		if (get_config_reg(gpioc, enum_id, &cr, &index, &cntp) != 0)
+			goto out_err;
+
+		switch (cfg_mode) {
+		case GPIO_CFG_DRYRUN:
+			if (!*cntp || !check_config_reg(gpioc, cr, index))
+				continue;
+			break;
+
+		case GPIO_CFG_REQ:
+			write_config_reg(gpioc, cr, index);
+			*cntp = *cntp + 1;
+			break;
+
+		case GPIO_CFG_FREE:
+			*cntp = *cntp - 1;
+			break;
+		}
+	}
+
+	return 0;
+ out_err:
+	return -1;
+}
+
+static DEFINE_SPINLOCK(gpio_lock);
+
+static struct pinmux_info *chip_to_pinmux(struct gpio_chip *chip)
+{
+	return container_of(chip, struct pinmux_info, chip);
+}
+
+static int sh_gpio_request(struct gpio_chip *chip, unsigned offset)
+{
+	struct pinmux_info *gpioc = chip_to_pinmux(chip);
+	struct pinmux_data_reg *dummy;
+	unsigned long flags;
+	int i, ret, pinmux_type;
+
+	ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if ((gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE) != PINMUX_TYPE_NONE)
+		goto err_unlock;
+
+	/* setup pin function here if no data is associated with pin */
+
+	if (get_data_reg(gpioc, offset, &dummy, &i) != 0)
+		pinmux_type = PINMUX_TYPE_FUNCTION;
+	else
+		pinmux_type = PINMUX_TYPE_GPIO;
+
+	if (pinmux_type == PINMUX_TYPE_FUNCTION) {
+		if (pinmux_config_gpio(gpioc, offset,
+				       pinmux_type,
+				       GPIO_CFG_DRYRUN) != 0)
+			goto err_unlock;
+
+		if (pinmux_config_gpio(gpioc, offset,
+				       pinmux_type,
+				       GPIO_CFG_REQ) != 0)
+			BUG();
+	}
+
+	gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
+	gpioc->gpios[offset].flags |= pinmux_type;
+
+	ret = 0;
+ err_unlock:
+	spin_unlock_irqrestore(&gpio_lock, flags);
+ err_out:
+	return ret;
+}
+
+static void sh_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+	struct pinmux_info *gpioc = chip_to_pinmux(chip);
+	unsigned long flags;
+	int pinmux_type;
+
+	if (!gpioc)
+		return;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	pinmux_type = gpioc->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_config_gpio(gpioc, offset, pinmux_type, GPIO_CFG_FREE);
+	gpioc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
+	gpioc->gpios[offset].flags |= PINMUX_TYPE_NONE;
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+}
+
+static int pinmux_direction(struct pinmux_info *gpioc,
+			    unsigned gpio, int new_pinmux_type)
+{
+	int pinmux_type;
+	int ret = -EINVAL;
+
+	if (!gpioc)
+		goto err_out;
+
+	pinmux_type = gpioc->gpios[gpio].flags & PINMUX_FLAG_TYPE;
+
+	switch (pinmux_type) {
+	case PINMUX_TYPE_GPIO:
+		break;
+	case PINMUX_TYPE_OUTPUT:
+	case PINMUX_TYPE_INPUT:
+	case PINMUX_TYPE_INPUT_PULLUP:
+	case PINMUX_TYPE_INPUT_PULLDOWN:
+		pinmux_config_gpio(gpioc, gpio, pinmux_type, GPIO_CFG_FREE);
+		break;
+	default:
+		goto err_out;
+	}
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_DRYRUN) != 0)
+		goto err_out;
+
+	if (pinmux_config_gpio(gpioc, gpio,
+			       new_pinmux_type,
+			       GPIO_CFG_REQ) != 0)
+		BUG();
+
+	gpioc->gpios[gpio].flags &= ~PINMUX_FLAG_TYPE;
+	gpioc->gpios[gpio].flags |= new_pinmux_type;
+
+	ret = 0;
+ err_out:
+	return ret;
+}
+
+static int sh_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct pinmux_info *gpioc = chip_to_pinmux(chip);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_INPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	return ret;
+}
+
+static void sh_gpio_set_value(struct pinmux_info *gpioc,
+			     unsigned gpio, int value)
+{
+	struct pinmux_data_reg *dr = NULL;
+	int bit = 0;
+
+	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0)
+		BUG();
+	else
+		gpio_write_bit(dr, bit, value);
+}
+
+static int sh_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
+				    int value)
+{
+	struct pinmux_info *gpioc = chip_to_pinmux(chip);
+	unsigned long flags;
+	int ret;
+
+	sh_gpio_set_value(gpioc, offset, value);
+	spin_lock_irqsave(&gpio_lock, flags);
+	ret = pinmux_direction(gpioc, offset, PINMUX_TYPE_OUTPUT);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	return ret;
+}
+
+static int sh_gpio_get_value(struct pinmux_info *gpioc, unsigned gpio)
+{
+	struct pinmux_data_reg *dr = NULL;
+	int bit = 0;
+
+	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0) {
+		BUG();
+		return 0;
+	}
+
+	return gpio_read_reg(dr->reg, dr->reg_width, 1, bit);
+}
+
+static int sh_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	return sh_gpio_get_value(chip_to_pinmux(chip), offset);
+}
+
+static void sh_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	sh_gpio_set_value(chip_to_pinmux(chip), offset, value);
+}
+
+int register_pinmux(struct pinmux_info *pip)
+{
+	struct gpio_chip *chip = &pip->chip;
+
+	pr_info("sh pinmux: %s handling gpio %d -> %d\n",
+		pip->name, pip->first_gpio, pip->last_gpio);
+
+	setup_data_regs(pip);
+
+	chip->request = sh_gpio_request;
+	chip->free = sh_gpio_free;
+	chip->direction_input = sh_gpio_direction_input;
+	chip->get = sh_gpio_get;
+	chip->direction_output = sh_gpio_direction_output;
+	chip->set = sh_gpio_set;
+
+	WARN_ON(pip->first_gpio != 0); /* needs testing */
+
+	chip->label = pip->name;
+	chip->owner = THIS_MODULE;
+	chip->base = pip->first_gpio;
+	chip->ngpio = (pip->last_gpio - pip->first_gpio) + 1;
+
+	return gpiochip_add(chip);
+}
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
new file mode 100644
index 000000000000..07c08af9f8f6
--- /dev/null
+++ b/include/linux/sh_pfc.h
@@ -0,0 +1,96 @@
+/*
+ * SuperH Pin Function Controller Support
+ *
+ * Copyright (c) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __SH_PFC_H
+#define __SH_PFC_H
+
+#include <asm-generic/gpio.h>
+
+typedef unsigned short pinmux_enum_t;
+typedef unsigned short pinmux_flag_t;
+
+#define PINMUX_TYPE_NONE            0
+#define PINMUX_TYPE_FUNCTION        1
+#define PINMUX_TYPE_GPIO            2
+#define PINMUX_TYPE_OUTPUT          3
+#define PINMUX_TYPE_INPUT           4
+#define PINMUX_TYPE_INPUT_PULLUP    5
+#define PINMUX_TYPE_INPUT_PULLDOWN  6
+
+#define PINMUX_FLAG_TYPE            (0x7)
+#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
+#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
+
+#define PINMUX_FLAG_DBIT_SHIFT      5
+#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
+#define PINMUX_FLAG_DREG_SHIFT      10
+#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
+
+struct pinmux_gpio {
+	pinmux_enum_t enum_id;
+	pinmux_flag_t flags;
+};
+
+#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
+#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
+
+struct pinmux_cfg_reg {
+	unsigned long reg, reg_width, field_width;
+	unsigned long *cnt;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_CFG_REG(name, r, r_width, f_width) \
+	.reg = r, .reg_width = r_width, .field_width = f_width,		\
+	.cnt = (unsigned long [r_width / f_width]) {}, \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+
+struct pinmux_data_reg {
+	unsigned long reg, reg_width, reg_shadow;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_DATA_REG(name, r, r_width) \
+	.reg = r, .reg_width = r_width,	\
+	.enum_ids = (pinmux_enum_t [r_width]) \
+
+struct pinmux_range {
+	pinmux_enum_t begin;
+	pinmux_enum_t end;
+	pinmux_enum_t force;
+};
+
+struct pinmux_info {
+	char *name;
+	pinmux_enum_t reserved_id;
+	struct pinmux_range data;
+	struct pinmux_range input;
+	struct pinmux_range input_pd;
+	struct pinmux_range input_pu;
+	struct pinmux_range output;
+	struct pinmux_range mark;
+	struct pinmux_range function;
+
+	unsigned first_gpio, last_gpio;
+
+	struct pinmux_gpio *gpios;
+	struct pinmux_cfg_reg *cfg_regs;
+	struct pinmux_data_reg *data_regs;
+
+	pinmux_enum_t *gpio_data;
+	unsigned int gpio_data_size;
+
+	unsigned long *gpio_in_use;
+	struct gpio_chip chip;
+};
+
+int register_pinmux(struct pinmux_info *pip);
+
+#endif /* __SH_PFC_H */
-- 
cgit v1.2.3


From 3cf602532c535ec655725e9833378e04c9fd7783 Mon Sep 17 00:00:00 2001
From: Amul Kumar Saha <amul.saha@samsung.com>
Date: Wed, 21 Oct 2009 17:00:05 +0530
Subject: mtd: OneNAND OTP support rework

What is OTP in OneNAND?
The device includes,
1. one block-sized OTP (One Time Programmable) area and
2. user-controlled 1st block OTP(Block 0)
that can be used to increase system security or to provide
identification capabilities.

What is done?
In OneNAND, one block of the NAND Array is set aside as an OTP
memory area, and 1st Block (Block 0) can be used as OTP area.
This area, available to the user, can be configured and locked
with secured user information. The OTP block can be read,
programmed and locked using the same operations as any other NAND
Flash Array memory block. After issuing an OTP-Lock, OTP block
cannot be erased. OTP block is fully-guaranteed to be a good
block.

Why it is done?
Locking the 1st Block OTP has the effect of a 'Write-protect' to
guard against accidental re-programming of data stored in the 1st
block and OTP Block.

Which problem it solves?
OTP support is provided in the existing implementation of
OneNAND/Flex-OneNAND driver, but it is not working with OneNAND
devices. Have observed the following in current OTP OneNAND Implmentation,
1. DataSheet specific sequence to lock the OTP Area is not followed.
2. Certain functions are quiet generic to cope with OTP specific activity.
This patch re-implements OTP support for OneNAND device.

How it is done?
For all blocks, 8th word is available to the user.
However, in case of OTP Block, 8th word of sector 0, page 0 is reserved as
OTP Locking Bit area. Therefore, in case of OTP Block, user usage on this
area is prohibited. Condition specific values are entered in the 8th word,
sector0, page 0 of the OTP block during the process of issuing an OTP-Lock.
The possible conditions are:
1. Only 1st Block Lock
2. Only OTP Block Lock
3. Lock both the 1st Block and the OTP Block

What Other feature additions have been done in this patch?
This patch adds feature for:
1. Only 1st Block Lock
2. Lock both the 1st Block and the OTP Blocks

Re-implemented OTP support for OneNAND
Added following features to OneNAND
	1. Lock only 1st Block in OneNAND
	2. Lock BOTH 1st Block and OTP Block in OneNAND

[comments were slightly tweaked by Artem]

Signed-off-by: Amul Kumar Saha <amul.saha@samsung.com>
Reviewed-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 292 +++++++++++++++++++++++++++++++++----
 include/linux/mtd/onenand.h        |   4 +-
 2 files changed, 264 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 6e250f3a4a16..7bd6ad3ff30a 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1,17 +1,19 @@
 /*
  *  linux/drivers/mtd/onenand/onenand_base.c
  *
- *  Copyright (C) 2005-2007 Samsung Electronics
+ *  Copyright © 2005-2009 Samsung Electronics
+ *  Copyright © 2007 Nokia Corporation
+ *
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *
  *  Credits:
  *	Adrian Hunter <ext-adrian.hunter@nokia.com>:
  *	auto-placement support, read-while load support, various fixes
- *	Copyright (C) Nokia Corporation, 2007
  *
  *	Vishak G <vishak.g at samsung.com>, Rohit Hagargundgi <h.rohit at samsung.com>
  *	Flex-OneNAND support
- *	Copyright (C) Samsung Electronics, 2008
+ *	Amul Kumar Saha <amul.saha at samsung.com>
+ *	OTP support
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -43,6 +45,18 @@ MODULE_PARM_DESC(flex_bdry,	"SLC Boundary information for Flex-OneNAND"
 				"    : 0->Set boundary in unlocked status"
 				"    : 1->Set boundary in locked status");
 
+/* Default OneNAND/Flex-OneNAND OTP options*/
+static int otp;
+
+module_param(otp, int, 0400);
+MODULE_PARM_DESC(otp,	"Corresponding behaviour of OneNAND in OTP"
+			"Syntax : otp=LOCK_TYPE"
+			"LOCK_TYPE : Keys issued, for specific OTP Lock type"
+			"	   : 0 -> Default (No Blocks Locked)"
+			"	   : 1 -> OTP Block lock"
+			"	   : 2 -> 1st Block lock"
+			"	   : 3 -> BOTH OTP Block and 1st Block lock");
+
 /**
  *  onenand_oob_128 - oob info for Flex-Onenand with 4KB page
  *  For now, we expose only 64 out of 80 ecc bytes
@@ -2591,6 +2605,208 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 
 #ifdef CONFIG_MTD_ONENAND_OTP
 
+/**
+ * onenand_otp_command - Send OTP specific command to OneNAND device
+ * @param mtd	 MTD device structure
+ * @param cmd	 the command to be sent
+ * @param addr	 offset to read from or write to
+ * @param len	 number of bytes to read or write
+ */
+static int onenand_otp_command(struct mtd_info *mtd, int cmd, loff_t addr,
+				size_t len)
+{
+	struct onenand_chip *this = mtd->priv;
+	int value, block, page;
+
+	/* Address translation */
+	switch (cmd) {
+	case ONENAND_CMD_OTP_ACCESS:
+		block = (int) (addr >> this->erase_shift);
+		page = -1;
+		break;
+
+	default:
+		block = (int) (addr >> this->erase_shift);
+		page = (int) (addr >> this->page_shift);
+
+		if (ONENAND_IS_2PLANE(this)) {
+			/* Make the even block number */
+			block &= ~1;
+			/* Is it the odd plane? */
+			if (addr & this->writesize)
+				block++;
+			page >>= 1;
+		}
+		page &= this->page_mask;
+		break;
+	}
+
+	if (block != -1) {
+		/* Write 'DFS, FBA' of Flash */
+		value = onenand_block_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS1);
+	}
+
+	if (page != -1) {
+		/* Now we use page size operation */
+		int sectors = 4, count = 4;
+		int dataram;
+
+		switch (cmd) {
+		default:
+			if (ONENAND_IS_2PLANE(this) && cmd == ONENAND_CMD_PROG)
+				cmd = ONENAND_CMD_2X_PROG;
+			dataram = ONENAND_CURRENT_BUFFERRAM(this);
+			break;
+		}
+
+		/* Write 'FPA, FSA' of Flash */
+		value = onenand_page_address(page, sectors);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS8);
+
+		/* Write 'BSA, BSC' of DataRAM */
+		value = onenand_buffer_address(dataram, sectors, count);
+		this->write_word(value, this->base + ONENAND_REG_START_BUFFER);
+	}
+
+	/* Interrupt clear */
+	this->write_word(ONENAND_INT_CLEAR, this->base + ONENAND_REG_INTERRUPT);
+
+	/* Write command */
+	this->write_word(cmd, this->base + ONENAND_REG_COMMAND);
+
+	return 0;
+}
+
+/**
+ * onenand_otp_write_oob_nolock - [Internal] OneNAND write out-of-band, specific to OTP
+ * @param mtd		MTD device structure
+ * @param to		offset to write to
+ * @param len		number of bytes to write
+ * @param retlen	pointer to variable to store the number of written bytes
+ * @param buf		the data to write
+ *
+ * OneNAND write out-of-band only for OTP
+ */
+static int onenand_otp_write_oob_nolock(struct mtd_info *mtd, loff_t to,
+				    struct mtd_oob_ops *ops)
+{
+	struct onenand_chip *this = mtd->priv;
+	int column, ret = 0, oobsize;
+	int written = 0;
+	u_char *oobbuf;
+	size_t len = ops->ooblen;
+	const u_char *buf = ops->oobbuf;
+	int block, value, status;
+
+	to += ops->ooboffs;
+
+	/* Initialize retlen, in case of early exit */
+	ops->oobretlen = 0;
+
+	oobsize = mtd->oobsize;
+
+	column = to & (mtd->oobsize - 1);
+
+	oobbuf = this->oob_buf;
+
+	/* Loop until all data write */
+	while (written < len) {
+		int thislen = min_t(int, oobsize, len - written);
+
+		cond_resched();
+
+		block = (int) (to >> this->erase_shift);
+		/*
+		 * Write 'DFS, FBA' of Flash
+		 * Add: F100h DQ=DFS, FBA
+		 */
+
+		value = onenand_block_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS1);
+
+		/*
+		 * Select DataRAM for DDP
+		 * Add: F101h DQ=DBS
+		 */
+
+		value = onenand_bufferram_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS2);
+		ONENAND_SET_NEXT_BUFFERRAM(this);
+
+		/*
+		 * Enter OTP access mode
+		 */
+		this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
+		this->wait(mtd, FL_OTPING);
+
+		/* We send data to spare ram with oobsize
+		 * to prevent byte access */
+		memcpy(oobbuf + column, buf, thislen);
+
+		/*
+		 * Write Data into DataRAM
+		 * Add: 8th Word
+		 * in sector0/spare/page0
+		 * DQ=XXFCh
+		 */
+		this->write_bufferram(mtd, ONENAND_SPARERAM,
+					oobbuf, 0, mtd->oobsize);
+
+		onenand_otp_command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize);
+		onenand_update_bufferram(mtd, to, 0);
+		if (ONENAND_IS_2PLANE(this)) {
+			ONENAND_SET_BUFFERRAM1(this);
+			onenand_update_bufferram(mtd, to + this->writesize, 0);
+		}
+
+		ret = this->wait(mtd, FL_WRITING);
+		if (ret) {
+			printk(KERN_ERR "%s: write failed %d\n", __func__, ret);
+			break;
+		}
+
+		/* Exit OTP access mode */
+		this->command(mtd, ONENAND_CMD_RESET, 0, 0);
+		this->wait(mtd, FL_RESETING);
+
+		status = this->read_word(this->base + ONENAND_REG_CTRL_STATUS);
+		status &= 0x60;
+
+		if (status == 0x60) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tLOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tLOCKED\n");
+		} else if (status == 0x20) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tLOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tUN-LOCKED\n");
+		} else if (status == 0x40) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tUN-LOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tLOCKED\n");
+		} else {
+			printk(KERN_DEBUG "Reboot to check\n");
+		}
+
+		written += thislen;
+		if (written == len)
+			break;
+
+		to += mtd->writesize;
+		buf += thislen;
+		column = 0;
+	}
+
+	ops->oobretlen = written;
+
+	return ret;
+}
+
 /* Internal OTP operation */
 typedef int (*otp_op_t)(struct mtd_info *mtd, loff_t form, size_t len,
 		size_t *retlen, u_char *buf);
@@ -2693,11 +2909,11 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
 	struct mtd_oob_ops ops;
 	int ret;
 
-	/* Enter OTP access mode */
-	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
-	this->wait(mtd, FL_OTPING);
-
 	if (FLEXONENAND(this)) {
+
+		/* Enter OTP access mode */
+		this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
+		this->wait(mtd, FL_OTPING);
 		/*
 		 * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of
 		 * main area of page 49.
@@ -2708,19 +2924,19 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
 		ops.oobbuf = NULL;
 		ret = onenand_write_ops_nolock(mtd, mtd->writesize * 49, &ops);
 		*retlen = ops.retlen;
+
+		/* Exit OTP access mode */
+		this->command(mtd, ONENAND_CMD_RESET, 0, 0);
+		this->wait(mtd, FL_RESETING);
 	} else {
 		ops.mode = MTD_OOB_PLACE;
 		ops.ooblen = len;
 		ops.oobbuf = buf;
 		ops.ooboffs = 0;
-		ret = onenand_write_oob_nolock(mtd, from, &ops);
+		ret = onenand_otp_write_oob_nolock(mtd, from, &ops);
 		*retlen = ops.oobretlen;
 	}
 
-	/* Exit OTP access mode */
-	this->command(mtd, ONENAND_CMD_RESET, 0, 0);
-	this->wait(mtd, FL_RESETING);
-
 	return ret;
 }
 
@@ -2751,16 +2967,21 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 	if (density < ONENAND_DEVICE_DENSITY_512Mb)
 		otp_pages = 20;
 	else
-		otp_pages = 10;
+		otp_pages = 50;
 
 	if (mode == MTD_OTP_FACTORY) {
 		from += mtd->writesize * otp_pages;
-		otp_pages = 64 - otp_pages;
+		otp_pages = ONENAND_PAGES_PER_BLOCK - otp_pages;
 	}
 
 	/* Check User/Factory boundary */
-	if (((mtd->writesize * otp_pages) - (from + len)) < 0)
-		return 0;
+	if (mode == MTD_OTP_USER) {
+		if (((mtd->writesize * otp_pages) - (from + len)) < 0)
+			return 0;
+	} else {
+		if (((mtd->writesize * otp_pages) - len) < 0)
+			return 0;
+	}
 
 	onenand_get_device(mtd, FL_OTPING);
 	while (len > 0 && otp_pages > 0) {
@@ -2783,13 +3004,12 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 			*retlen += sizeof(struct otp_info);
 		} else {
 			size_t tmp_retlen;
-			int size = len;
 
 			ret = action(mtd, from, len, &tmp_retlen, buf);
 
-			buf += size;
-			len -= size;
-			*retlen += size;
+			buf += tmp_retlen;
+			len -= tmp_retlen;
+			*retlen += tmp_retlen;
 
 			if (ret)
 				break;
@@ -2902,20 +3122,10 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	u_char *buf = FLEXONENAND(this) ? this->page_buf : this->oob_buf;
 	size_t retlen;
 	int ret;
+	unsigned int otp_lock_offset = ONENAND_OTP_LOCK_OFFSET;
 
 	memset(buf, 0xff, FLEXONENAND(this) ? this->writesize
 						 : mtd->oobsize);
-	/*
-	 * Note: OTP lock operation
-	 *       OTP block : 0xXXFC
-	 *       1st block : 0xXXF3 (If chip support)
-	 *       Both      : 0xXXF0 (If chip support)
-	 */
-	if (FLEXONENAND(this))
-		buf[FLEXONENAND_OTP_LOCK_OFFSET] = 0xFC;
-	else
-		buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC;
-
 	/*
 	 * Write lock mark to 8th word of sector0 of page0 of the spare0.
 	 * We write 16 bytes spare area instead of 2 bytes.
@@ -2926,10 +3136,30 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	from = 0;
 	len = FLEXONENAND(this) ? mtd->writesize : 16;
 
+	/*
+	 * Note: OTP lock operation
+	 *       OTP block : 0xXXFC			XX 1111 1100
+	 *       1st block : 0xXXF3 (If chip support)	XX 1111 0011
+	 *       Both      : 0xXXF0 (If chip support)	XX 1111 0000
+	 */
+	if (FLEXONENAND(this))
+		otp_lock_offset = FLEXONENAND_OTP_LOCK_OFFSET;
+
+	/* ONENAND_OTP_AREA | ONENAND_OTP_BLOCK0 | ONENAND_OTP_AREA_BLOCK0 */
+	if (otp == 1)
+		buf[otp_lock_offset] = 0xFC;
+	else if (otp == 2)
+		buf[otp_lock_offset] = 0xF3;
+	else if (otp == 3)
+		buf[otp_lock_offset] = 0xF0;
+	else if (otp != 0)
+		printk(KERN_DEBUG "[OneNAND] Invalid option selected for OTP\n");
+
 	ret = onenand_otp_walk(mtd, from, len, &retlen, buf, do_otp_lock, MTD_OTP_USER);
 
 	return ret ? : retlen;
 }
+
 #endif	/* CONFIG_MTD_ONENAND_OTP */
 
 /**
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index f57e29e17bb0..5509eb06b326 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -1,7 +1,7 @@
 /*
  *  linux/include/linux/mtd/onenand.h
  *
- *  Copyright (C) 2005-2007 Samsung Electronics
+ *  Copyright © 2005-2009 Samsung Electronics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -137,6 +137,8 @@ struct onenand_chip {
 /*
  * Helper macros
  */
+#define ONENAND_PAGES_PER_BLOCK        (1<<6)
+
 #define ONENAND_CURRENT_BUFFERRAM(this)		(this->bufferram_index)
 #define ONENAND_NEXT_BUFFERRAM(this)		(this->bufferram_index ^ 1)
 #define ONENAND_SET_NEXT_BUFFERRAM(this)	(this->bufferram_index ^= 1)
-- 
cgit v1.2.3


From 1c63aca32903efc219fb9df72bae5344f3e54ed5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 22 Oct 2009 16:53:32 +0900
Subject: mtd: Add __nand_calculate_ecc() to NAND ECC functions

Add __nand_calculate_ecc() which does not take struct mtd_info.
The built-in 256/512 software ECC calculation and correction tester
will use it.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ecc.c  | 25 ++++++++++++++++++++-----
 include/linux/mtd/nand_ecc.h | 10 ++++++++--
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index db7ae9d6a296..809fb53304ae 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -150,20 +150,19 @@ static const char addressbits[256] = {
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
  *			 block
- * @mtd:	MTD block structure
  * @buf:	input buffer with raw data
+ * @eccsize:	data bytes per ecc step (256 or 512)
  * @code:	output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
 		       unsigned char *code)
 {
 	int i;
 	const uint32_t *bp = (uint32_t *)buf;
 	/* 256 or 512 bytes/ecc  */
-	const uint32_t eccsize_mult =
-			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+	const uint32_t eccsize_mult = eccsize >> 8;
 	uint32_t cur;		/* current value in buffer */
 	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
 	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
@@ -412,6 +411,22 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 		    (invparity[par & 0x55] << 2) |
 		    (invparity[rp17] << 1) |
 		    (invparity[rp16] << 0);
+}
+EXPORT_SYMBOL(__nand_calculate_ecc);
+
+/**
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *			 block
+ * @mtd:	MTD block structure
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
+ */
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+		       unsigned char *code)
+{
+	__nand_calculate_ecc(buf,
+			((struct nand_chip *)mtd->priv)->ecc.size, code);
+
 	return 0;
 }
 EXPORT_SYMBOL(nand_calculate_ecc);
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index 052ea8ca2434..41bc013571d0 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -16,7 +16,13 @@
 struct mtd_info;
 
 /*
- * Calculate 3 byte ECC code for 256 byte block
+ * Calculate 3 byte ECC code for eccsize byte block
+ */
+void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize,
+				u_char *ecc_code);
+
+/*
+ * Calculate 3 byte ECC code for 256/512 byte block
  */
 int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code);
 
@@ -27,7 +33,7 @@ int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc,
 			unsigned int eccsize);
 
 /*
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
 int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc);
 
-- 
cgit v1.2.3


From 72073027ee95d059eb5a064da4a978efab36d4ab Mon Sep 17 00:00:00 2001
From: Mika Korhonen <ext-mika.2.korhonen@nokia.com>
Date: Fri, 23 Oct 2009 07:50:43 +0200
Subject: mtd: OneNAND: multiblock erase support

Add support for multiblock erase command. OneNANDs (excluding Flex-OneNAND)
are capable of simultaneous erase of up to 64 eraseblocks which is much faster.

This changes the erase requests for regions covering multiple eraseblocks
to be performed using multiblock erase.

Signed-off-by: Mika Korhonen <ext-mika.2.korhonen@nokia.com>
Reviewed-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/omap2.c        |  22 ++++-
 drivers/mtd/onenand/onenand_base.c | 173 ++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/flashchip.h      |   4 +-
 include/linux/mtd/onenand_regs.h   |   2 +
 4 files changed, 194 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 0108ed42e877..2dafd0949be5 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -112,10 +112,24 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 	unsigned long timeout;
 	u32 syscfg;
 
-	if (state == FL_RESETING) {
-		int i;
+	if (state == FL_RESETING || state == FL_PREPARING_ERASE ||
+	    state == FL_VERIFYING_ERASE) {
+		int i = 21;
+		unsigned int intr_flags = ONENAND_INT_MASTER;
+
+		switch (state) {
+		case FL_RESETING:
+			intr_flags |= ONENAND_INT_RESET;
+			break;
+		case FL_PREPARING_ERASE:
+			intr_flags |= ONENAND_INT_ERASE;
+			break;
+		case FL_VERIFYING_ERASE:
+			i = 101;
+			break;
+		}
 
-		for (i = 0; i < 20; i++) {
+		while (--i) {
 			udelay(1);
 			intr = read_reg(c, ONENAND_REG_INTERRUPT);
 			if (intr & ONENAND_INT_MASTER)
@@ -126,7 +140,7 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 			wait_err("controller error", state, ctrl, intr);
 			return -EIO;
 		}
-		if (!(intr & ONENAND_INT_RESET)) {
+		if ((intr & intr_flags) != intr_flags) {
 			wait_err("timeout", state, ctrl, intr);
 			return -EIO;
 		}
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 894ebadc3e69..266f471901e5 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -34,6 +34,13 @@
 
 #include <asm/io.h>
 
+/*
+ * Multiblock erase if number of blocks to erase is 2 or more.
+ * Maximum number of blocks for simultaneous erase is 64.
+ */
+#define MB_ERASE_MIN_BLK_COUNT 2
+#define MB_ERASE_MAX_BLK_COUNT 64
+
 /* Default Flex-OneNAND boundary and lock respectively */
 static int flex_bdry[MAX_DIES * 2] = { -1, 0, -1, 0 };
 
@@ -353,6 +360,8 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		break;
 
 	case ONENAND_CMD_ERASE:
+	case ONENAND_CMD_MULTIBLOCK_ERASE:
+	case ONENAND_CMD_ERASE_VERIFY:
 	case ONENAND_CMD_BUFFERRAM:
 	case ONENAND_CMD_OTP_ACCESS:
 		block = onenand_block(this, addr);
@@ -497,7 +506,7 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		if (interrupt & flags)
 			break;
 
-		if (state != FL_READING)
+		if (state != FL_READING && state != FL_PREPARING_ERASE)
 			cond_resched();
 	}
 	/* To get correct interrupt status in timeout case */
@@ -530,6 +539,18 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		return -EIO;
 	}
 
+	if (state == FL_PREPARING_ERASE && !(interrupt & ONENAND_INT_ERASE)) {
+		printk(KERN_ERR "%s: mb erase timeout! ctrl=0x%04x intr=0x%04x\n",
+		       __func__, ctrl, interrupt);
+		return -EIO;
+	}
+
+	if (!(interrupt & ONENAND_INT_MASTER)) {
+		printk(KERN_ERR "%s: timeout! ctrl=0x%04x intr=0x%04x\n",
+		       __func__, ctrl, interrupt);
+		return -EIO;
+	}
+
 	/* If there's controller error, it's a real error */
 	if (ctrl & ONENAND_CTRL_ERROR) {
 		printk(KERN_ERR "%s: controller error = 0x%04x\n",
@@ -2182,6 +2203,148 @@ static int onenand_block_isbad_nolock(struct mtd_info *mtd, loff_t ofs, int allo
 	return bbm->isbad_bbt(mtd, ofs, allowbbt);
 }
 
+
+static int onenand_multiblock_erase_verify(struct mtd_info *mtd,
+					   struct erase_info *instr)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	unsigned int block_size = (1 << this->erase_shift);
+	int ret = 0;
+
+	while (len) {
+		this->command(mtd, ONENAND_CMD_ERASE_VERIFY, addr, block_size);
+		ret = this->wait(mtd, FL_VERIFYING_ERASE);
+		if (ret) {
+			printk(KERN_ERR "%s: Failed verify, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = addr;
+			return -1;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+	return 0;
+}
+
+/**
+ * onenand_multiblock_erase - [Internal] erase block(s) using multiblock erase
+ * @param mtd		MTD device structure
+ * @param instr		erase instruction
+ * @param region	erase region
+ *
+ * Erase one or more blocks up to 64 block at a time
+ */
+static int onenand_multiblock_erase(struct mtd_info *mtd,
+				    struct erase_info *instr,
+				    unsigned int block_size)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	int eb_count = 0;
+	int ret = 0;
+	int bdry_block = 0;
+
+	instr->state = MTD_ERASING;
+
+	if (ONENAND_IS_DDP(this)) {
+		loff_t bdry_addr = this->chipsize >> 1;
+		if (addr < bdry_addr && (addr + len) > bdry_addr)
+			bdry_block = bdry_addr >> this->erase_shift;
+	}
+
+	/* Pre-check bbs */
+	while (len) {
+		/* Check if we have a bad block, we do not erase bad blocks */
+		if (onenand_block_isbad_nolock(mtd, addr, 0)) {
+			printk(KERN_WARNING "%s: attempt to erase a bad block "
+			       "at addr 0x%012llx\n",
+			       __func__, (unsigned long long) addr);
+			instr->state = MTD_ERASE_FAILED;
+			return -EIO;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+
+	len = instr->len;
+	addr = instr->addr;
+
+	/* loop over 64 eb batches */
+	while (len) {
+		struct erase_info verify_instr = *instr;
+		int max_eb_count = MB_ERASE_MAX_BLK_COUNT;
+
+		verify_instr.addr = addr;
+		verify_instr.len = 0;
+
+		/* do not cross chip boundary */
+		if (bdry_block) {
+			int this_block = (addr >> this->erase_shift);
+
+			if (this_block < bdry_block) {
+				max_eb_count = min(max_eb_count,
+						   (bdry_block - this_block));
+			}
+		}
+
+		eb_count = 0;
+
+		while (len > block_size && eb_count < (max_eb_count - 1)) {
+			this->command(mtd, ONENAND_CMD_MULTIBLOCK_ERASE,
+				      addr, block_size);
+			onenand_invalidate_bufferram(mtd, addr, block_size);
+
+			ret = this->wait(mtd, FL_PREPARING_ERASE);
+			if (ret) {
+				printk(KERN_ERR "%s: Failed multiblock erase, "
+				       "block %d\n", __func__,
+				       onenand_block(this, addr));
+				instr->state = MTD_ERASE_FAILED;
+				instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+				return -EIO;
+			}
+
+			len -= block_size;
+			addr += block_size;
+			eb_count++;
+		}
+
+		/* last block of 64-eb series */
+		cond_resched();
+		this->command(mtd, ONENAND_CMD_ERASE, addr, block_size);
+		onenand_invalidate_bufferram(mtd, addr, block_size);
+
+		ret = this->wait(mtd, FL_ERASING);
+		/* Check if it is write protected */
+		if (ret) {
+			printk(KERN_ERR "%s: Failed erase, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+			return -EIO;
+		}
+
+		len -= block_size;
+		addr += block_size;
+		eb_count++;
+
+		/* verify */
+		verify_instr.len = eb_count * block_size;
+		if (onenand_multiblock_erase_verify(mtd, &verify_instr)) {
+			instr->state = verify_instr.state;
+			instr->fail_addr = verify_instr.fail_addr;
+			return -EIO;
+		}
+
+	}
+	return 0;
+}
+
+
 /**
  * onenand_block_by_block_erase - [Internal] erase block(s) using regular erase
  * @param mtd		MTD device structure
@@ -2315,7 +2478,13 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
 
-	ret = onenand_block_by_block_erase(mtd, instr, region, block_size);
+	if (region || instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) {
+		/* region is set for Flex-OneNAND (no mb erase) */
+		ret = onenand_block_by_block_erase(mtd, instr,
+						   region, block_size);
+	} else {
+		ret = onenand_multiblock_erase(mtd, instr, block_size);
+	}
 
 	/* Deselect and wake up anyone waiting on the device */
 	onenand_release_device(mtd);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index f350a4879f75..d0bf422ae374 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -41,9 +41,11 @@ typedef enum {
 	/* These 2 come from nand_state_t, which has been unified here */
 	FL_READING,
 	FL_CACHEDPRG,
-	/* These 2 come from onenand_state_t, which has been unified here */
+	/* These 4 come from onenand_state_t, which has been unified here */
 	FL_RESETING,
 	FL_OTPING,
+	FL_PREPARING_ERASE,
+	FL_VERIFYING_ERASE,
 
 	FL_UNKNOWN
 } flstate_t;
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index acadbf53a69f..cd6f3b431195 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -131,6 +131,8 @@
 #define ONENAND_CMD_LOCK_TIGHT		(0x2C)
 #define ONENAND_CMD_UNLOCK_ALL		(0x27)
 #define ONENAND_CMD_ERASE		(0x94)
+#define ONENAND_CMD_MULTIBLOCK_ERASE	(0x95)
+#define ONENAND_CMD_ERASE_VERIFY	(0x71)
 #define ONENAND_CMD_RESET		(0xF0)
 #define ONENAND_CMD_OTP_ACCESS		(0x65)
 #define ONENAND_CMD_READID		(0x90)
-- 
cgit v1.2.3


From b1c6e6db5bb7acad82e1c64914c6a9404dae3ee1 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 2 Nov 2009 18:12:33 +0000
Subject: mtd: nand: add option to quieten off the no device found messgae

Add NAND_SCAN_SILENT_NODEV to chip->options to the user-worrying messages
'No NAND device found!!!'. This message often worries users (was three
exclamation marks really necessary?) and especially in systems such as the
Simtec Osiris where there may be optional NAND devices which are not
known until probe time.

Revised version of the original NAND_PROBE_SPECULATIVE patch after comments
by Artem Bityutskiy about adding a whole new call.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 3 ++-
 include/linux/mtd/nand.h     | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ba06473326d1..724cb2c9ad3f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2756,7 +2756,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	type = nand_get_flash_type(mtd, chip, busw, &nand_maf_id);
 
 	if (IS_ERR(type)) {
-		printk(KERN_WARNING "No NAND device found!!!\n");
+		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
+			printk(KERN_WARNING "No NAND device found.\n");
 		chip->select_chip(mtd, -1);
 		return PTR_ERR(type);
 	}
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2476078a032f..ccab9dfc5217 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -170,7 +170,6 @@ typedef enum {
 /* Chip does not allow subpage writes */
 #define NAND_NO_SUBPAGE_WRITE	0x00000200
 
-
 /* Options valid for Samsung large page devices */
 #define NAND_SAMSUNG_LP_OPTIONS \
 	(NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK)
@@ -196,6 +195,9 @@ typedef enum {
 /* This option is defined if the board driver allocates its own buffers
    (e.g. because it needs them DMA-coherent */
 #define NAND_OWN_BUFFERS	0x00040000
+/* Chip may not exist, so silence any errors in scan */
+#define NAND_SCAN_SILENT_NODEV	0x00080000
+
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
 #define NAND_CONTROLLER_ALLOC	0x80000000
-- 
cgit v1.2.3


From b2ef1a2bb2eb49cd7c75b22f1ea40ead0bdfdb8a Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Thu, 5 Nov 2009 15:53:43 +0100
Subject: mtd: move manufacturer to the common cfi.h header file

This patch moves the MANUFACTURER_ST and MANUFACTURER_INTEL to the
include/linux/mtd/cfi.h header file and renames them to CFI_MFR_ST and
CFI_MFR_INTEL. CFI_MFR_ST was already present there.

All references in drivers/mtd/chips/cfi_cmdset_0001.c are updated to reflect
this.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 20 ++++++++++----------
 include/linux/mtd/cfi.h             |  9 +++++----
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 94be67e880a7..5fbf29e1e64f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -43,11 +43,11 @@
 // debugging, turns off buffer write mode if set to 1
 #define FORCE_WORD_WRITE 0
 
-#define MANUFACTURER_INTEL	0x0089
+/* Intel chips */
 #define I82802AB	0x00ad
 #define I82802AC	0x00ac
 #define PF38F4476	0x881c
-#define MANUFACTURER_ST         0x0020
+/* STMicroelectronics chips */
 #define M50LPW080       0x002F
 #define M50FLW080A	0x0080
 #define M50FLW080B	0x0081
@@ -308,16 +308,16 @@ static struct cfi_fixup cfi_fixup_table[] = {
 #endif
 	{ CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL },
 	{ CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL },
-	{ MANUFACTURER_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, },
+	{ CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, },
 	{ 0, 0, NULL, NULL }
 };
 
 static struct cfi_fixup jedec_fixup_table[] = {
-	{ MANUFACTURER_INTEL, I82802AB,   fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_INTEL, I82802AC,   fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50LPW080,  fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50FLW080A, fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50FLW080B, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_INTEL, I82802AB,   fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_INTEL, I82802AC,   fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50LPW080,  fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50FLW080A, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50FLW080B, fixup_use_fwh_lock, NULL, },
 	{ 0, 0, NULL, NULL }
 };
 static struct cfi_fixup fixup_table[] = {
@@ -333,7 +333,7 @@ static struct cfi_fixup fixup_table[] = {
 static void cfi_fixup_major_minor(struct cfi_private *cfi,
 						struct cfi_pri_intelext *extp)
 {
-	if (cfi->mfr == MANUFACTURER_INTEL &&
+	if (cfi->mfr == CFI_MFR_INTEL &&
 			cfi->id == PF38F4476 && extp->MinorVersion == '3')
 		extp->MinorVersion = '1';
 }
@@ -2249,7 +2249,7 @@ static int cfi_intelext_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 
 	/* Some chips have OTP located in the _top_ partition only.
 	   For example: Intel 28F256L18T (T means top-parameter device) */
-	if (cfi->mfr == MANUFACTURER_INTEL) {
+	if (cfi->mfr == CFI_MFR_INTEL) {
 		switch (cfi->id) {
 		case 0x880b:
 		case 0x880c:
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 88d3d8fbf9f2..df89f4275232 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -518,10 +518,11 @@ struct cfi_fixup {
 #define CFI_MFR_ANY 0xffff
 #define CFI_ID_ANY  0xffff
 
-#define CFI_MFR_AMD 0x0001
-#define CFI_MFR_ATMEL 0x001F
-#define CFI_MFR_SAMSUNG 0x00EC
-#define CFI_MFR_ST  0x0020 	/* STMicroelectronics */
+#define CFI_MFR_AMD	0x0001
+#define CFI_MFR_INTEL	0x0089
+#define CFI_MFR_ATMEL	0x001F
+#define CFI_MFR_SAMSUNG	0x00EC
+#define CFI_MFR_ST	0x0020 /* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
 
-- 
cgit v1.2.3


From 456b565cc52fbcdaa2e19ffdf40d9dd3b726d603 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Fri, 16 Oct 2009 14:09:18 +0200
Subject: core: Add kernel message dumper to call on oopses and panics

The core functionality is implemented as per Linus suggestion from

  http://lists.infradead.org/pipermail/linux-mtd/2009-October/027620.html

(with the kmsg_dump implementation by Linus). A struct kmsg_dumper has
been added which contains a callback to dump the kernel log buffers on
crashes. The kmsg_dump function gets called from oops_exit() and panic()
and invokes this callbacks with the crash reason.

[dwmw2: Fix log_end handling]
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Reviewed-by: Anders Grafstrom <anders.grafstrom@netinsight.net>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/kmsg_dump.h |  44 +++++++++++++++++
 kernel/panic.c            |   3 ++
 kernel/printk.c           | 119 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 include/linux/kmsg_dump.h

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
new file mode 100644
index 000000000000..7f089ec5ef32
--- /dev/null
+++ b/include/linux/kmsg_dump.h
@@ -0,0 +1,44 @@
+/*
+ * linux/include/kmsg_dump.h
+ *
+ * Copyright (C) 2009 Net Insight AB
+ *
+ * Author: Simon Kagstrom <simon.kagstrom@netinsight.net>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+#ifndef _LINUX_KMSG_DUMP_H
+#define _LINUX_KMSG_DUMP_H
+
+#include <linux/list.h>
+
+enum kmsg_dump_reason {
+	KMSG_DUMP_OOPS,
+	KMSG_DUMP_PANIC,
+};
+
+/**
+ * struct kmsg_dumper - kernel crash message dumper structure
+ * @dump:	The callback which gets called on crashes. The buffer is passed
+ * 		as two sections, where s1 (length l1) contains the older
+ * 		messages and s2 (length l2) contains the newer.
+ * @list:	Entry in the dumper list (private)
+ * @registered:	Flag that specifies if this is already registered
+ */
+struct kmsg_dumper {
+	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
+			const char *s1, unsigned long l1,
+			const char *s2, unsigned long l2);
+	struct list_head list;
+	int registered;
+};
+
+void kmsg_dump(enum kmsg_dump_reason reason);
+
+int kmsg_dump_register(struct kmsg_dumper *dumper);
+
+int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+
+#endif /* _LINUX_KMSG_DUMP_H */
diff --git a/kernel/panic.c b/kernel/panic.c
index bcdef26e3332..8c43226a544d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
  */
 #include <linux/debug_locks.h>
 #include <linux/interrupt.h>
+#include <linux/kmsg_dump.h>
 #include <linux/kallsyms.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 	dump_stack();
 #endif
 
+	kmsg_dump(KMSG_DUMP_PANIC);
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
 	 * everything else.
@@ -338,6 +340,7 @@ void oops_exit(void)
 {
 	do_oops_enter_exit();
 	print_oops_end_marker();
+	kmsg_dump(KMSG_DUMP_OOPS);
 }
 
 #ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/printk.c b/kernel/printk.c
index f38b07f78a4e..051d1f50648f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/uaccess.h>
 
@@ -1405,3 +1406,121 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
 #endif
+
+static DEFINE_SPINLOCK(dump_list_lock);
+static LIST_HEAD(dump_list);
+
+/**
+ * kmsg_dump_register - register a kernel log dumper.
+ * @dump: pointer to the kmsg_dumper structure
+ *
+ * Adds a kernel log dumper to the system. The dump callback in the
+ * structure will be called when the kernel oopses or panics and must be
+ * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
+ */
+int kmsg_dump_register(struct kmsg_dumper *dumper)
+{
+	unsigned long flags;
+	int err = -EBUSY;
+
+	/* The dump callback needs to be set */
+	if (!dumper->dump)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dump_list_lock, flags);
+	/* Don't allow registering multiple times */
+	if (!dumper->registered) {
+		dumper->registered = 1;
+		list_add_tail(&dumper->list, &dump_list);
+		err = 0;
+	}
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_register);
+
+/**
+ * kmsg_dump_unregister - unregister a kmsg dumper.
+ * @dump: pointer to the kmsg_dumper structure
+ *
+ * Removes a dump device from the system. Returns zero on success and
+ * %-EINVAL otherwise.
+ */
+int kmsg_dump_unregister(struct kmsg_dumper *dumper)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&dump_list_lock, flags);
+	if (dumper->registered) {
+		dumper->registered = 0;
+		list_del(&dumper->list);
+		err = 0;
+	}
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
+
+static const char const *kmsg_reasons[] = {
+	[KMSG_DUMP_OOPS]	= "oops",
+	[KMSG_DUMP_PANIC]	= "panic",
+};
+
+static const char *kmsg_to_str(enum kmsg_dump_reason reason)
+{
+	if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
+		return "unknown";
+
+	return kmsg_reasons[reason];
+}
+
+/**
+ * kmsg_dump - dump kernel log to kernel message dumpers.
+ * @reason: the reason (oops, panic etc) for dumping
+ *
+ * Iterate through each of the dump devices and call the oops/panic
+ * callbacks with the log buffer.
+ */
+void kmsg_dump(enum kmsg_dump_reason reason)
+{
+	unsigned long end;
+	unsigned chars;
+	struct kmsg_dumper *dumper;
+	const char *s1, *s2;
+	unsigned long l1, l2;
+	unsigned long flags;
+
+	/* Theoretically, the log could move on after we do this, but
+	   there's not a lot we can do about that. The new messages
+	   will overwrite the start of what we dump. */
+	spin_lock_irqsave(&logbuf_lock, flags);
+	end = log_end & LOG_BUF_MASK;
+	chars = logged_chars;
+	spin_unlock_irqrestore(&logbuf_lock, flags);
+
+	if (logged_chars > end) {
+		s1 = log_buf + log_buf_len - logged_chars + end;
+		l1 = logged_chars - end;
+
+		s2 = log_buf;
+		l2 = end;
+	} else {
+		s1 = "";
+		l1 = 0;
+
+		s2 = log_buf + end - logged_chars;
+		l2 = logged_chars;
+	}
+
+	if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
+		printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
+				kmsg_to_str(reason));
+		return;
+	}
+	list_for_each_entry(dumper, &dump_list, list)
+		dumper->dump(dumper, reason, s1, l1, s2, l2);
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+}
-- 
cgit v1.2.3


From b3a8549593696f5f3efcdbf280e2c8e0fe894855 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Thu, 5 Nov 2009 10:27:13 -0500
Subject: backlight: da903x_bl: control WLED output current in da9034

Update WLED output current source before changing brightness.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
---
 drivers/video/backlight/da903x_bl.c | 7 +++++++
 include/linux/mfd/da903x.h          | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 701a1081e199..7fcb0eb54c60 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -25,6 +25,7 @@
 
 #define DA9034_WLED_CONTROL1	0x3C
 #define DA9034_WLED_CONTROL2	0x3D
+#define DA9034_WLED_ISET(x)	((x) & 0x1f)
 
 #define DA9034_WLED_BOOST_EN	(1 << 5)
 
@@ -101,6 +102,7 @@ static struct backlight_ops da903x_backlight_ops = {
 
 static int da903x_backlight_probe(struct platform_device *pdev)
 {
+	struct da9034_backlight_pdata *pdata = pdev->dev.platform_data;
 	struct da903x_backlight_data *data;
 	struct backlight_device *bl;
 	int max_brightness;
@@ -127,6 +129,11 @@ static int da903x_backlight_probe(struct platform_device *pdev)
 	data->da903x_dev = pdev->dev.parent;
 	data->current_brightness = 0;
 
+	/* adjust the WLED output current */
+	if (pdata)
+		da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2,
+				DA9034_WLED_ISET(pdata->output_current));
+
 	bl = backlight_device_register(pdev->name, data->da903x_dev,
 			data, &da903x_backlight_ops);
 	if (IS_ERR(bl)) {
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
index c63b65c94429..0aa3a1a49ee3 100644
--- a/include/linux/mfd/da903x.h
+++ b/include/linux/mfd/da903x.h
@@ -96,6 +96,10 @@ struct da9034_touch_pdata {
 	int	y_inverted;
 };
 
+struct da9034_backlight_pdata {
+	int	output_current;	/* output current of WLED, from 0-31 (in mA) */
+};
+
 /* DA9030 battery charger data */
 struct power_supply_info;
 
-- 
cgit v1.2.3


From 9996508b3353063f2d6c48c1a28a84543d72d70b Mon Sep 17 00:00:00 2001
From: Ian Molton <ian.molton@collabora.co.uk>
Date: Tue, 1 Dec 2009 14:47:32 +0800
Subject: hwrng: core - Replace u32 in driver API with byte array

This patch implements a new method by which hw_random hardware drivers
can pass data to the core more efficiently, using a shared buffer.

The old methods have been retained as a compatability layer until all the
drivers have been updated.

Signed-off-by: Ian Molton <ian.molton@collabora.co.uk>
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 107 +++++++++++++++++++++++++-----------------
 include/linux/hw_random.h     |   7 ++-
 2 files changed, 69 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index fc93e2fc7c71..82367262f3a8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -52,7 +52,8 @@
 static struct hwrng *current_rng;
 static LIST_HEAD(rng_list);
 static DEFINE_MUTEX(rng_mutex);
-
+static int data_avail;
+static u8 rng_buffer[SMP_CACHE_BYTES] __cacheline_aligned;
 
 static inline int hwrng_init(struct hwrng *rng)
 {
@@ -67,19 +68,6 @@ static inline void hwrng_cleanup(struct hwrng *rng)
 		rng->cleanup(rng);
 }
 
-static inline int hwrng_data_present(struct hwrng *rng, int wait)
-{
-	if (!rng->data_present)
-		return 1;
-	return rng->data_present(rng, wait);
-}
-
-static inline int hwrng_data_read(struct hwrng *rng, u32 *data)
-{
-	return rng->data_read(rng, data);
-}
-
-
 static int rng_dev_open(struct inode *inode, struct file *filp)
 {
 	/* enforce read-only access to this chrdev */
@@ -91,54 +79,87 @@ static int rng_dev_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
+			int wait) {
+	int present;
+
+	if (rng->read)
+		return rng->read(rng, (void *)buffer, size, wait);
+
+	if (rng->data_present)
+		present = rng->data_present(rng, wait);
+	else
+		present = 1;
+
+	if (present)
+		return rng->data_read(rng, (u32 *)buffer);
+
+	return 0;
+}
+
 static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 			    size_t size, loff_t *offp)
 {
-	u32 data;
 	ssize_t ret = 0;
 	int err = 0;
-	int bytes_read;
+	int bytes_read, len;
 
 	while (size) {
-		err = -ERESTARTSYS;
-		if (mutex_lock_interruptible(&rng_mutex))
+		if (mutex_lock_interruptible(&rng_mutex)) {
+			err = -ERESTARTSYS;
 			goto out;
+		}
+
 		if (!current_rng) {
-			mutex_unlock(&rng_mutex);
 			err = -ENODEV;
-			goto out;
+			goto out_unlock;
 		}
 
-		bytes_read = 0;
-		if (hwrng_data_present(current_rng,
-				       !(filp->f_flags & O_NONBLOCK)))
-			bytes_read = hwrng_data_read(current_rng, &data);
-		mutex_unlock(&rng_mutex);
-
-		err = -EAGAIN;
-		if (!bytes_read && (filp->f_flags & O_NONBLOCK))
-			goto out;
-		if (bytes_read < 0) {
-			err = bytes_read;
-			goto out;
+		if (!data_avail) {
+			bytes_read = rng_get_data(current_rng, rng_buffer,
+				sizeof(rng_buffer),
+				!(filp->f_flags & O_NONBLOCK));
+			if (bytes_read < 0) {
+				err = bytes_read;
+				goto out_unlock;
+			}
+			data_avail = bytes_read;
 		}
 
-		err = -EFAULT;
-		while (bytes_read && size) {
-			if (put_user((u8)data, buf++))
-				goto out;
-			size--;
-			ret++;
-			bytes_read--;
-			data >>= 8;
+		if (!data_avail) {
+			if (filp->f_flags & O_NONBLOCK) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+		} else {
+			len = data_avail;
+			if (len > size)
+				len = size;
+
+			data_avail -= len;
+
+			if (copy_to_user(buf + ret, rng_buffer + data_avail,
+								len)) {
+				err = -EFAULT;
+				goto out_unlock;
+			}
+
+			size -= len;
+			ret += len;
 		}
 
+		mutex_unlock(&rng_mutex);
+
 		if (need_resched())
 			schedule_timeout_interruptible(1);
-		err = -ERESTARTSYS;
-		if (signal_pending(current))
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
 			goto out;
+		}
 	}
+out_unlock:
+	mutex_unlock(&rng_mutex);
 out:
 	return ret ? : err;
 }
@@ -280,7 +301,7 @@ int hwrng_register(struct hwrng *rng)
 	struct hwrng *old_rng, *tmp;
 
 	if (rng->name == NULL ||
-	    rng->data_read == NULL)
+	    (rng->data_read == NULL && rng->read == NULL))
 		goto out;
 
 	mutex_lock(&rng_mutex);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 7244456e7e65..9bede7633f74 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -22,10 +22,12 @@
  * @cleanup:		Cleanup callback (can be NULL).
  * @data_present:	Callback to determine if data is available
  *			on the RNG. If NULL, it is assumed that
- *			there is always data available.
+ *			there is always data available.  *OBSOLETE*
  * @data_read:		Read data from the RNG device.
  *			Returns the number of lower random bytes in "data".
- *			Must not be NULL.
+ *			Must not be NULL.    *OSOLETE*
+ * @read:		New API. drivers can fill up to max bytes of data
+ *			into the buffer. The buffer is aligned for any type.
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
@@ -34,6 +36,7 @@ struct hwrng {
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
 	int (*data_read)(struct hwrng *rng, u32 *data);
+	int (*read)(struct hwrng *rng, void *data, size_t max, bool wait);
 	unsigned long priv;
 
 	/* internal. */
-- 
cgit v1.2.3


From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 30 Nov 2009 13:24:18 +0000
Subject: mfd: Correct WM831X_MAX_ISEL_VALUE

There was confusion between the array size and the highest ISEL
value possible.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm831x-core.c            | 2 +-
 include/linux/mfd/wm831x/regulator.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 49b7885c2702..7f27576ca046 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -29,7 +29,7 @@
 /* Current settings - values are 2*2^(reg_val/4) microamps.  These are
  * exported since they are used by multiple drivers.
  */
-int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL] = {
+int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = {
 	2,
 	2,
 	3,
diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h
index f95466343fb2..955d30fc6a27 100644
--- a/include/linux/mfd/wm831x/regulator.h
+++ b/include/linux/mfd/wm831x/regulator.h
@@ -1212,7 +1212,7 @@
 #define WM831X_LDO1_OK_SHIFT                         0  /* LDO1_OK */
 #define WM831X_LDO1_OK_WIDTH                         1  /* LDO1_OK */
 
-#define WM831X_ISINK_MAX_ISEL 56
-extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL];
+#define WM831X_ISINK_MAX_ISEL 55
+extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1];
 
 #endif
-- 
cgit v1.2.3


From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 1 Dec 2009 15:36:11 +0000
Subject: SLOW_WORK: Move slow_work's proc file to debugfs

Move slow_work's debugging proc file to debugfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Requested-and-acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/slow-work.txt |   4 +-
 include/linux/slow-work.h   |   8 +-
 init/Kconfig                |   8 +-
 kernel/Makefile             |   2 +-
 kernel/slow-work-debugfs.c  | 227 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/slow-work-proc.c     | 227 --------------------------------------------
 kernel/slow-work.c          |  18 ++--
 kernel/slow-work.h          |   6 +-
 8 files changed, 253 insertions(+), 247 deletions(-)
 create mode 100644 kernel/slow-work-debugfs.c
 delete mode 100644 kernel/slow-work-proc.c

(limited to 'include/linux')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index 52bc31433723..9dbf4470c7e1 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables:
 VIEWING EXECUTING AND QUEUED ITEMS
 ==================================
 
-If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available:
+If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available:
 
-	/proc/slow_work_rq
+	/sys/kernel/debug/slow_work/runqueue
 
 through which the list of work items being executed and the queues of items to
 be executed may be viewed.  The owner of a work item is given the chance to
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index 5035a2691739..13337bf6c3f5 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -20,7 +20,7 @@
 #include <linux/timer.h>
 
 struct slow_work;
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 struct seq_file;
 #endif
 
@@ -42,8 +42,8 @@ struct slow_work_ops {
 	/* execute a work item */
 	void (*execute)(struct slow_work *work);
 
-#ifdef CONFIG_SLOW_WORK_PROC
-	/* describe a work item for /proc */
+#ifdef CONFIG_SLOW_WORK_DEBUG
+	/* describe a work item for debugfs */
 	void (*desc)(struct slow_work *work, struct seq_file *m);
 #endif
 };
@@ -64,7 +64,7 @@ struct slow_work {
 #define SLOW_WORK_DELAYED	5	/* item is struct delayed_slow_work with active timer */
 	const struct slow_work_ops *ops; /* operations table for this item */
 	struct list_head	link;	/* link in queue */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	struct timespec		mark;	/* jiffies at which queued or exec begun */
 #endif
 };
diff --git a/init/Kconfig b/init/Kconfig
index ab5c64801fe5..39923ccc287b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1098,12 +1098,12 @@ config SLOW_WORK
 
 	  See Documentation/slow-work.txt.
 
-config SLOW_WORK_PROC
-	bool "Slow work debugging through /proc"
+config SLOW_WORK_DEBUG
+	bool "Slow work debugging through debugfs"
 	default n
-	depends on SLOW_WORK && PROC_FS
+	depends on SLOW_WORK && DEBUG_FS
 	help
-	  Display the contents of the slow work run queue through /proc,
+	  Display the contents of the slow work run queue through debugfs,
 	  including items currently executing.
 
 	  See Documentation/slow-work.txt.
diff --git a/kernel/Makefile b/kernel/Makefile
index 776ffed1556d..d7c13d249b2d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
-obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o
+obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c
new file mode 100644
index 000000000000..e45c43645298
--- /dev/null
+++ b/kernel/slow-work-debugfs.c
@@ -0,0 +1,227 @@
+/* Slow work debugging
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/slow-work.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include "slow-work.h"
+
+#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
+#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
+#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
+
+void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
+{
+	seq_puts(m, "Slow-work: New thread");
+}
+
+/*
+ * Render the time mark field on a work item into a 5-char time with units plus
+ * a space
+ */
+static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
+{
+	struct timespec now, diff;
+
+	now = CURRENT_TIME;
+	diff = timespec_sub(now, work->mark);
+
+	if (diff.tv_sec < 0)
+		seq_puts(m, "  -ve ");
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
+		seq_printf(m, "%3luns ", diff.tv_nsec);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
+		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
+		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
+	else if (diff.tv_sec <= 1)
+		seq_puts(m, "   1s ");
+	else if (diff.tv_sec < 60)
+		seq_printf(m, "%4lus ", diff.tv_sec);
+	else if (diff.tv_sec < 60 * 60)
+		seq_printf(m, "%4lum ", diff.tv_sec / 60);
+	else if (diff.tv_sec < 60 * 60 * 24)
+		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
+	else
+		seq_puts(m, "exces ");
+}
+
+/*
+ * Describe a slow work item for debugfs
+ */
+static int slow_work_runqueue_show(struct seq_file *m, void *v)
+{
+	struct slow_work *work;
+	struct list_head *p = v;
+	unsigned long id;
+
+	switch ((unsigned long) v) {
+	case 1:
+		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
+		return 0;
+	case 2:
+		seq_puts(m, "=== ===== ================ == ===== ==========\n");
+		return 0;
+
+	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
+		id = (unsigned long) v - 3;
+
+		read_lock(&slow_work_execs_lock);
+		work = slow_work_execs[id];
+		if (work) {
+			smp_read_barrier_depends();
+
+			seq_printf(m, "%3lu %5d %16p %2lx ",
+				   id, slow_work_pids[id], work, work->flags);
+			slow_work_print_mark(m, work);
+
+			if (work->ops->desc)
+				work->ops->desc(work, m);
+			seq_putc(m, '\n');
+		}
+		read_unlock(&slow_work_execs_lock);
+		return 0;
+
+	default:
+		work = list_entry(p, struct slow_work, link);
+		seq_printf(m, "%3s     - %16p %2lx ",
+			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
+			   work, work->flags);
+		slow_work_print_mark(m, work);
+
+		if (work->ops->desc)
+			work->ops->desc(work, m);
+		seq_putc(m, '\n');
+		return 0;
+	}
+}
+
+/*
+ * map the iterator to a work item
+ */
+static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *p;
+	unsigned long count, id;
+
+	switch (*_pos >> ITERATOR_SHIFT) {
+	case 0x0:
+		if (*_pos == 0)
+			*_pos = 1;
+		if (*_pos < 3)
+			return (void *)(unsigned long) *_pos;
+		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
+			for (id = *_pos - 3;
+			     id < SLOW_WORK_THREAD_LIMIT;
+			     id++, (*_pos)++)
+				if (slow_work_execs[id])
+					return (void *)(unsigned long) *_pos;
+		*_pos = 0x1UL << ITERATOR_SHIFT;
+
+	case 0x1:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &slow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+
+	case 0x2:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &vslow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
+{
+	spin_lock_irq(&slow_work_queue_lock);
+	return slow_work_runqueue_index(m, _pos);
+}
+
+/*
+ * move to the next line
+ */
+static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	struct list_head *p = v;
+	unsigned long selector = *_pos >> ITERATOR_SHIFT;
+
+	(*_pos)++;
+	switch (selector) {
+	case 0x0:
+		return slow_work_runqueue_index(m, _pos);
+
+	case 0x1:
+		if (*_pos >> ITERATOR_SHIFT == 0x1) {
+			p = p->next;
+			if (p != &slow_work_queue)
+				return p;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+		p = &vslow_work_queue;
+
+	case 0x2:
+		if (*_pos >> ITERATOR_SHIFT == 0x2) {
+			p = p->next;
+			if (p != &vslow_work_queue)
+				return p;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * clean up after reading
+ */
+static void slow_work_runqueue_stop(struct seq_file *m, void *v)
+{
+	spin_unlock_irq(&slow_work_queue_lock);
+}
+
+static const struct seq_operations slow_work_runqueue_ops = {
+	.start		= slow_work_runqueue_start,
+	.stop		= slow_work_runqueue_stop,
+	.next		= slow_work_runqueue_next,
+	.show		= slow_work_runqueue_show,
+};
+
+/*
+ * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents
+ */
+static int slow_work_runqueue_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &slow_work_runqueue_ops);
+}
+
+const struct file_operations slow_work_runqueue_fops = {
+	.owner		= THIS_MODULE,
+	.open		= slow_work_runqueue_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c
deleted file mode 100644
index 3988032571f5..000000000000
--- a/kernel/slow-work-proc.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Slow work debugging
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/slow-work.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/seq_file.h>
-#include "slow-work.h"
-
-#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
-#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
-#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
-
-void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
-{
-	seq_puts(m, "Slow-work: New thread");
-}
-
-/*
- * Render the time mark field on a work item into a 5-char time with units plus
- * a space
- */
-static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
-{
-	struct timespec now, diff;
-
-	now = CURRENT_TIME;
-	diff = timespec_sub(now, work->mark);
-
-	if (diff.tv_sec < 0)
-		seq_puts(m, "  -ve ");
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
-		seq_printf(m, "%3luns ", diff.tv_nsec);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
-		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
-		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
-	else if (diff.tv_sec <= 1)
-		seq_puts(m, "   1s ");
-	else if (diff.tv_sec < 60)
-		seq_printf(m, "%4lus ", diff.tv_sec);
-	else if (diff.tv_sec < 60 * 60)
-		seq_printf(m, "%4lum ", diff.tv_sec / 60);
-	else if (diff.tv_sec < 60 * 60 * 24)
-		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
-	else
-		seq_puts(m, "exces ");
-}
-
-/*
- * Describe a slow work item for /proc
- */
-static int slow_work_runqueue_show(struct seq_file *m, void *v)
-{
-	struct slow_work *work;
-	struct list_head *p = v;
-	unsigned long id;
-
-	switch ((unsigned long) v) {
-	case 1:
-		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
-		return 0;
-	case 2:
-		seq_puts(m, "=== ===== ================ == ===== ==========\n");
-		return 0;
-
-	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
-		id = (unsigned long) v - 3;
-
-		read_lock(&slow_work_execs_lock);
-		work = slow_work_execs[id];
-		if (work) {
-			smp_read_barrier_depends();
-
-			seq_printf(m, "%3lu %5d %16p %2lx ",
-				   id, slow_work_pids[id], work, work->flags);
-			slow_work_print_mark(m, work);
-
-			if (work->ops->desc)
-				work->ops->desc(work, m);
-			seq_putc(m, '\n');
-		}
-		read_unlock(&slow_work_execs_lock);
-		return 0;
-
-	default:
-		work = list_entry(p, struct slow_work, link);
-		seq_printf(m, "%3s     - %16p %2lx ",
-			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
-			   work, work->flags);
-		slow_work_print_mark(m, work);
-
-		if (work->ops->desc)
-			work->ops->desc(work, m);
-		seq_putc(m, '\n');
-		return 0;
-	}
-}
-
-/*
- * map the iterator to a work item
- */
-static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *p;
-	unsigned long count, id;
-
-	switch (*_pos >> ITERATOR_SHIFT) {
-	case 0x0:
-		if (*_pos == 0)
-			*_pos = 1;
-		if (*_pos < 3)
-			return (void *)(unsigned long) *_pos;
-		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
-			for (id = *_pos - 3;
-			     id < SLOW_WORK_THREAD_LIMIT;
-			     id++, (*_pos)++)
-				if (slow_work_execs[id])
-					return (void *)(unsigned long) *_pos;
-		*_pos = 0x1UL << ITERATOR_SHIFT;
-
-	case 0x1:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &slow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-
-	case 0x2:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &vslow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
-{
-	spin_lock_irq(&slow_work_queue_lock);
-	return slow_work_runqueue_index(m, _pos);
-}
-
-/*
- * move to the next line
- */
-static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
-{
-	struct list_head *p = v;
-	unsigned long selector = *_pos >> ITERATOR_SHIFT;
-
-	(*_pos)++;
-	switch (selector) {
-	case 0x0:
-		return slow_work_runqueue_index(m, _pos);
-
-	case 0x1:
-		if (*_pos >> ITERATOR_SHIFT == 0x1) {
-			p = p->next;
-			if (p != &slow_work_queue)
-				return p;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-		p = &vslow_work_queue;
-
-	case 0x2:
-		if (*_pos >> ITERATOR_SHIFT == 0x2) {
-			p = p->next;
-			if (p != &vslow_work_queue)
-				return p;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * clean up after reading
- */
-static void slow_work_runqueue_stop(struct seq_file *m, void *v)
-{
-	spin_unlock_irq(&slow_work_queue_lock);
-}
-
-static const struct seq_operations slow_work_runqueue_ops = {
-	.start		= slow_work_runqueue_start,
-	.stop		= slow_work_runqueue_stop,
-	.next		= slow_work_runqueue_next,
-	.show		= slow_work_runqueue_show,
-};
-
-/*
- * open "/proc/slow_work_rq" to list queue contents
- */
-static int slow_work_runqueue_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &slow_work_runqueue_ops);
-}
-
-const struct file_operations slow_work_runqueue_fops = {
-	.owner		= THIS_MODULE,
-	.open		= slow_work_runqueue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index b5c17f15f9de..00889bd3c590 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -16,7 +16,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/wait.h>
-#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 #include "slow-work.h"
 
 static void slow_work_cull_timeout(unsigned long);
@@ -138,7 +138,7 @@ static void slow_work_clear_thread_processing(int id) {}
 /*
  * Data for tracking currently executing items for indication through /proc
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
 pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
 DEFINE_RWLOCK(slow_work_execs_lock);
@@ -823,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work)
 static const struct slow_work_ops slow_work_new_thread_ops = {
 	.owner		= THIS_MODULE,
 	.execute	= slow_work_new_thread_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= slow_work_new_thread_desc,
 #endif
 };
@@ -1055,9 +1055,15 @@ static int __init init_slow_work(void)
 	if (slow_work_max_max_threads < nr_cpus * 2)
 		slow_work_max_max_threads = nr_cpus * 2;
 #endif
-#ifdef CONFIG_SLOW_WORK_PROC
-	proc_create("slow_work_rq", S_IFREG | 0400, NULL,
-		    &slow_work_runqueue_fops);
+#ifdef CONFIG_SLOW_WORK_DEBUG
+	{
+		struct dentry *dbdir;
+
+		dbdir = debugfs_create_dir("slow_work", NULL);
+		if (dbdir && !IS_ERR(dbdir))
+			debugfs_create_file("runqueue", S_IFREG | 0400, dbdir,
+					    NULL, &slow_work_runqueue_fops);
+	}
 #endif
 	return 0;
 }
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 3c2f007f3ad6..321f3c59d732 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -19,7 +19,7 @@
 /*
  * slow-work.c
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 extern struct slow_work *slow_work_execs[];
 extern pid_t slow_work_pids[];
 extern rwlock_t slow_work_execs_lock;
@@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue;
 extern spinlock_t slow_work_queue_lock;
 
 /*
- * slow-work-proc.c
+ * slow-work-debugfs.c
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 extern const struct file_operations slow_work_runqueue_fops;
 
 extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
-- 
cgit v1.2.3


From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:20 +0800
Subject: trace_syscalls: Remove unused event_syscall_enter and
 event_syscall_exit

fix event_enter_##sname->event
fix event_exit_##sname->event

remove unused event_syscall_enter and event_syscall_exit

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D278.4090209@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 4 ++--
 include/trace/syscall.h       | 2 --
 kernel/trace/trace_syscalls.c | 8 --------
 3 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b50974a93af0..2f7c539ab96d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_enter,		\
+		.event                  = &enter_syscall_print_##sname,	\
 		.raw_init		= init_enter_##sname,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
@@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_exit,		\
+		.event                  = &exit_syscall_print_##sname,	\
 		.raw_init		= init_exit_##sname,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 51ee17d3632a..5f8827c92db7 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
-extern struct trace_event event_syscall_enter;
-extern struct trace_event event_syscall_exit;
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 63aa8070365d..00d6e176f5b6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-struct trace_event event_syscall_enter = {
-	.trace			= print_syscall_enter,
-};
-
-struct trace_event event_syscall_exit = {
-	.trace			= print_syscall_exit,
-};
-
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
-- 
cgit v1.2.3


From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:30 +0800
Subject: trace_syscalls: Set event_enter_##sname->data to its metadata

Set event_enter_##sname->data to its metadata,
it makes codes simpler.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D282.7050709@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  6 ++++--
 include/trace/syscall.h       |  2 +-
 kernel/trace/trace_syscalls.c | 36 +++++++++++-------------------------
 3 files changed, 16 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2f7c539ab96d..d3c9fd01a110 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
@@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_ENTER_PROFILE_INIT(sname)			\
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
@@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_EXIT_PROFILE_INIT(sname)			\
 	}
 
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 5f8827c92db7..c5265c81c4e7 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -34,7 +34,7 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(char *name);
+extern int syscall_name_to_nr(const char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 00d6e176f5b6..39649b1675dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(char *name)
+int syscall_name_to_nr(const char *name)
 {
 	int i;
 
@@ -172,18 +172,11 @@ extern char *__bad_type_size(void);
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	int i;
-	int nr;
 	int ret;
-	struct syscall_metadata *entry;
+	struct syscall_metadata *entry = call->data;
 	struct syscall_trace_enter trace;
 	int offset = offsetof(struct syscall_trace_enter, args);
 
-	nr = syscall_name_to_nr(call->data);
-	entry = syscall_nr_to_meta(nr);
-
-	if (!entry)
-		return 0;
-
 	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
 			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr));
@@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
 int syscall_enter_define_fields(struct ftrace_event_call *call)
 {
 	struct syscall_trace_enter trace;
-	struct syscall_metadata *meta;
+	struct syscall_metadata *meta = call->data;
 	int ret;
-	int nr;
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	nr = syscall_name_to_nr(call->data);
-	meta = syscall_nr_to_meta(nr);
-
-	if (!meta)
-		return 0;
-
 	ret = trace_define_common_fields(call);
 	if (ret)
 		return ret;
@@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
+	const char *name;
 
-	name = (char *)call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
@@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
+	const char *name;
 
-	name = (char *)call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return;
@@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
+	const char *name;
 
-	name = call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
@@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
+	const char *name;
 
-	name = call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return;
-- 
cgit v1.2.3


From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:36 +0800
Subject: trace_syscalls: Remove enter_id exit_id

use ->enter_event->id instead of ->enter_id
use ->exit_event->id instead of ->exit_id

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D288.7030001@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  2 --
 include/trace/syscall.h       |  6 ------
 kernel/trace/trace_syscalls.c | 30 ++++++++++--------------------
 3 files changed, 10 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d3c9fd01a110..b9af87560adb 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		if (!id)						\
 			return -ENODEV;					\
 		event_enter_##sname.id = id;				\
-		set_syscall_enter_id(num, id);				\
 		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
 		return 0;						\
 	}								\
@@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		if (!id)						\
 			return -ENODEV;					\
 		event_exit_##sname.id = id;				\
-		set_syscall_exit_id(num, id);				\
 		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
 		return 0;						\
 	}								\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index c5265c81c4e7..ca09561cd578 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -15,8 +15,6 @@
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
- * @enter_id: associated ftrace enter event id
- * @exit_id: associated ftrace exit event id
  * @enter_event: associated syscall_enter trace event
  * @exit_event: associated syscall_exit trace event
  */
@@ -25,8 +23,6 @@ struct syscall_metadata {
 	int		nb_args;
 	const char	**types;
 	const char	**args;
-	int		enter_id;
-	int		exit_id;
 
 	struct ftrace_event_call *enter_event;
 	struct ftrace_event_call *exit_event;
@@ -35,8 +31,6 @@ struct syscall_metadata {
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(const char *name);
-void set_syscall_enter_id(int num, int id);
-void set_syscall_exit_id(int num, int id);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 39649b1675dd..27eb18d69222 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name)
 	return -1;
 }
 
-void set_syscall_enter_id(int num, int id)
-{
-	syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
-	syscalls_metadata[num]->exit_id = id;
-}
-
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
 	if (!entry)
 		goto end;
 
-	if (entry->enter_id != ent->type) {
+	if (entry->enter_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		goto end;
 	}
@@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
 		return TRACE_TYPE_HANDLED;
 	}
 
-	if (entry->exit_id != ent->type) {
+	if (entry->exit_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		return TRACE_TYPE_UNHANDLED;
 	}
@@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
 
 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
-						  size, 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->enter_event->id, size, 0, 0);
 	if (!event)
 		return;
 
@@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
-				sizeof(*entry), 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->exit_event->id, sizeof(*entry), 0, 0);
 	if (!event)
 		return;
 
@@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 
 	rec = (struct syscall_trace_enter *) raw_data;
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->enter_id;
+	rec->ent.type = sys_data->enter_event->id;
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
 
 end:
 	perf_swevent_put_recursion_context(rctx);
@@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec = (struct syscall_trace_exit *)raw_data;
 
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->exit_id;
+	rec->ent.type = sys_data->exit_event->id;
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
 
 end:
 	perf_swevent_put_recursion_context(rctx);
-- 
cgit v1.2.3


From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:47 +0800
Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata

Add syscall_nr field to struct syscall_metadata,
it helps us to get syscall number easier.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D293.6090800@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  4 ++--
 include/trace/syscall.h       |  3 ++-
 kernel/trace/trace_syscalls.c | 22 +++++++++-------------
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b9af87560adb..3c280d7ecb76 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	static int init_enter_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
+		num = __syscall_meta_##sname.syscall_nr;		\
 		if (num < 0)						\
 			return -ENOSYS;					\
 		id = register_ftrace_event(&enter_syscall_print_##sname);\
@@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	static int init_exit_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
+		num = __syscall_meta_##sname.syscall_nr;		\
 		if (num < 0)						\
 			return -ENOSYS;					\
 		id = register_ftrace_event(&exit_syscall_print_##sname);\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index ca09561cd578..1531eef3071f 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -12,6 +12,7 @@
  * A syscall entry in the ftrace syscalls array.
  *
  * @name: name of the syscall
+ * @syscall_nr: number of the syscall
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
@@ -20,6 +21,7 @@
  */
 struct syscall_metadata {
 	const char	*name;
+	int		syscall_nr;
 	int		nb_args;
 	const char	**types;
 	const char	**args;
@@ -30,7 +32,6 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(const char *name);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 27eb18d69222..144cc14d8551 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(const char *name)
+static int syscall_name_to_nr(const char *name)
 {
 	int i;
 
@@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void)
 	for (i = 0; i < NR_syscalls; i++) {
 		addr = arch_syscall_addr(i);
 		meta = find_syscall_meta(addr);
+		if (!meta)
+			continue;
+
+		meta->syscall_nr = i;
 		syscalls_metadata[i] = meta;
 	}
 
-- 
cgit v1.2.3


From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:55 +0800
Subject: trace_syscalls: Remove duplicate init_enter_##sname()

use only one init_syscall_trace instead of
many init_enter_##sname()/init_exit_##sname()

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 30 ++----------------------------
 include/trace/syscall.h       |  1 +
 kernel/trace/trace_syscalls.c | 12 ++++++++++++
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3c280d7ecb76..cf0d923ea40e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(struct ftrace_event_call *call)	\
-	{								\
-		int num, id;						\
-		num = __syscall_meta_##sname.syscall_nr;		\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&enter_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_enter_##sname.id = id;				\
-		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
-		return 0;						\
-	}								\
 	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
@@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= init_enter_##sname,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(struct ftrace_event_call *call)	\
-	{								\
-		int num, id;						\
-		num = __syscall_meta_##sname.syscall_nr;		\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&exit_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_exit_##sname.id = id;				\
-		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
-		return 0;						\
-	}								\
 	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
@@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= init_exit_##sname,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 1531eef3071f..dff9371e5274 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -32,6 +32,7 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
+extern int init_syscall_trace(struct ftrace_event_call *call);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 144cc14d8551..c6514093c95a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
+int init_syscall_trace(struct ftrace_event_call *call)
+{
+	int id;
+
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+	return 0;
+}
+
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
-- 
cgit v1.2.3


From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:24:01 +0800
Subject: trace_syscalls: Simplify syscall profile

use only one prof_sysenter_enable() instead of
prof_sysenter_enable_##sname()

use only one prof_sysenter_disable() instead of
prof_sysenter_disable_##sname()

use only one prof_sysexit_enable() instead of
prof_sysexit_enable_##sname()

use only one prof_sysexit_disable() instead of
prof_sysexit_disable_##sname()

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 31 ++++---------------------------
 include/trace/syscall.h       |  8 ++++----
 kernel/trace/trace_syscalls.c | 24 ++++++++----------------
 3 files changed, 16 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf0d923ea40e..c2df3a593236 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,37 +99,16 @@ struct perf_event_attr;
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
 #ifdef CONFIG_EVENT_PROFILE
-#define TRACE_SYS_ENTER_PROFILE(sname)					       \
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused)      \
-{									       \
-	return reg_prof_syscall_enter("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused)    \
-{									       \
-	unreg_prof_syscall_enter("sys"#sname);				       \
-}
-
-#define TRACE_SYS_EXIT_PROFILE(sname)					       \
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused)       \
-{									       \
-	return reg_prof_syscall_exit("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
-{                                                                              \
-	unreg_prof_syscall_exit("sys"#sname);				       \
-}
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysenter_enable_##sname,			       \
-	.profile_disable = prof_sysenter_disable_##sname,
+	.profile_enable = prof_sysenter_enable,				       \
+	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysexit_enable_##sname,			       \
-	.profile_disable = prof_sysexit_disable_##sname,
+	.profile_enable = prof_sysexit_enable,				       \
+	.profile_disable = prof_sysexit_disable,
 #else
 #define TRACE_SYS_ENTER_PROFILE(sname)
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
@@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index dff9371e5274..961fda3556bb 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
 #ifdef CONFIG_EVENT_PROFILE
-int reg_prof_syscall_enter(char *name);
-void unreg_prof_syscall_enter(char *name);
-int reg_prof_syscall_exit(char *name);
-void unreg_prof_syscall_exit(char *name);
+int prof_sysenter_enable(struct ftrace_event_call *call);
+void prof_sysenter_disable(struct ftrace_event_call *call);
+int prof_sysexit_enable(struct ftrace_event_call *call);
+void prof_sysexit_disable(struct ftrace_event_call *call);
 
 #endif
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index c6514093c95a..1e85b6cc26aa 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -520,14 +520,12 @@ end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_enter(char *name)
+int prof_sysenter_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_enter)
@@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name)
 	return ret;
 }
 
-void unreg_prof_syscall_enter(char *name)
+void prof_sysenter_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_enter--;
@@ -625,14 +621,12 @@ end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_exit(char *name)
+int prof_sysexit_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_exit)
@@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name)
 	return ret;
 }
 
-void unreg_prof_syscall_exit(char *name)
+void prof_sysexit_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_exit--;
-- 
cgit v1.2.3


From ee0a6efc1897ef817e177e669f5c5d211194df24 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 2 Dec 2009 08:36:58 +0900
Subject: percpu: add missing per_cpu_ptr_to_phys() definition for UP

Commit 3b034b0d084221596bf35c8d893e1d4d5477b9cc implemented
per_cpu_ptr_to_phys() but forgot to add UP definition.  Add UP
definition which is simple wrapper around __pa().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 include/linux/percpu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 6ac984fa34f8..8e4ead6435fb 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -180,6 +180,11 @@ static inline void free_percpu(void *p)
 	kfree(p);
 }
 
+static inline phys_addr_t per_cpu_ptr_to_phys(void *addr)
+{
+	return __pa(addr);
+}
+
 static inline void __init setup_per_cpu_areas(void) { }
 
 static inline void *pcpu_lpage_remapped(void *kaddr)
-- 
cgit v1.2.3


From a5ee155136b4a8f4ab0e4c9c064b661da475e298 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 29 Nov 2009 15:45:58 +0000
Subject: net: NETDEV_UNREGISTER_PERNET -> NETDEV_UNREGISTER_BATCH

The motivation for an additional notifier in batched netdevice
notification (rt_do_flush) only needs to be called once per batch not
once per namespace.

For further batching improvements I need a guarantee that the
netdevices are unregistered in order allowing me to unregister an all
of the network devices in a network namespace at the same time with
the guarantee that the loopback device is really and truly
unregistered last.

Additionally it appears that we moved the route cache flush after
the final synchronize_net, which seems wrong and there was no
explanation.  So I have restored the original location of the final
synchronize_net.

Cc: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h |  2 +-
 include/net/route.h      |  1 +
 net/core/dev.c           | 36 +++++++++---------------------------
 net/ipv4/fib_frontend.c  |  4 +++-
 net/ipv4/route.c         |  6 ++++++
 5 files changed, 20 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index b0c3671d463c..fee6c2f68075 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -202,7 +202,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_BONDING_OLDTYPE  0x000E
 #define NETDEV_BONDING_NEWTYPE  0x000F
 #define NETDEV_POST_INIT	0x0010
-#define NETDEV_UNREGISTER_PERNET 0x0011
+#define NETDEV_UNREGISTER_BATCH 0x0011
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/include/net/route.h b/include/net/route.h
index cfb4c071a136..bce6dd68d27b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -108,6 +108,7 @@ extern int		ip_rt_init(void);
 extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
 				       __be32 src, struct net_device *dev);
 extern void		rt_cache_flush(struct net *net, int how);
+extern void		rt_cache_flush_batch(void);
 extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5d131c2f84cc..bb37ee1e0901 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1353,7 +1353,7 @@ rollback:
 				nb->notifier_call(nb, NETDEV_DOWN, dev);
 			}
 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
-			nb->notifier_call(nb, NETDEV_UNREGISTER_PERNET, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
 		}
 	}
 
@@ -4771,8 +4771,7 @@ static void net_set_todo(struct net_device *dev)
 
 static void rollback_registered_many(struct list_head *head)
 {
-	struct net_device *dev, *aux, *fdev;
-	LIST_HEAD(pernet_list);
+	struct net_device *dev;
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -4828,26 +4827,14 @@ static void rollback_registered_many(struct list_head *head)
 		netdev_unregister_kobject(dev);
 	}
 
-	synchronize_net();
+	/* Process any work delayed until the end of the batch */
+	dev = list_entry(head->next, struct net_device, unreg_list);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-	list_for_each_entry_safe(dev, aux, head, unreg_list) {
-		int new_net = 1;
-		list_for_each_entry(fdev, &pernet_list, unreg_list) {
-			if (net_eq(dev_net(dev), dev_net(fdev))) {
-				new_net = 0;
-				dev_put(dev);
-				break;
-			}
-		}
-		if (new_net)
-			list_move(&dev->unreg_list, &pernet_list);
-	}
+	synchronize_net();
 
-	list_for_each_entry_safe(dev, aux, &pernet_list, unreg_list) {
-		call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
-		list_move(&dev->unreg_list, head);
+	list_for_each_entry(dev, head, unreg_list)
 		dev_put(dev);
-	}
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -5129,7 +5116,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 			/* Rebroadcast unregister notification */
 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-			/* don't resend NETDEV_UNREGISTER_PERNET, _PERNET users
+			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
 			 * should have already handle it the first time */
 
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -5442,11 +5429,6 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
 /**
  *	unregister_netdevice_many - unregister many devices
  *	@head: list of devices
- *
- *	WARNING: Calling this modifies the given list
- *	(in rollback_registered_many). It may change the order of the elements
- *	in the list. However, you can assume it does not add or delete elements
- *	to/from the list.
  */
 void unregister_netdevice_many(struct list_head *head)
 {
@@ -5555,7 +5537,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	   this device. They should clean all the things.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-	call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
 	/*
 	 *	Flush the unicast and multicast chains
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6c1e56aef1f4..3b373a8b0473 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -959,9 +959,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-	case NETDEV_UNREGISTER_PERNET:
 		rt_cache_flush(dev_net(dev), 0);
 		break;
+	case NETDEV_UNREGISTER_BATCH:
+		rt_cache_flush_batch();
+		break;
 	}
 	return NOTIFY_DONE;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9889fbd96487..90cdcfc32937 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -900,6 +900,12 @@ void rt_cache_flush(struct net *net, int delay)
 		rt_do_flush(!in_softirq());
 }
 
+/* Flush previous cache invalidated entries from the cache */
+void rt_cache_flush_batch(void)
+{
+	rt_do_flush(!in_softirq());
+}
+
 /*
  * We change rt_genid and let gc do the cleanup
  */
-- 
cgit v1.2.3


From dcbccbd4f1f6ad0f0e169d4b2e816e42bde06f82 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 29 Nov 2009 22:25:26 +0000
Subject: net: Implement for_each_netdev_reverse.

I will need this shortly to implement network namespace shutdown
batching.  For sanity sake network devices should be removed in
the reverse order they were created in.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9428793775a0..daf13d367498 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1112,6 +1112,8 @@ extern rwlock_t				dev_base_lock;		/* Device list lock */
 
 #define for_each_netdev(net, d)		\
 		list_for_each_entry(d, &(net)->dev_base_head, dev_list)
+#define for_each_netdev_reverse(net, d)	\
+		list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_rcu(net, d)		\
 		list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list)
 #define for_each_netdev_safe(net, d, n)	\
-- 
cgit v1.2.3


From 66d2a5952eab875f1286e04f738ef029afdaf013 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 1 Dec 2009 21:54:35 -0800
Subject: Input: keyboard - fix lack of locking when traversing handler->h_list

Keyboard handler should not attempt to traverse handler->h_list on
its own, without any locking, otherwise it races with registering
and unregistering of input handles which leads to crashes.

Introduce input_handler_for_each_handle() helper that allows safely
iterate over all handles attached to a particular handler and switch
keyboard handler to use it.

Reported-by: Jim Paradis <jparadis@redhat.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/char/keyboard.c | 202 +++++++++++++++++++++++++-----------------------
 drivers/input/input.c   |  37 ++++++++-
 include/linux/input.h   |  10 ++-
 3 files changed, 148 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index ca090e57e161..ff8e9345f3c9 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -46,8 +46,6 @@
 
 extern void ctrl_alt_del(void);
 
-#define to_handle_h(n) container_of(n, struct input_handle, h_node)
-
 /*
  * Exported functions/variables
  */
@@ -191,78 +189,85 @@ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
  *  etc.). So this means that scancodes for the extra function keys won't
  *  be valid for the first event device, but will be for the second.
  */
+
+struct getset_keycode_data {
+	unsigned int scancode;
+	unsigned int keycode;
+	int error;
+};
+
+static int getkeycode_helper(struct input_handle *handle, void *data)
+{
+	struct getset_keycode_data *d = data;
+
+	d->error = input_get_keycode(handle->dev, d->scancode, &d->keycode);
+
+	return d->error == 0; /* stop as soon as we successfully get one */
+}
+
 int getkeycode(unsigned int scancode)
 {
-	struct input_handle *handle;
-	int keycode;
-	int error = -ENODEV;
+	struct getset_keycode_data d = { scancode, 0, -ENODEV };
 
-	list_for_each_entry(handle, &kbd_handler.h_list, h_node) {
-		error = input_get_keycode(handle->dev, scancode, &keycode);
-		if (!error)
-			return keycode;
-	}
+	input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper);
 
-	return error;
+	return d.error ?: d.keycode;
+}
+
+static int setkeycode_helper(struct input_handle *handle, void *data)
+{
+	struct getset_keycode_data *d = data;
+
+	d->error = input_set_keycode(handle->dev, d->scancode, d->keycode);
+
+	return d->error == 0; /* stop as soon as we successfully set one */
 }
 
 int setkeycode(unsigned int scancode, unsigned int keycode)
 {
-	struct input_handle *handle;
-	int error = -ENODEV;
+	struct getset_keycode_data d = { scancode, keycode, -ENODEV };
 
-	list_for_each_entry(handle, &kbd_handler.h_list, h_node) {
-		error = input_set_keycode(handle->dev, scancode, keycode);
-		if (!error)
-			break;
-	}
+	input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper);
 
-	return error;
+	return d.error;
 }
 
 /*
  * Making beeps and bells.
  */
-static void kd_nosound(unsigned long ignored)
+
+static int kd_sound_helper(struct input_handle *handle, void *data)
 {
-	struct input_handle *handle;
+	unsigned int *hz = data;
+	struct input_dev *dev = handle->dev;
 
-	list_for_each_entry(handle, &kbd_handler.h_list, h_node) {
-		if (test_bit(EV_SND, handle->dev->evbit)) {
-			if (test_bit(SND_TONE, handle->dev->sndbit))
-				input_inject_event(handle, EV_SND, SND_TONE, 0);
-			if (test_bit(SND_BELL, handle->dev->sndbit))
-				input_inject_event(handle, EV_SND, SND_BELL, 0);
-		}
+	if (test_bit(EV_SND, dev->evbit)) {
+		if (test_bit(SND_TONE, dev->sndbit))
+			input_inject_event(handle, EV_SND, SND_TONE, *hz);
+		if (test_bit(SND_BELL, handle->dev->sndbit))
+			input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0);
 	}
+
+	return 0;
+}
+
+static void kd_nosound(unsigned long ignored)
+{
+	static unsigned int zero;
+
+	input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper);
 }
 
 static DEFINE_TIMER(kd_mksound_timer, kd_nosound, 0, 0);
 
 void kd_mksound(unsigned int hz, unsigned int ticks)
 {
-	struct list_head *node;
+	del_timer_sync(&kd_mksound_timer);
 
-	del_timer(&kd_mksound_timer);
+	input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper);
 
-	if (hz) {
-		list_for_each_prev(node, &kbd_handler.h_list) {
-			struct input_handle *handle = to_handle_h(node);
-			if (test_bit(EV_SND, handle->dev->evbit)) {
-				if (test_bit(SND_TONE, handle->dev->sndbit)) {
-					input_inject_event(handle, EV_SND, SND_TONE, hz);
-					break;
-				}
-				if (test_bit(SND_BELL, handle->dev->sndbit)) {
-					input_inject_event(handle, EV_SND, SND_BELL, 1);
-					break;
-				}
-			}
-		}
-		if (ticks)
-			mod_timer(&kd_mksound_timer, jiffies + ticks);
-	} else
-		kd_nosound(0);
+	if (hz && ticks)
+		mod_timer(&kd_mksound_timer, jiffies + ticks);
 }
 EXPORT_SYMBOL(kd_mksound);
 
@@ -270,27 +275,34 @@ EXPORT_SYMBOL(kd_mksound);
  * Setting the keyboard rate.
  */
 
-int kbd_rate(struct kbd_repeat *rep)
+static int kbd_rate_helper(struct input_handle *handle, void *data)
 {
-	struct list_head *node;
-	unsigned int d = 0;
-	unsigned int p = 0;
-
-	list_for_each(node, &kbd_handler.h_list) {
-		struct input_handle *handle = to_handle_h(node);
-		struct input_dev *dev = handle->dev;
-
-		if (test_bit(EV_REP, dev->evbit)) {
-			if (rep->delay > 0)
-				input_inject_event(handle, EV_REP, REP_DELAY, rep->delay);
-			if (rep->period > 0)
-				input_inject_event(handle, EV_REP, REP_PERIOD, rep->period);
-			d = dev->rep[REP_DELAY];
-			p = dev->rep[REP_PERIOD];
-		}
+	struct input_dev *dev = handle->dev;
+	struct kbd_repeat *rep = data;
+
+	if (test_bit(EV_REP, dev->evbit)) {
+
+		if (rep[0].delay > 0)
+			input_inject_event(handle,
+					   EV_REP, REP_DELAY, rep[0].delay);
+		if (rep[0].period > 0)
+			input_inject_event(handle,
+					   EV_REP, REP_PERIOD, rep[0].period);
+
+		rep[1].delay = dev->rep[REP_DELAY];
+		rep[1].period = dev->rep[REP_PERIOD];
 	}
-	rep->delay  = d;
-	rep->period = p;
+
+	return 0;
+}
+
+int kbd_rate(struct kbd_repeat *rep)
+{
+	struct kbd_repeat data[2] = { *rep };
+
+	input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper);
+	*rep = data[1];	/* Copy currently used settings */
+
 	return 0;
 }
 
@@ -998,36 +1010,36 @@ static inline unsigned char getleds(void)
 	return leds;
 }
 
+static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+{
+	unsigned char leds = *(unsigned char *)data;
+
+	if (test_bit(EV_LED, handle->dev->evbit)) {
+		input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
+		input_inject_event(handle, EV_LED, LED_NUML,    !!(leds & 0x02));
+		input_inject_event(handle, EV_LED, LED_CAPSL,   !!(leds & 0x04));
+		input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
+	}
+
+	return 0;
+}
+
 /*
- * This routine is the bottom half of the keyboard interrupt
- * routine, and runs with all interrupts enabled. It does
- * console changing, led setting and copy_to_cooked, which can
- * take a reasonably long time.
- *
- * Aside from timing (which isn't really that important for
- * keyboard interrupts as they happen often), using the software
- * interrupt routines for this thing allows us to easily mask
- * this when we don't want any of the above to happen.
- * This allows for easy and efficient race-condition prevention
- * for kbd_start => input_inject_event(dev, EV_LED, ...) => ...
+ * This is the tasklet that updates LED state on all keyboards
+ * attached to the box. The reason we use tasklet is that we
+ * need to handle the scenario when keyboard handler is not
+ * registered yet but we already getting updates form VT to
+ * update led state.
  */
-
 static void kbd_bh(unsigned long dummy)
 {
-	struct list_head *node;
 	unsigned char leds = getleds();
 
 	if (leds != ledstate) {
-		list_for_each(node, &kbd_handler.h_list) {
-			struct input_handle *handle = to_handle_h(node);
-			input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
-			input_inject_event(handle, EV_LED, LED_NUML,    !!(leds & 0x02));
-			input_inject_event(handle, EV_LED, LED_CAPSL,   !!(leds & 0x04));
-			input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
-		}
+		input_handler_for_each_handle(&kbd_handler, &leds,
+					      kbd_update_leds_helper);
+		ledstate = leds;
 	}
-
-	ledstate = leds;
 }
 
 DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1370,15 +1382,11 @@ static void kbd_disconnect(struct input_handle *handle)
  */
 static void kbd_start(struct input_handle *handle)
 {
-	unsigned char leds = ledstate;
-
 	tasklet_disable(&keyboard_tasklet);
-	if (leds != 0xff) {
-		input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
-		input_inject_event(handle, EV_LED, LED_NUML,    !!(leds & 0x02));
-		input_inject_event(handle, EV_LED, LED_CAPSL,   !!(leds & 0x04));
-		input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
-	}
+
+	if (ledstate != 0xff)
+		kbd_update_leds_helper(handle, &ledstate);
+
 	tasklet_enable(&keyboard_tasklet);
 }
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index cc763c96fada..5d6421bde4ba 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1650,6 +1650,38 @@ void input_unregister_handler(struct input_handler *handler)
 }
 EXPORT_SYMBOL(input_unregister_handler);
 
+/**
+ * input_handler_for_each_handle - handle iterator
+ * @handler: input handler to iterate
+ * @data: data for the callback
+ * @fn: function to be called for each handle
+ *
+ * Iterate over @bus's list of devices, and call @fn for each, passing
+ * it @data and stop when @fn returns a non-zero value. The function is
+ * using RCU to traverse the list and therefore may be usind in atonic
+ * contexts. The @fn callback is invoked from RCU critical section and
+ * thus must not sleep.
+ */
+int input_handler_for_each_handle(struct input_handler *handler, void *data,
+				  int (*fn)(struct input_handle *, void *))
+{
+	struct input_handle *handle;
+	int retval = 0;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(handle, &handler->h_list, h_node) {
+		retval = fn(handle, data);
+		if (retval)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	return retval;
+}
+EXPORT_SYMBOL(input_handler_for_each_handle);
+
 /**
  * input_register_handle - register a new input handle
  * @handle: handle to register
@@ -1683,7 +1715,7 @@ int input_register_handle(struct input_handle *handle)
 	 * we can't be racing with input_unregister_handle()
 	 * and so separate lock is not needed here.
 	 */
-	list_add_tail(&handle->h_node, &handler->h_list);
+	list_add_tail_rcu(&handle->h_node, &handler->h_list);
 
 	if (handler->start)
 		handler->start(handle);
@@ -1706,7 +1738,7 @@ void input_unregister_handle(struct input_handle *handle)
 {
 	struct input_dev *dev = handle->dev;
 
-	list_del_init(&handle->h_node);
+	list_del_rcu(&handle->h_node);
 
 	/*
 	 * Take dev->mutex to prevent race with input_release_device().
@@ -1714,6 +1746,7 @@ void input_unregister_handle(struct input_handle *handle)
 	mutex_lock(&dev->mutex);
 	list_del_rcu(&handle->d_node);
 	mutex_unlock(&dev->mutex);
+
 	synchronize_rcu();
 }
 EXPORT_SYMBOL(input_unregister_handle);
diff --git a/include/linux/input.h b/include/linux/input.h
index 56d8e048c646..db563bbac9dd 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1021,9 +1021,12 @@ struct ff_effect {
  * @keycodesize: size of elements in keycode table
  * @keycode: map of scancodes to keycodes for this device
  * @setkeycode: optional method to alter current keymap, used to implement
- *	sparse keymaps. If not supplied default mechanism will be used
+ *	sparse keymaps. If not supplied default mechanism will be used.
+ *	The method is being called while holding event_lock and thus must
+ *	not sleep
  * @getkeycode: optional method to retrieve current keymap. If not supplied
- *	default mechanism will be used
+ *	default mechanism will be used. The method is being called while
+ *	holding event_lock and thus must not sleep
  * @ff: force feedback structure associated with the device if device
  *	supports force feedback effects
  * @repeat_key: stores key code of the last key pressed; used to implement
@@ -1295,6 +1298,9 @@ void input_unregister_device(struct input_dev *);
 int __must_check input_register_handler(struct input_handler *);
 void input_unregister_handler(struct input_handler *);
 
+int input_handler_for_each_handle(struct input_handler *, void *data,
+				  int (*fn)(struct input_handle *, void *));
+
 int input_register_handle(struct input_handle *);
 void input_unregister_handle(struct input_handle *);
 
-- 
cgit v1.2.3


From 595dd3d8bf953254d8d2f30f99c54fe09c470040 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 1 Dec 2009 10:52:02 -0800
Subject: kmsg_dump: fix build for CONFIG_PRINTK=n

kmsg_dump() fails to build when CONFIG_PRINTK=n; provide stubs
for the kmsg_dump*() functions when CONFIG_PRINTK=n.

kernel/printk.c: In function 'kmsg_dump':
kernel/printk.c:1501: error: 'log_buf_len' undeclared (first use in this function)
kernel/printk.c:1502: error: 'logged_chars' undeclared (first use in this function)
kernel/printk.c:1506: error: 'log_buf' undeclared (first use in this function)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/kmsg_dump.h | 16 ++++++++++++++++
 kernel/printk.c           |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 7f089ec5ef32..e32aa268efac 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -35,10 +35,26 @@ struct kmsg_dumper {
 	int registered;
 };
 
+#ifdef CONFIG_PRINTK
 void kmsg_dump(enum kmsg_dump_reason reason);
 
 int kmsg_dump_register(struct kmsg_dumper *dumper);
 
 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+#else
+static inline void kmsg_dump(enum kmsg_dump_reason reason)
+{
+}
+
+static inline int kmsg_dump_register(struct kmsg_dumper *dumper)
+{
+	return -EINVAL;
+}
+
+static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper)
+{
+	return -EINVAL;
+}
+#endif
 
 #endif /* _LINUX_KMSG_DUMP_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index 051d1f50648f..2a564570f822 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1405,7 +1405,6 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 	return false;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
-#endif
 
 static DEFINE_SPINLOCK(dump_list_lock);
 static LIST_HEAD(dump_list);
@@ -1524,3 +1523,4 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 		dumper->dump(dumper, reason, s1, l1, s2, l2);
 	spin_unlock_irqrestore(&dump_list_lock, flags);
 }
+#endif
-- 
cgit v1.2.3


From 8592e6486a177a02f048567cb928bc3a1f9a86c3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 2 Dec 2009 12:56:46 +0900
Subject: sched: Revert 498657a478c60be092208422fefa9c7b248729c2

498657a478c60be092208422fefa9c7b248729c2 incorrectly assumed
that preempt wasn't disabled around context_switch() and thus
was fixing imaginary problem.  It also broke KVM because it
depended on ->sched_in() to be called with irq enabled so that
it can do smp calls from there.

Revert the incorrect commit and add comment describing different
contexts under with the two callbacks are invoked.

Avi: spotted transposed in/out in the added comment.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Avi Kivity <avi@redhat.com>
Cc: peterz@infradead.org
Cc: efault@gmx.de
Cc: rusty@rustcorp.com.au
LKML-Reference: <1259726212-30259-2-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/preempt.h | 5 +++++
 kernel/sched.c          | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 72b1a10a59b6..2e681d9555bd 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -105,6 +105,11 @@ struct preempt_notifier;
  * @sched_out: we've just been preempted
  *    notifier: struct preempt_notifier for the task being preempted
  *    next: the task that's kicking us out
+ *
+ * Please note that sched_in and out are called under different
+ * contexts.  sched_out is called with rq lock held and irq disabled
+ * while sched_in is called without rq lock and irq enabled.  This
+ * difference is intentional and depended upon by its users.
  */
 struct preempt_ops {
 	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
diff --git a/kernel/sched.c b/kernel/sched.c
index b3d4e2be95aa..1031cae39c4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2768,9 +2768,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	perf_event_task_sched_in(current, cpu_of(rq));
-	fire_sched_in_preempt_notifiers(current);
 	finish_lock_switch(rq, prev);
 
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
-- 
cgit v1.2.3


From 6b62fe019e39edfd1dbe3f224ecd0a87d9365223 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 2 Dec 2009 07:23:10 +0100
Subject: tracing/syscalls: Make syscall events print callbacks static

enter_syscall_print_##sname and exit_syscall_print_##sname don't
need to have a global scope. Make them static.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <1259734990-9034-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c2df3a593236..e79e2f3ccc51 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -134,7 +134,7 @@ struct perf_event_attr;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
-	struct trace_event enter_syscall_print_##sname = {		\
+	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace                  = print_syscall_enter,		\
 	};								\
 	static struct ftrace_event_call __used				\
@@ -156,7 +156,7 @@ struct perf_event_attr;
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
-	struct trace_event exit_syscall_print_##sname = {		\
+	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace                  = print_syscall_exit,		\
 	};								\
 	static struct ftrace_event_call __used				\
-- 
cgit v1.2.3


From fa1452e808732ae10e8b1267fd75fc2d028d634b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 30 Nov 2009 14:59:44 +0900
Subject: locking, task_struct: Reduce size on TRACE_IRQFLAGS and 64bit

Reorder task_struct field for TRACE_IRQFLAGS to remove padding
on 64-bit.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4B135F50.8070302@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..49be8f7c05f6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1421,17 +1421,17 @@ struct task_struct {
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned int irq_events;
-	int hardirqs_enabled;
 	unsigned long hardirq_enable_ip;
-	unsigned int hardirq_enable_event;
 	unsigned long hardirq_disable_ip;
+	unsigned int hardirq_enable_event;
 	unsigned int hardirq_disable_event;
-	int softirqs_enabled;
+	int hardirqs_enabled;
+	int hardirq_context;
 	unsigned long softirq_disable_ip;
-	unsigned int softirq_disable_event;
 	unsigned long softirq_enable_ip;
+	unsigned int softirq_disable_event;
 	unsigned int softirq_enable_event;
-	int hardirq_context;
+	int softirqs_enabled;
 	int softirq_context;
 #endif
 #ifdef CONFIG_LOCKDEP
-- 
cgit v1.2.3


From d99ca3b977fc5a93141304f571475c2af9e6c1c5 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 2 Dec 2009 17:26:47 +0900
Subject: sched, cputime: Cleanups related to task_times()

- Remove if({u,s}t)s because no one call it with NULL now.
- Use cputime_{add,sub}().
- Add ifndef-endif for prev_{u,s}time since they are used
  only when !VIRT_CPU_ACCOUNTING.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
LKML-Reference: <4B1624C7.7040302@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 ++
 kernel/fork.c         |  2 ++
 kernel/sched.c        | 16 ++++++----------
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0395b0f4df3a..dff85e58264e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1331,7 +1331,9 @@ struct task_struct {
 
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
 	struct timespec real_start_time;	/* boot based time */
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..ad7cb6d1193c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1066,8 +1066,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 4883fee99314..17e2c1db2bde 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5184,10 +5184,8 @@ void account_idle_ticks(unsigned long ticks)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	if (ut)
-		*ut = p->utime;
-	if (st)
-		*st = p->stime;
+	*ut = p->utime;
+	*st = p->stime;
 }
 #else
 
@@ -5197,7 +5195,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime = p->utime, total = utime + p->stime;
+	cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
 
 	/*
 	 * Use CFS's precise accounting:
@@ -5217,12 +5215,10 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 * Compare with previous values, to keep monotonicity:
 	 */
 	p->prev_utime = max(p->prev_utime, utime);
-	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+	p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
 
-	if (ut)
-		*ut = p->prev_utime;
-	if (st)
-		*st = p->prev_stime;
+	*ut = p->prev_utime;
+	*st = p->prev_stime;
 }
 #endif
 
-- 
cgit v1.2.3


From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 2 Dec 2009 17:28:07 +0900
Subject: sched, cputime: Introduce thread_group_times()

This is a real fix for problem of utime/stime values decreasing
described in the thread:

   http://lkml.org/lkml/2009/11/3/522

Now cputime is accounted in the following way:

 - {u,s}time in task_struct are increased every time when the thread
   is interrupted by a tick (timer interrupt).

 - When a thread exits, its {u,s}time are added to signal->{u,s}time,
   after adjusted by task_times().

 - When all threads in a thread_group exits, accumulated {u,s}time
   (and also c{u,s}time) in signal struct are added to c{u,s}time
   in signal struct of the group's parent.

So {u,s}time in task struct are "raw" tick count, while
{u,s}time and c{u,s}time in signal struct are "adjusted" values.

And accounted values are used by:

 - task_times(), to get cputime of a thread:
   This function returns adjusted values that originates from raw
   {u,s}time and scaled by sum_exec_runtime that accounted by CFS.

 - thread_group_cputime(), to get cputime of a thread group:
   This function returns sum of all {u,s}time of living threads in
   the group, plus {u,s}time in the signal struct that is sum of
   adjusted cputimes of all exited threads belonged to the group.

The problem is the return value of thread_group_cputime(),
because it is mixed sum of "raw" value and "adjusted" value:

  group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time)

This misbehavior can break {u,s}time monotonicity.
Assume that if there is a thread that have raw values greater
than adjusted values (e.g. interrupted by 1000Hz ticks 50 times
but only runs 45ms) and if it exits, cputime will decrease (e.g.
-5ms).

To fix this, we could do:

  group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time)

But task_times() contains hard divisions, so applying it for
every thread should be avoided.

This patch fixes the above problem in the following way:

 - Modify thread's exit (= __exit_signal()) not to use task_times().
   It means {u,s}time in signal struct accumulates raw values instead
   of adjusted values.  As the result it makes thread_group_cputime()
   to return pure sum of "raw" values.

 - Introduce a new function thread_group_times(*task, *utime, *stime)
   that converts "raw" values of thread_group_cputime() to "adjusted"
   values, in same calculation procedure as task_times().

 - Modify group's exit (= wait_task_zombie()) to use this introduced
   thread_group_times().  It make c{u,s}time in signal struct to
   have adjusted values like before this patch.

 - Replace some thread_group_cputime() by thread_group_times().
   This replacements are only applied where conveys the "adjusted"
   cputime to users, and where already uses task_times() near by it.
   (i.e. sys_times(), getrusage(), and /proc/<PID>/stat.)

This patch have a positive side effect:

 - Before this patch, if a group contains many short-life threads
   (e.g. runs 0.9ms and not interrupted by ticks), the group's
   cputime could be invisible since thread's cputime was accumulated
   after adjusted: imagine adjustment function as adj(ticks, runtime),
     {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0.
   After this patch it will not happen because the adjustment is
   applied after accumulated.

v2:
 - remove if()s, put new variables into signal_struct.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
LKML-Reference: <4B162517.8040909@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  5 +----
 include/linux/sched.h |  4 ++++
 kernel/exit.c         | 23 ++++++++++++-----------
 kernel/fork.c         |  3 +++
 kernel/sched.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 kernel/sys.c          | 18 ++++++++----------
 6 files changed, 69 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ca61a88aed66..2571da43c736 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -506,7 +506,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
-			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
@@ -517,9 +516,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			thread_group_cputime(task, &cputime);
-			utime = cputime.utime;
-			stime = cputime.stime;
+			thread_group_times(task, &utime, &stime);
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dff85e58264e..34238bd10ebf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -624,6 +624,9 @@ struct signal_struct {
 	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
@@ -1723,6 +1726,7 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index 2eaf68b634e3..b221ad65fd20 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,8 +91,6 @@ static void __exit_signal(struct task_struct *tsk)
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
 	else {
-		cputime_t utime, stime;
-
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
@@ -112,9 +110,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		task_times(tsk, &utime, &stime);
-		sig->utime = cputime_add(sig->utime, utime);
-		sig->stime = cputime_add(sig->stime, stime);
+		sig->utime = cputime_add(sig->utime, tsk->utime);
+		sig->stime = cputime_add(sig->stime, tsk->stime);
 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -1208,6 +1205,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
+		cputime_t tgutime, tgstime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1223,20 +1221,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_times() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
+		thread_group_times(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(tgutime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(tgstime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index ad7cb6d1193c..3d6f121bbe8a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 17e2c1db2bde..e6ba726941ae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5187,6 +5187,16 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*ut = p->utime;
 	*st = p->stime;
 }
+
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+
+	*ut = cputime.utime;
+	*st = cputime.stime;
+}
 #else
 
 #ifndef nsecs_to_cputime
@@ -5220,6 +5230,37 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*ut = p->prev_utime;
 	*st = p->prev_stime;
 }
+
+/*
+ * Must be called with siglock held.
+ */
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct signal_struct *sig = p->signal;
+	struct task_cputime cputime;
+	cputime_t rtime, utime, total;
+
+	thread_group_cputime(p, &cputime);
+
+	total = cputime_add(cputime.utime, cputime.stime);
+	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
+
+	if (total) {
+		u64 temp;
+
+		temp = (u64)(rtime * cputime.utime);
+		do_div(temp, total);
+		utime = (cputime_t)temp;
+	} else
+		utime = rtime;
+
+	sig->prev_utime = max(sig->prev_utime, utime);
+	sig->prev_stime = max(sig->prev_stime,
+			      cputime_sub(rtime, sig->prev_utime));
+
+	*ut = sig->prev_utime;
+	*st = sig->prev_stime;
+}
 #endif
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index bbdfce0d4347..9968c5fb55b9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -911,16 +911,15 @@ change_okay:
 
 void do_sys_times(struct tms *tms)
 {
-	struct task_cputime cputime;
-	cputime_t cutime, cstime;
+	cputime_t tgutime, tgstime, cutime, cstime;
 
-	thread_group_cputime(current, &cputime);
 	spin_lock_irq(&current->sighand->siglock);
+	thread_group_times(current, &tgutime, &tgstime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
-	tms->tms_utime = cputime_to_clock_t(cputime.utime);
-	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_utime = cputime_to_clock_t(tgutime);
+	tms->tms_stime = cputime_to_clock_t(tgstime);
 	tms->tms_cutime = cputime_to_clock_t(cutime);
 	tms->tms_cstime = cputime_to_clock_t(cstime);
 }
@@ -1338,8 +1337,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 {
 	struct task_struct *t;
 	unsigned long flags;
-	cputime_t utime, stime;
-	struct task_cputime cputime;
+	cputime_t tgutime, tgstime, utime, stime;
 	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
@@ -1372,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			thread_group_cputime(p, &cputime);
-			utime = cputime_add(utime, cputime.utime);
-			stime = cputime_add(stime, cputime.stime);
+			thread_group_times(p, &tgutime, &tgstime);
+			utime = cputime_add(utime, tgutime);
+			stime = cputime_add(stime, tgstime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
-- 
cgit v1.2.3


From c81c2d95449cd218c2022ce6014c52fef1eb1f66 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 2 Dec 2009 16:49:02 +0000
Subject: skbuff: remove skb_dma_map/unmap

The two functions skb_dma_map/unmap are unsafe to use as they cause
problems when packets are cloned and sent to multiple devices while a HW
IOMMU is enabled.  Due to this it is best to remove the code so it is not
used by any other network driver maintainters.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  8 -------
 net/core/Makefile      |  1 -
 net/core/skb_dma_map.c | 65 --------------------------------------------------
 3 files changed, 74 deletions(-)
 delete mode 100644 net/core/skb_dma_map.c

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 89eed8cdd318..ae836fded530 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -416,14 +416,6 @@ struct sk_buff {
 
 #include <asm/system.h>
 
-#ifdef CONFIG_HAS_DMA
-#include <linux/dma-mapping.h>
-extern int skb_dma_map(struct device *dev, struct sk_buff *skb,
-		       enum dma_data_direction dir);
-extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
-			  enum dma_data_direction dir);
-#endif
-
 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 {
 	return (struct dst_entry *)skb->_skb_dst;
diff --git a/net/core/Makefile b/net/core/Makefile
index 796f46eece5f..08791ac3e05a 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 	 gen_stats.o gen_estimator.o net_namespace.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
 obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
deleted file mode 100644
index 79687dfd6957..000000000000
--- a/net/core/skb_dma_map.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* skb_dma_map.c: DMA mapping helpers for socket buffers.
- *
- * Copyright (C) David S. Miller <davem@davemloft.net>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-#include <linux/skbuff.h>
-
-int skb_dma_map(struct device *dev, struct sk_buff *skb,
-		enum dma_data_direction dir)
-{
-	struct skb_shared_info *sp = skb_shinfo(skb);
-	dma_addr_t map;
-	int i;
-
-	map = dma_map_single(dev, skb->data,
-			     skb_headlen(skb), dir);
-	if (dma_mapping_error(dev, map))
-		goto out_err;
-
-	sp->dma_head = map;
-	for (i = 0; i < sp->nr_frags; i++) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		map = dma_map_page(dev, fp->page, fp->page_offset,
-				   fp->size, dir);
-		if (dma_mapping_error(dev, map))
-			goto unwind;
-		sp->dma_maps[i] = map;
-	}
-
-	return 0;
-
-unwind:
-	while (--i >= 0) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		dma_unmap_page(dev, sp->dma_maps[i],
-			       fp->size, dir);
-	}
-	dma_unmap_single(dev, sp->dma_head,
-			 skb_headlen(skb), dir);
-out_err:
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(skb_dma_map);
-
-void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
-		   enum dma_data_direction dir)
-{
-	struct skb_shared_info *sp = skb_shinfo(skb);
-	int i;
-
-	dma_unmap_single(dev, sp->dma_head,
-			 skb_headlen(skb), dir);
-	for (i = 0; i < sp->nr_frags; i++) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		dma_unmap_page(dev, sp->dma_maps[i],
-			       fp->size, dir);
-	}
-}
-EXPORT_SYMBOL(skb_dma_unmap);
-- 
cgit v1.2.3


From 602da297e293eb2cbd28dcdbbe247593a46a853a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 2 Dec 2009 21:58:33 -0800
Subject: ide: Increase WAIT_DRQ to accomodate some CF cards and SSD drives.

Based upon a patch by Philippe De Muyter, and feedback from Mark
Lord and Robert Hancock.

As noted by Mark Lord, the outdated ATA1 spec specifies a 20msec
timeout for setting DRQ but lots of common devices overshoot this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ide.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index e4135d6e0556..0ec612959042 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -125,8 +125,8 @@ struct ide_io_ports {
  * Timeouts for various operations:
  */
 enum {
-	/* spec allows up to 20ms */
-	WAIT_DRQ	= HZ / 10,	/* 100ms */
+	/* spec allows up to 20ms, but CF cards and SSD drives need more */
+	WAIT_DRQ	= 1 * HZ,	/* 1s */
 	/* some laptops are very slow */
 	WAIT_READY	= 5 * HZ,	/* 5s */
 	/* should be less than 3ms (?), if all ATAPI CD is closed at boot */
-- 
cgit v1.2.3


From da5c78c82629a167794436e4306b4cf1faddea90 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:12:09 +0000
Subject: TCPCT part 1b: generate Responder Cookie secret

Define (missing) hash message size for SHA1.

Define hashing size constants specific to TCP cookies.

Add new function: tcp_cookie_generator().

Maintain global secret values for tcp_cookie_generator().

This is a significantly revised implementation of earlier (15-year-old)
Photuris [RFC-2522] code for the KA9Q cooperative multitasking platform.

Linux RCU technique appears to be well-suited to this application, though
neither of the circular queue items are freed.

These functions will also be used in subsequent patches that implement
additional features.

Signed-off-by: William.Allen.Simpson@gmail.com
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cryptohash.h |   1 +
 include/net/tcp.h          |   8 +++
 net/ipv4/tcp.c             | 140 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index c118b2ad9807..ec78a4bbe1d5 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -2,6 +2,7 @@
 #define __CRYPTOHASH_H
 
 #define SHA_DIGEST_WORDS 5
+#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
 #define SHA_WORKSPACE_WORDS 80
 
 void sha_init(__u32 *buf);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ec183fda05d0..4a99a8e39121 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1478,6 +1478,14 @@ struct tcp_request_sock_ops {
 #endif
 };
 
+/* Using SHA1 for now, define some constants.
+ */
+#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
+#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
+#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
+
+extern int tcp_cookie_generator(u32 *bakery);
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7d4648f8b3d3..ba03ac80435a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -264,6 +264,7 @@
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
+#include <linux/time.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -2848,6 +2849,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
+/**
+ * Each Responder maintains up to two secret values concurrently for
+ * efficient secret rollover.  Each secret value has 4 states:
+ *
+ * Generating.  (tcp_secret_generating != tcp_secret_primary)
+ *    Generates new Responder-Cookies, but not yet used for primary
+ *    verification.  This is a short-term state, typically lasting only
+ *    one round trip time (RTT).
+ *
+ * Primary.  (tcp_secret_generating == tcp_secret_primary)
+ *    Used both for generation and primary verification.
+ *
+ * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
+ *    Used for verification, until the first failure that can be
+ *    verified by the newer Generating secret.  At that time, this
+ *    cookie's state is changed to Secondary, and the Generating
+ *    cookie's state is changed to Primary.  This is a short-term state,
+ *    typically lasting only one round trip time (RTT).
+ *
+ * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
+ *    Used for secondary verification, after primary verification
+ *    failures.  This state lasts no more than twice the Maximum Segment
+ *    Lifetime (2MSL).  Then, the secret is discarded.
+ */
+struct tcp_cookie_secret {
+	/* The secret is divided into two parts.  The digest part is the
+	 * equivalent of previously hashing a secret and saving the state,
+	 * and serves as an initialization vector (IV).  The message part
+	 * serves as the trailing secret.
+	 */
+	u32				secrets[COOKIE_WORKSPACE_WORDS];
+	unsigned long			expires;
+};
+
+#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
+#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
+#define TCP_SECRET_LIFE (HZ * 600)
+
+static struct tcp_cookie_secret tcp_secret_one;
+static struct tcp_cookie_secret tcp_secret_two;
+
+/* Essentially a circular list, without dynamic allocation. */
+static struct tcp_cookie_secret *tcp_secret_generating;
+static struct tcp_cookie_secret *tcp_secret_primary;
+static struct tcp_cookie_secret *tcp_secret_retiring;
+static struct tcp_cookie_secret *tcp_secret_secondary;
+
+static DEFINE_SPINLOCK(tcp_secret_locker);
+
+/* Select a pseudo-random word in the cookie workspace.
+ */
+static inline u32 tcp_cookie_work(const u32 *ws, const int n)
+{
+	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
+}
+
+/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
+ * Called in softirq context.
+ * Returns: 0 for success.
+ */
+int tcp_cookie_generator(u32 *bakery)
+{
+	unsigned long jiffy = jiffies;
+
+	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
+		spin_lock_bh(&tcp_secret_locker);
+		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
+			/* refreshed by another */
+			memcpy(bakery,
+			       &tcp_secret_generating->secrets[0],
+			       COOKIE_WORKSPACE_WORDS);
+		} else {
+			/* still needs refreshing */
+			get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
+
+			/* The first time, paranoia assumes that the
+			 * randomization function isn't as strong.  But,
+			 * this secret initialization is delayed until
+			 * the last possible moment (packet arrival).
+			 * Although that time is observable, it is
+			 * unpredictably variable.  Mash in the most
+			 * volatile clock bits available, and expire the
+			 * secret extra quickly.
+			 */
+			if (unlikely(tcp_secret_primary->expires ==
+				     tcp_secret_secondary->expires)) {
+				struct timespec tv;
+
+				getnstimeofday(&tv);
+				bakery[COOKIE_DIGEST_WORDS+0] ^=
+					(u32)tv.tv_nsec;
+
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_1MSL
+					+ (0x0f & tcp_cookie_work(bakery, 0));
+			} else {
+				tcp_secret_secondary->expires = jiffy
+					+ TCP_SECRET_LIFE
+					+ (0xff & tcp_cookie_work(bakery, 1));
+				tcp_secret_primary->expires = jiffy
+					+ TCP_SECRET_2MSL
+					+ (0x1f & tcp_cookie_work(bakery, 2));
+			}
+			memcpy(&tcp_secret_secondary->secrets[0],
+			       bakery, COOKIE_WORKSPACE_WORDS);
+
+			rcu_assign_pointer(tcp_secret_generating,
+					   tcp_secret_secondary);
+			rcu_assign_pointer(tcp_secret_retiring,
+					   tcp_secret_primary);
+			/*
+			 * Neither call_rcu() nor synchronize_rcu() needed.
+			 * Retiring data is not freed.  It is replaced after
+			 * further (locked) pointer updates, and a quiet time
+			 * (minimum 1MSL, maximum LIFE - 2MSL).
+			 */
+		}
+		spin_unlock_bh(&tcp_secret_locker);
+	} else {
+		rcu_read_lock_bh();
+		memcpy(bakery,
+		       &rcu_dereference(tcp_secret_generating)->secrets[0],
+		       COOKIE_WORKSPACE_WORDS);
+		rcu_read_unlock_bh();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcp_cookie_generator);
+
 void tcp_done(struct sock *sk)
 {
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
@@ -2882,6 +3012,7 @@ void __init tcp_init(void)
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
 	int order, i, max_share;
+	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -2975,6 +3106,15 @@ void __init tcp_init(void)
 	       tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
+
+	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
+	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
+	tcp_secret_one.expires = jiffy; /* past due */
+	tcp_secret_two.expires = jiffy; /* past due */
+	tcp_secret_generating = &tcp_secret_one;
+	tcp_secret_primary = &tcp_secret_one;
+	tcp_secret_retiring = &tcp_secret_two;
+	tcp_secret_secondary = &tcp_secret_two;
 }
 
 EXPORT_SYMBOL(tcp_close);
-- 
cgit v1.2.3


From 519855c508b9a17878c0977a3cdefc09b59b30df Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:14:19 +0000
Subject: TCPCT part 1c: sysctl_tcp_cookie_size, socket option
 TCP_COOKIE_TRANSACTIONS

Define sysctl (tcp_cookie_size) to turn on and off the cookie option
default globally, instead of a compiled configuration option.

Define per socket option (TCP_COOKIE_TRANSACTIONS) for setting constant
data values, retrieving variable cookie values, and other facilities.

Move inline tcp_clear_options() unchanged from net/tcp.h to linux/tcp.h,
near its corresponding struct tcp_options_received (prior to changes).

This is a straightforward re-implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

These functions will also be used in subsequent patches that implement
additional features.

Requires:
   net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED

Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  8 ++++++++
 include/linux/tcp.h                    | 33 ++++++++++++++++++++++++++++++++-
 include/net/tcp.h                      |  6 +-----
 net/ipv4/sysctl_net_ipv4.c             |  8 ++++++++
 net/ipv4/tcp_output.c                  |  3 +++
 5 files changed, 52 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 554440af675c..989f5538b8dd 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -164,6 +164,14 @@ tcp_congestion_control - STRING
 	additional choices may be available based on kernel configuration.
 	Default is set as part of kernel configuration.
 
+tcp_cookie_size - INTEGER
+	Default size of TCP Cookie Transactions (TCPCT) option, that may be
+	overridden on a per socket basis by the TCPCT socket option.
+	Values greater than the maximum (16) are interpreted as the maximum.
+	Values greater than zero and less than the minimum (8) are interpreted
+	as the minimum.  Odd values are interpreted as the next even value.
+	Default: 0 (off).
+
 tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 32d7d77b4a01..eaa3113b3786 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -102,7 +102,9 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 
+/* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
 #define TCPI_OPT_WSCALE		4
@@ -174,6 +176,30 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
+/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */
+#define TCP_COOKIE_MIN		 8		/*  64-bits */
+#define TCP_COOKIE_MAX		16		/* 128-bits */
+#define TCP_COOKIE_PAIR_SIZE	(2*TCP_COOKIE_MAX)
+
+/* Flags for both getsockopt and setsockopt */
+#define TCP_COOKIE_IN_ALWAYS	(1 << 0)	/* Discard SYN without cookie */
+#define TCP_COOKIE_OUT_NEVER	(1 << 1)	/* Prohibit outgoing cookies,
+						 * supercedes everything. */
+
+/* Flags for getsockopt */
+#define TCP_S_DATA_IN		(1 << 2)	/* Was data received? */
+#define TCP_S_DATA_OUT		(1 << 3)	/* Was data sent? */
+
+/* TCP_COOKIE_TRANSACTIONS data */
+struct tcp_cookie_transactions {
+	__u16	tcpct_flags;			/* see above */
+	__u8	__tcpct_pad1;			/* zero */
+	__u8	tcpct_cookie_desired;		/* bytes */
+	__u16	tcpct_s_data_desired;		/* bytes of variable data */
+	__u16	tcpct_used;			/* bytes in value */
+	__u8	tcpct_value[TCP_MSS_DEFAULT];
+};
+
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
@@ -227,6 +253,11 @@ struct tcp_options_received {
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
+{
+	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+}
+
 /* This is the max number of SACKS that we'll generate and process. It's safe
  * to increse this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -435,6 +466,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
 	return (struct tcp_timewait_sock *)sk;
 }
 
-#endif
+#endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4a99a8e39121..738b65f01e26 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -234,6 +234,7 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_cookie_size;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -340,11 +341,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
 
 extern void tcp_enter_quickack_mode(struct sock *sk);
 
-static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
-{
- 	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-}
-
 #define	TCP_ECN_OK		1
 #define	TCP_ECN_QUEUE_CWR	2
 #define	TCP_ECN_DEMAND_CWR	4
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c00323bae044..13f7ab6ad6a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -712,6 +712,14 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_cookie_size",
+		.data		= &sysctl_tcp_cookie_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "udp_mem",
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b8b25049f257..307f318fe931 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,9 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
+
+
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 435cf559f02ea3a3159eb316f97dc88bdebe9432 Mon Sep 17 00:00:00 2001
From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Wed, 2 Dec 2009 18:17:05 +0000
Subject: TCPCT part 1d: define TCP cookie option, extend existing struct's

Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.

This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):

    http://thread.gmane.org/gmane.linux.network/102586

The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.

    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html

    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail

These functions will also be used in subsequent patches that implement
additional features.

Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS

Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 29 +++++++++++++----
 include/net/tcp.h        | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c      | 20 ++++++++++++
 net/ipv4/tcp_minisocks.c | 46 ++++++++++++++++++++++-----
 net/ipv6/tcp_ipv6.c      | 13 ++++++++
 5 files changed, 177 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eaa3113b3786..7fee8a4df931 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -247,31 +247,38 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-/*	SACKs data	*/
+	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
+		cookie_out_never:1,
+		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
-	u16	user_mss;  	/* mss requested by user in ioctl */
+	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
- * to increse this, although since:
+ * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +448,12 @@ struct tcp_sock {
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
+
+	/* When the cookie options are generated and exchanged, then this
+	 * object holds a reference to them (cookie_values->kref).  Also
+	 * contains related tcp_cookie_transactions fields.
+	 */
+	struct tcp_cookie_values  *cookie_values;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +472,10 @@ struct tcp_timewait_sock {
 	u16			  tw_md5_keylen;
 	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
 #endif
+	/* Few sockets in timewait have cookies; in that case, then this
+	 * object holds a reference to them (tw_cookie_values->kref).
+	 */
+	struct tcp_cookie_values  *tw_cookie_values;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 738b65f01e26..f9abd9becabd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -1482,6 +1488,83 @@ struct tcp_request_sock_ops {
 
 extern int tcp_cookie_generator(u32 *bakery);
 
+/**
+ *	struct tcp_cookie_values - each socket needs extra space for the
+ *	cookies, together with (optional) space for any SYN data.
+ *
+ *	A tcp_sock contains a pointer to the current value, and this is
+ *	cloned to the tcp_timewait_sock.
+ *
+ * @cookie_pair:	variable data from the option exchange.
+ *
+ * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
+ *			indicates default (sysctl_tcp_cookie_size).
+ *			After cookie sent, remembers size of cookie.
+ *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
+ *
+ * @s_data_desired:	user specified tcpct_s_data_desired.  When the
+ *			constant payload is specified (@s_data_constant),
+ *			holds its length instead.
+ *			Range 0 to TCP_MSS_DESIRED.
+ *
+ * @s_data_payload:	constant data that is to be included in the
+ *			payload of SYN or SYNACK segments when the
+ *			cookie option is present.
+ */
+struct tcp_cookie_values {
+	struct kref	kref;
+	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
+	u8		cookie_pair_size;
+	u8		cookie_desired;
+	u16		s_data_desired:11,
+			s_data_constant:1,
+			s_data_in:1,
+			s_data_out:1,
+			s_data_unused:2;
+	u8		s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data.  Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
+		? tp->cookie_values->s_data_desired
+		: 0;
+}
+
+/**
+ *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
+ *
+ *	As tcp_request_sock has already been extended in other places, the
+ *	only remaining method is to pass stack values along as function
+ *	parameters.  These parameters are not needed after sending SYNACK.
+ *
+ * @cookie_bakery:	cryptographic secret and message workspace.
+ *
+ * @cookie_plus:	bytes in authenticator/cookie option, copied from
+ *			struct tcp_options_received (above).
+ */
+struct tcp_extend_values {
+	struct request_values		rv;
+	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
+	u8				cookie_plus:6,
+					cookie_out_never:1,
+					cookie_in_always:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
+{
+	return (struct tcp_extend_values *)rvp;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 649a36d99c73..a2bcac9b388e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1833,6 +1833,19 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1886,6 +1899,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/* TCP Cookie Transactions */
+	if (tp->cookie_values != NULL) {
+		kref_put(&tp->cookie_values->kref,
+			 tcp_cookie_values_release);
+		tp->cookie_values = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d3f6bbfc76f0..96852af43ca7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -383,14 +383,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp;
+		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
+
+		/* TCP Cookie Transactions require space for the cookie pair,
+		 * as it differs for each connection.  There is no need to
+		 * copy any s_data_payload stored at the original socket.
+		 * Failure will prevent resuming the connection.
+		 *
+		 * Presumed copied, in order of appearance:
+		 *	cookie_in_always, cookie_out_never
+		 */
+		if (oldcvp != NULL) {
+			struct tcp_cookie_values *newcvp =
+				kzalloc(sizeof(*newtp->cookie_values),
+					GFP_ATOMIC);
+
+			if (newcvp != NULL) {
+				kref_init(&newcvp->kref);
+				newcvp->cookie_desired =
+						oldcvp->cookie_desired;
+				newtp->cookie_values = newcvp;
+			} else {
+				/* Not Yet Implemented */
+				newtp->cookie_values = NULL;
+			}
+		}
 
 		/* Now setup tcp_sock */
-		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
-		newtp->snd_up = treq->snt_isn + 1;
+
+		newtp->rcv_wup = newtp->copied_seq =
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+
+		newtp->snd_sml = newtp->snd_una =
+		newtp->snd_nxt = newtp->snd_up =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		tcp_prequeue_init(newtp);
 
@@ -423,8 +452,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
-		newtp->pushed_seq = newtp->write_seq;
+		newtp->write_seq = newtp->pushed_seq =
+			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -590,7 +619,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq !=
+	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index da6e24416d75..f2ec38289a4a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1864,6 +1864,19 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+	/* TCP Cookie Transactions */
+	if (sysctl_tcp_cookie_size > 0) {
+		/* Default, cookies without s_data_payload. */
+		tp->cookie_values =
+			kzalloc(sizeof(*tp->cookie_values),
+				sk->sk_allocation);
+		if (tp->cookie_values != NULL)
+			kref_init(&tp->cookie_values->kref);
+	}
+	/* Presumed zeroed, in order of appearance:
+	 *	cookie_in_always, cookie_out_never,
+	 *	s_data_constant, s_data_in, s_data_out
+	 */
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-- 
cgit v1.2.3


From 7cff7ce94a7df2ccf5ac76b48ee0995fee2060df Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 9 Oct 2009 00:01:39 -0700
Subject: include/linux/compiler-gcc4.h: Fix build bug - gcc-4.0.2 doesn't
 understand __builtin_object_size

Maybe 4.1.0 doesn't too, but this fixed it for me.

Caused by:

 4a31276: x86: Turn the copy_from_user check into an (optional) compile time warning
 9f0cf4a: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user()

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <200910090724.n997OQl6013538@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler-gcc4.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 77542c57e20a..e6ef279ca20c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -38,7 +38,9 @@
 
 #endif
 
+#if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#endif
 #if __GNUC_MINOR__ >= 4
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
-- 
cgit v1.2.3


From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 24 Aug 2009 11:54:19 +0300
Subject: KVM: Move irq sharing information to irqchip level

This removes assumptions that max GSIs is smaller than number of pins.
Sharing is tracked on pin level not GSI level.

[avi: no PIC on ia64]

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/irq.h              |  1 +
 include/linux/kvm_host.h        |  2 +-
 virt/kvm/ioapic.h               |  1 +
 virt/kvm/irq_comm.c             | 59 +++++++++++++++++++++++++----------------
 5 files changed, 39 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b113f2b58cf..35d3236c9de4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -410,7 +410,6 @@ struct kvm_arch{
 	gpa_t ept_identity_map_addr;
 
 	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 	u64 vm_init_tsc;
 };
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7d6058a2fd38..c025a2362aae 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -71,6 +71,7 @@ struct kvm_pic {
 	int output;		/* intr from master PIC */
 	struct kvm_io_device dev;
 	void (*ack_notifier)(void *opaque, int irq);
+	unsigned long irq_states[16];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5ddd7ae..1c7f8c49e4e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int level);
+		   struct kvm *kvm, int irq_source_id, int level);
 	union {
 		struct {
 			unsigned irqchip;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b713c160..6e461ade6365 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -41,6 +41,7 @@ struct kvm_ioapic {
 	u32 irr;
 	u32 pad;
 	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+	unsigned long irq_states[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 001663ff401a..9783f5c43dae 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -31,20 +31,39 @@
 
 #include "ioapic.h"
 
+static inline int kvm_irq_line_state(unsigned long *irq_state,
+				     int irq_source_id, int level)
+{
+	/* Logical OR for level trig interrupt */
+	if (level)
+		set_bit(irq_source_id, irq_state);
+	else
+		clear_bit(irq_source_id, irq_state);
+
+	return !!(*irq_state);
+}
+
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int level)
+			   struct kvm *kvm, int irq_source_id, int level)
 {
 #ifdef CONFIG_X86
-	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+	struct kvm_pic *pic = pic_irqchip(kvm);
+	level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+	return kvm_pic_set_irq(pic, e->irqchip.pin, level);
 #else
 	return -1;
 #endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int level)
+			      struct kvm *kvm, int irq_source_id, int level)
 {
-	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int level)
+		       struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
+	if (!level)
+		return -1;
+
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
 	irq.dest_id = (e->msi.address_lo &
@@ -125,34 +147,19 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
 	struct kvm_kernel_irq_routing_entry *e;
-	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
 	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
 
-	if (irq < KVM_IOAPIC_NUM_PINS) {
-		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
-
-		/* Logical OR for level trig interrupt */
-		if (level)
-			set_bit(irq_source_id, irq_state);
-		else
-			clear_bit(irq_source_id, irq_state);
-		sig_level = !!(*irq_state);
-	} else if (!level)
-		return ret;
-	else /* Deal with MSI/MSI-X */
-		sig_level = 1;
-
 	/* Not possible to detect if the guest uses the PIC or the
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->gsi == irq) {
-			int r = e->set(e, kvm, sig_level);
+			int r = e->set(e, kvm, irq_source_id, level);
 			if (r < 0)
 				continue;
 
@@ -232,8 +239,14 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
 		return;
 	}
-	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
-		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
+		clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
+		if (i >= 16)
+			continue;
+#ifdef CONFIG_X86
+		clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
+#endif
+	}
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 	mutex_unlock(&kvm->irq_lock);
 }
-- 
cgit v1.2.3


From 46e624b95c36d729bdf24010fff11d16f6fe94fa Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 24 Aug 2009 11:54:20 +0300
Subject: KVM: Change irq routing table to use gsi indexed array

Use gsi indexed array instead of scanning all entries on each interrupt
injection.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 21 ++++++++++--
 virt/kvm/irq_comm.c      | 88 +++++++++++++++++++++++++++++-------------------
 virt/kvm/kvm_main.c      |  1 -
 3 files changed, 71 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c7f8c49e4e8..f403e66557fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -128,7 +128,17 @@ struct kvm_kernel_irq_routing_entry {
 		} irqchip;
 		struct msi_msg msi;
 	};
-	struct list_head link;
+	struct hlist_node link;
+};
+
+struct kvm_irq_routing_table {
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
 };
 
 struct kvm {
@@ -166,7 +176,7 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+	struct kvm_irq_routing_table *irq_routing;
 	struct hlist_head mask_notifier_list;
 #endif
 
@@ -390,7 +400,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask);
+#endif
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9783f5c43dae..81950f6f6fd9 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -144,10 +144,12 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
  *  = 0   Interrupt was coalesced (previous irq is still pending)
  *  > 0   Number of CPUs interrupt was delivered to
  */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -1;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -157,8 +159,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->gsi == irq) {
+	irq_rt = kvm->irq_routing;
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
 			int r = e->set(e, kvm, irq_source_id, level);
 			if (r < 0)
 				continue;
@@ -170,20 +173,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
 	unsigned gsi = pin;
+	int i;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	list_for_each_entry(e, &kvm->irq_routing, link)
+	for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) {
+		struct kvm_kernel_irq_routing_entry *e;
+		e = &kvm->irq_routing->rt_entries[i];
 		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
 		    e->irqchip.irqchip == irqchip &&
 		    e->irqchip.pin == pin) {
 			gsi = e->gsi;
 			break;
 		}
+	}
 
 	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -280,26 +286,30 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 			kimn->func(kimn, mask);
 }
 
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
-	struct kvm_kernel_irq_routing_entry *e, *n;
-
-	list_for_each_entry_safe(e, n, irq_routing, link)
-		kfree(e);
-}
-
 void kvm_free_irq_routing(struct kvm *kvm)
 {
 	mutex_lock(&kvm->irq_lock);
-	__kvm_free_irq_routing(&kvm->irq_routing);
+	kfree(kvm->irq_routing);
 	mutex_unlock(&kvm->irq_lock);
 }
 
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
 			       const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
 	int delta;
+	struct kvm_kernel_irq_routing_entry *ei;
+	struct hlist_node *n;
+
+	/*
+	 * Do not allow GSI to be mapped to the same irqchip more than once.
+	 * Allow only one to one mapping between GSI and MSI.
+	 */
+	hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
+		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+			return r;
 
 	e->gsi = ue->gsi;
 	e->type = ue->type;
@@ -332,6 +342,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 	default:
 		goto out;
 	}
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
@@ -343,43 +355,49 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned nr,
 			unsigned flags)
 {
-	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
-	struct list_head tmp = LIST_HEAD_INIT(tmp);
-	struct kvm_kernel_irq_routing_entry *e = NULL;
-	unsigned i;
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, nr_rt_entries = 0;
 	int r;
 
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+
+	nr_rt_entries += 1;
+
+	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+	new->nr_rt_entries = nr_rt_entries;
+
 	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
-		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
-			goto out;
 		if (ue->flags)
 			goto out;
-		r = -ENOMEM;
-		e = kzalloc(sizeof(*e), GFP_KERNEL);
-		if (!e)
-			goto out;
-		r = setup_routing_entry(e, ue);
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
 		if (r)
 			goto out;
 		++ue;
-		list_add(&e->link, &irq_list);
-		e = NULL;
 	}
 
 	mutex_lock(&kvm->irq_lock);
-	list_splice(&kvm->irq_routing, &tmp);
-	INIT_LIST_HEAD(&kvm->irq_routing);
-	list_splice(&irq_list, &kvm->irq_routing);
-	INIT_LIST_HEAD(&irq_list);
-	list_splice(&tmp, &irq_list);
+	old = kvm->irq_routing;
+	kvm->irq_routing = new;
 	mutex_unlock(&kvm->irq_lock);
 
+	new = old;
 	r = 0;
 
 out:
-	kfree(e);
-	__kvm_free_irq_routing(&irq_list);
+	kfree(new);
 	return r;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 22b520b54411..3bee94892774 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -957,7 +957,6 @@ static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 #endif
 
-- 
cgit v1.2.3


From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 24 Aug 2009 11:54:21 +0300
Subject: KVM: Maintain back mapping from irqchip/pin to gsi

Maintain back mapping from irqchip/pin to gsi to speedup
interrupt acknowledgment notifications.

[avi: build fix on non-x86/ia64]

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h |  1 +
 arch/x86/include/asm/kvm.h  |  1 +
 include/linux/kvm_host.h    |  9 +++++++++
 virt/kvm/irq_comm.c         | 31 ++++++++++++++-----------------
 4 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 18a7e49abbc5..bc90c75adf67 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -60,6 +60,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE    1
 #define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
 
 #define KVM_CONTEXT_SIZE	8*1024
 
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4a5fe914dc59..f02e87a5206f 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -79,6 +79,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE    1
 #define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f403e66557fb..cc2d7493598b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -131,7 +131,10 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
+#ifdef __KVM_HAVE_IOAPIC
+
 struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
 	struct kvm_kernel_irq_routing_entry *rt_entries;
 	u32 nr_rt_entries;
 	/*
@@ -141,6 +144,12 @@ struct kvm_irq_routing_table {
 	struct hlist_head map[0];
 };
 
+#else
+
+struct kvm_irq_routing_table {};
+
+#endif
+
 struct kvm {
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 81950f6f6fd9..59cf8dae0062 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
-	unsigned gsi = pin;
-	int i;
+	int gsi;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) {
-		struct kvm_kernel_irq_routing_entry *e;
-		e = &kvm->irq_routing->rt_entries[i];
-		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
-		    e->irqchip.irqchip == irqchip &&
-		    e->irqchip.pin == pin) {
-			gsi = e->gsi;
-			break;
-		}
-	}
-
-	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
-		if (kian->gsi == gsi)
-			kian->irq_acked(kian);
+	gsi = kvm->irq_routing->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list,
+				     link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -332,6 +323,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		}
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
@@ -356,7 +350,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned flags)
 {
 	struct kvm_irq_routing_table *new, *old;
-	u32 i, nr_rt_entries = 0;
+	u32 i, j, nr_rt_entries = 0;
 	int r;
 
 	for (i = 0; i < nr; ++i) {
@@ -377,6 +371,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	new->rt_entries = (void *)&new->map[nr_rt_entries];
 
 	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < 3; i++)
+		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
+			new->chip[i][j] = -1;
 
 	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
-- 
cgit v1.2.3


From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 24 Aug 2009 11:54:23 +0300
Subject: KVM: Move irq ack notifier list to arch independent code

Mask irq notifier list is already there.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h | 1 -
 arch/x86/include/asm/kvm_host.h  | 1 -
 include/linux/kvm_host.h         | 1 +
 virt/kvm/irq_comm.c              | 5 ++---
 virt/kvm/kvm_main.c              | 1 +
 5 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index d9b6325a9328..a362e67e0ca6 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -475,7 +475,6 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	int iommu_flags;
-	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35d3236c9de4..a46e2dd9aca8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -397,7 +397,6 @@ struct kvm_arch{
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
-	struct hlist_head irq_ack_notifier_list;
 	int vapics_in_nmi_mode;
 
 	unsigned int tss_addr;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cc2d7493598b..4aa5e1d9a797 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -187,6 +187,7 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct kvm_irq_routing_table *irq_routing;
 	struct hlist_head mask_notifier_list;
+	struct hlist_head irq_ack_notifier_list;
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index fb861dd956fc..f01972595938 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	rcu_read_unlock();
 
 	if (gsi != -1)
-		hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list,
-				     link)
+		hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
 }
@@ -196,7 +195,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3bee94892774..6eca153e1a02 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -958,6 +958,7 @@ static struct kvm *kvm_create_vm(void)
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-- 
cgit v1.2.3


From bfd99ff5d483b11c32bca49fbff7a5ac59038b0a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 26 Aug 2009 14:57:50 +0300
Subject: KVM: Move assigned device code to own file

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/Makefile   |   2 +-
 arch/x86/kvm/Makefile    |   3 +-
 include/linux/kvm_host.h |  17 +
 virt/kvm/assigned-dev.c  | 818 +++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 798 +--------------------------------------------
 5 files changed, 840 insertions(+), 798 deletions(-)
 create mode 100644 virt/kvm/assigned-dev.c

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 0bb99b732908..1089b3e918ac 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o)
+		coalesced_mmio.o irq_comm.o assigned-dev.o)
 
 ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e7fe78d0f74..31a7035c4bd9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-				coalesced_mmio.o irq_comm.o eventfd.o)
+				coalesced_mmio.o irq_comm.o eventfd.o \
+				assigned-dev.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4aa5e1d9a797..c0a1cc35f080 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -577,4 +577,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
 }
 #endif
+
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg);
+
+#else
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+						unsigned long arg)
+{
+	return -ENOTTY;
+}
+
 #endif
+
+#endif
+
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
new file mode 100644
index 000000000000..fd9c097b760a
--- /dev/null
+++ b/virt/kvm/assigned-dev.c
@@ -0,0 +1,818 @@
+/*
+ * Kernel-based Virtual Machine - device assignment support
+ *
+ * Copyright (C) 2006-9 Red Hat, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "irq.h"
+
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int i;
+
+	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+				    interrupt_work);
+	kvm = assigned_dev->kvm;
+
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+		}
+	} else
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
+}
+
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+	unsigned long flags;
+	struct kvm_assigned_dev_kernel *assigned_dev =
+		(struct kvm_assigned_dev_kernel *) dev_id;
+
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			goto out;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
+	schedule_work(&assigned_dev->interrupt_work);
+
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+	}
+
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
+	return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
+
+	if (kian->gsi == -1)
+		return;
+
+	dev = container_of(kian, struct kvm_assigned_dev_kernel,
+			   ack_notifier);
+
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+
+	/* The guest irq may be shared so this ack may be
+	 * from another device.
+	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
+	if (dev->host_irq_disabled) {
+		enable_irq(dev->host_irq);
+		dev->host_irq_disabled = false;
+	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
+}
+
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
+
+	if (assigned_dev->irq_source_id != -1)
+		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
+
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	/*
+	 * In kvm_free_device_irq, cancel_work_sync return true if:
+	 * 1. work is scheduled, and then cancelled.
+	 * 2. work callback is executed.
+	 *
+	 * The first one ensured that the irq is disabled and no more events
+	 * would happen. But for the second one, the irq may be enabled (e.g.
+	 * for MSI). So we disable irq here to prevent further events.
+	 *
+	 * Notice this maybe result in nested disable if the interrupt type is
+	 * INTx, but it's OK for we are going to free it.
+	 *
+	 * If this function is a part of VM destroy, please ensure that till
+	 * now, the kvm state is still legal for probably we also have to wait
+	 * interrupt_work done.
+	 */
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
+
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
+
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	kvm_free_assigned_irq(kvm, assigned_dev);
+
+	pci_reset_function(assigned_dev->dev);
+
+	pci_release_regions(assigned_dev->dev);
+	pci_disable_device(assigned_dev->dev);
+	pci_dev_put(assigned_dev->dev);
+
+	list_del(&assigned_dev->list);
+	kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		kvm_free_assigned_device(kvm, assigned_dev);
+	}
+}
+
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
+
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
+
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
+{
+	int i, r = -EINVAL;
+
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
+
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
+
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
+	return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
+
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
+
+	return r;
+}
+
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
+
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
+
+	return r;
+}
+
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq *assigned_irq)
+{
+	int r = -EINVAL;
+	struct kvm_assigned_dev_kernel *match;
+	unsigned long host_irq_type, guest_irq_type;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
+
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
+
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
+
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	down_read(&kvm->slots_lock);
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EEXIST;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+
+	pci_reset_function(dev);
+
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->flags = assigned_dev->flags;
+	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
+	match->irq_source_id = -1;
+	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		if (!kvm->arch.iommu_domain) {
+			r = kvm_iommu_map_guest(kvm);
+			if (r)
+				goto out_list_del;
+		}
+		r = kvm_assign_device(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	up_read(&kvm->slots_lock);
+	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	mutex_unlock(&kvm->lock);
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		printk(KERN_INFO "%s: device hasn't been assigned before, "
+		  "so cannot be deassigned\n", __func__);
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+		kvm_deassign_device(kvm, match);
+
+	kvm_free_assigned_device(kvm, match);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int r = -ENOTTY;
+
+	switch (ioctl) {
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+	case KVM_DEASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+#ifdef KVM_CAP_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
+#endif /* KVM_CAP_IRQ_ROUTING */
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
+	}
+out:
+	return r;
+}
+
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c12c95b1b641..38e4d2c34ac1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -53,12 +53,6 @@
 #include "coalesced_mmio.h"
 #endif
 
-#ifdef KVM_CAP_DEVICE_ASSIGNMENT
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include "irq.h"
-#endif
-
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
@@ -90,608 +84,6 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
-#ifdef KVM_CAP_DEVICE_ASSIGNMENT
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
-						      int assigned_dev_id)
-{
-	struct list_head *ptr;
-	struct kvm_assigned_dev_kernel *match;
-
-	list_for_each(ptr, head) {
-		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
-		if (match->assigned_dev_id == assigned_dev_id)
-			return match;
-	}
-	return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
-				    *assigned_dev, int irq)
-{
-	int i, index;
-	struct msix_entry *host_msix_entries;
-
-	host_msix_entries = assigned_dev->host_msix_entries;
-
-	index = -1;
-	for (i = 0; i < assigned_dev->entries_nr; i++)
-		if (irq == host_msix_entries[i].vector) {
-			index = i;
-			break;
-		}
-	if (index < 0) {
-		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-		return 0;
-	}
-
-	return index;
-}
-
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
-{
-	struct kvm_assigned_dev_kernel *assigned_dev;
-	struct kvm *kvm;
-	int i;
-
-	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
-				    interrupt_work);
-	kvm = assigned_dev->kvm;
-
-	spin_lock_irq(&assigned_dev->assigned_dev_lock);
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-		struct kvm_guest_msix_entry *guest_entries =
-			assigned_dev->guest_msix_entries;
-		for (i = 0; i < assigned_dev->entries_nr; i++) {
-			if (!(guest_entries[i].flags &
-					KVM_ASSIGNED_MSIX_PENDING))
-				continue;
-			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
-			kvm_set_irq(assigned_dev->kvm,
-				    assigned_dev->irq_source_id,
-				    guest_entries[i].vector, 1);
-		}
-	} else
-		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-			    assigned_dev->guest_irq, 1);
-
-	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-}
-
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
-{
-	unsigned long flags;
-	struct kvm_assigned_dev_kernel *assigned_dev =
-		(struct kvm_assigned_dev_kernel *) dev_id;
-
-	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-		int index = find_index_from_host_irq(assigned_dev, irq);
-		if (index < 0)
-			goto out;
-		assigned_dev->guest_msix_entries[index].flags |=
-			KVM_ASSIGNED_MSIX_PENDING;
-	}
-
-	schedule_work(&assigned_dev->interrupt_work);
-
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
-		disable_irq_nosync(irq);
-		assigned_dev->host_irq_disabled = true;
-	}
-
-out:
-	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
-	return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-	struct kvm_assigned_dev_kernel *dev;
-	unsigned long flags;
-
-	if (kian->gsi == -1)
-		return;
-
-	dev = container_of(kian, struct kvm_assigned_dev_kernel,
-			   ack_notifier);
-
-	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-
-	/* The guest irq may be shared so this ack may be
-	 * from another device.
-	 */
-	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
-	if (dev->host_irq_disabled) {
-		enable_irq(dev->host_irq);
-		dev->host_irq_disabled = false;
-	}
-	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
-			       struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
-	assigned_dev->ack_notifier.gsi = -1;
-
-	if (assigned_dev->irq_source_id != -1)
-		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
-	assigned_dev->irq_source_id = -1;
-	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
-			      struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	/*
-	 * In kvm_free_device_irq, cancel_work_sync return true if:
-	 * 1. work is scheduled, and then cancelled.
-	 * 2. work callback is executed.
-	 *
-	 * The first one ensured that the irq is disabled and no more events
-	 * would happen. But for the second one, the irq may be enabled (e.g.
-	 * for MSI). So we disable irq here to prevent further events.
-	 *
-	 * Notice this maybe result in nested disable if the interrupt type is
-	 * INTx, but it's OK for we are going to free it.
-	 *
-	 * If this function is a part of VM destroy, please ensure that till
-	 * now, the kvm state is still legal for probably we also have to wait
-	 * interrupt_work done.
-	 */
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-		int i;
-		for (i = 0; i < assigned_dev->entries_nr; i++)
-			disable_irq_nosync(assigned_dev->
-					   host_msix_entries[i].vector);
-
-		cancel_work_sync(&assigned_dev->interrupt_work);
-
-		for (i = 0; i < assigned_dev->entries_nr; i++)
-			free_irq(assigned_dev->host_msix_entries[i].vector,
-				 (void *)assigned_dev);
-
-		assigned_dev->entries_nr = 0;
-		kfree(assigned_dev->host_msix_entries);
-		kfree(assigned_dev->guest_msix_entries);
-		pci_disable_msix(assigned_dev->dev);
-	} else {
-		/* Deal with MSI and INTx */
-		disable_irq_nosync(assigned_dev->host_irq);
-		cancel_work_sync(&assigned_dev->interrupt_work);
-
-		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-
-		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
-			pci_disable_msi(assigned_dev->dev);
-	}
-
-	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
-			    struct kvm_assigned_dev_kernel *assigned_dev,
-			    unsigned long irq_requested_type)
-{
-	unsigned long guest_irq_type, host_irq_type;
-
-	if (!irqchip_in_kernel(kvm))
-		return -EINVAL;
-	/* no irq assignment to deassign */
-	if (!assigned_dev->irq_requested_type)
-		return -ENXIO;
-
-	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
-	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
-	if (host_irq_type)
-		deassign_host_irq(kvm, assigned_dev);
-	if (guest_irq_type)
-		deassign_guest_irq(kvm, assigned_dev);
-
-	return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
-				     struct kvm_assigned_dev_kernel
-				     *assigned_dev)
-{
-	kvm_free_assigned_irq(kvm, assigned_dev);
-
-	pci_reset_function(assigned_dev->dev);
-
-	pci_release_regions(assigned_dev->dev);
-	pci_disable_device(assigned_dev->dev);
-	pci_dev_put(assigned_dev->dev);
-
-	list_del(&assigned_dev->list);
-	kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
-	struct list_head *ptr, *ptr2;
-	struct kvm_assigned_dev_kernel *assigned_dev;
-
-	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
-		assigned_dev = list_entry(ptr,
-					  struct kvm_assigned_dev_kernel,
-					  list);
-
-		kvm_free_assigned_device(kvm, assigned_dev);
-	}
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
-					    struct kvm_assigned_dev_kernel *dev)
-{
-	dev->host_irq = dev->dev->irq;
-	/* Even though this is PCI, we don't want to use shared
-	 * interrupts. Sharing host devices with guest-assigned devices
-	 * on the same interrupt line is not a happy situation: there
-	 * are going to be long delays in accepting, acking, etc.
-	 */
-	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
-			0, "kvm_assigned_intx_device", (void *)dev))
-		return -EIO;
-	return 0;
-}
-
-#ifdef __KVM_HAVE_MSI
-static int assigned_device_enable_host_msi(struct kvm *kvm,
-					   struct kvm_assigned_dev_kernel *dev)
-{
-	int r;
-
-	if (!dev->dev->msi_enabled) {
-		r = pci_enable_msi(dev->dev);
-		if (r)
-			return r;
-	}
-
-	dev->host_irq = dev->dev->irq;
-	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
-			"kvm_assigned_msi_device", (void *)dev)) {
-		pci_disable_msi(dev->dev);
-		return -EIO;
-	}
-
-	return 0;
-}
-#endif
-
-#ifdef __KVM_HAVE_MSIX
-static int assigned_device_enable_host_msix(struct kvm *kvm,
-					    struct kvm_assigned_dev_kernel *dev)
-{
-	int i, r = -EINVAL;
-
-	/* host_msix_entries and guest_msix_entries should have been
-	 * initialized */
-	if (dev->entries_nr == 0)
-		return r;
-
-	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
-	if (r)
-		return r;
-
-	for (i = 0; i < dev->entries_nr; i++) {
-		r = request_irq(dev->host_msix_entries[i].vector,
-				kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msix_device",
-				(void *)dev);
-		/* FIXME: free requested_irq's on failure */
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-#endif
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
-				struct kvm_assigned_dev_kernel *dev,
-				struct kvm_assigned_irq *irq)
-{
-	dev->guest_irq = irq->guest_irq;
-	dev->ack_notifier.gsi = irq->guest_irq;
-	return 0;
-}
-
-#ifdef __KVM_HAVE_MSI
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *dev,
-			struct kvm_assigned_irq *irq)
-{
-	dev->guest_irq = irq->guest_irq;
-	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
-	return 0;
-}
-#endif
-#ifdef __KVM_HAVE_MSIX
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *dev,
-			struct kvm_assigned_irq *irq)
-{
-	dev->guest_irq = irq->guest_irq;
-	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
-	return 0;
-}
-#endif
-
-static int assign_host_irq(struct kvm *kvm,
-			   struct kvm_assigned_dev_kernel *dev,
-			   __u32 host_irq_type)
-{
-	int r = -EEXIST;
-
-	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
-		return r;
-
-	switch (host_irq_type) {
-	case KVM_DEV_IRQ_HOST_INTX:
-		r = assigned_device_enable_host_intx(kvm, dev);
-		break;
-#ifdef __KVM_HAVE_MSI
-	case KVM_DEV_IRQ_HOST_MSI:
-		r = assigned_device_enable_host_msi(kvm, dev);
-		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
-	case KVM_DEV_IRQ_HOST_MSIX:
-		r = assigned_device_enable_host_msix(kvm, dev);
-		break;
-#endif
-	default:
-		r = -EINVAL;
-	}
-
-	if (!r)
-		dev->irq_requested_type |= host_irq_type;
-
-	return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
-			    struct kvm_assigned_dev_kernel *dev,
-			    struct kvm_assigned_irq *irq,
-			    unsigned long guest_irq_type)
-{
-	int id;
-	int r = -EEXIST;
-
-	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
-		return r;
-
-	id = kvm_request_irq_source_id(kvm);
-	if (id < 0)
-		return id;
-
-	dev->irq_source_id = id;
-
-	switch (guest_irq_type) {
-	case KVM_DEV_IRQ_GUEST_INTX:
-		r = assigned_device_enable_guest_intx(kvm, dev, irq);
-		break;
-#ifdef __KVM_HAVE_MSI
-	case KVM_DEV_IRQ_GUEST_MSI:
-		r = assigned_device_enable_guest_msi(kvm, dev, irq);
-		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
-	case KVM_DEV_IRQ_GUEST_MSIX:
-		r = assigned_device_enable_guest_msix(kvm, dev, irq);
-		break;
-#endif
-	default:
-		r = -EINVAL;
-	}
-
-	if (!r) {
-		dev->irq_requested_type |= guest_irq_type;
-		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
-	} else
-		kvm_free_irq_source_id(kvm, dev->irq_source_id);
-
-	return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq *assigned_irq)
-{
-	int r = -EINVAL;
-	struct kvm_assigned_dev_kernel *match;
-	unsigned long host_irq_type, guest_irq_type;
-
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	if (!irqchip_in_kernel(kvm))
-		return r;
-
-	mutex_lock(&kvm->lock);
-	r = -ENODEV;
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_irq->assigned_dev_id);
-	if (!match)
-		goto out;
-
-	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
-	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
-	r = -EINVAL;
-	/* can only assign one type at a time */
-	if (hweight_long(host_irq_type) > 1)
-		goto out;
-	if (hweight_long(guest_irq_type) > 1)
-		goto out;
-	if (host_irq_type == 0 && guest_irq_type == 0)
-		goto out;
-
-	r = 0;
-	if (host_irq_type)
-		r = assign_host_irq(kvm, match, host_irq_type);
-	if (r)
-		goto out;
-
-	if (guest_irq_type)
-		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
-					 struct kvm_assigned_irq
-					 *assigned_irq)
-{
-	int r = -ENODEV;
-	struct kvm_assigned_dev_kernel *match;
-
-	mutex_lock(&kvm->lock);
-
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_irq->assigned_dev_id);
-	if (!match)
-		goto out;
-
-	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
-out:
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
-				      struct kvm_assigned_pci_dev *assigned_dev)
-{
-	int r = 0;
-	struct kvm_assigned_dev_kernel *match;
-	struct pci_dev *dev;
-
-	down_read(&kvm->slots_lock);
-	mutex_lock(&kvm->lock);
-
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_dev->assigned_dev_id);
-	if (match) {
-		/* device already assigned */
-		r = -EEXIST;
-		goto out;
-	}
-
-	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
-	if (match == NULL) {
-		printk(KERN_INFO "%s: Couldn't allocate memory\n",
-		       __func__);
-		r = -ENOMEM;
-		goto out;
-	}
-	dev = pci_get_bus_and_slot(assigned_dev->busnr,
-				   assigned_dev->devfn);
-	if (!dev) {
-		printk(KERN_INFO "%s: host device not found\n", __func__);
-		r = -EINVAL;
-		goto out_free;
-	}
-	if (pci_enable_device(dev)) {
-		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
-		r = -EBUSY;
-		goto out_put;
-	}
-	r = pci_request_regions(dev, "kvm_assigned_device");
-	if (r) {
-		printk(KERN_INFO "%s: Could not get access to device regions\n",
-		       __func__);
-		goto out_disable;
-	}
-
-	pci_reset_function(dev);
-
-	match->assigned_dev_id = assigned_dev->assigned_dev_id;
-	match->host_busnr = assigned_dev->busnr;
-	match->host_devfn = assigned_dev->devfn;
-	match->flags = assigned_dev->flags;
-	match->dev = dev;
-	spin_lock_init(&match->assigned_dev_lock);
-	match->irq_source_id = -1;
-	match->kvm = kvm;
-	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-	INIT_WORK(&match->interrupt_work,
-		  kvm_assigned_dev_interrupt_work_handler);
-
-	list_add(&match->list, &kvm->arch.assigned_dev_head);
-
-	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		if (!kvm->arch.iommu_domain) {
-			r = kvm_iommu_map_guest(kvm);
-			if (r)
-				goto out_list_del;
-		}
-		r = kvm_assign_device(kvm, match);
-		if (r)
-			goto out_list_del;
-	}
-
-out:
-	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
-	return r;
-out_list_del:
-	list_del(&match->list);
-	pci_release_regions(dev);
-out_disable:
-	pci_disable_device(dev);
-out_put:
-	pci_dev_put(dev);
-out_free:
-	kfree(match);
-	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
-	return r;
-}
-#endif
-
-#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
-		struct kvm_assigned_pci_dev *assigned_dev)
-{
-	int r = 0;
-	struct kvm_assigned_dev_kernel *match;
-
-	mutex_lock(&kvm->lock);
-
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_dev->assigned_dev_id);
-	if (!match) {
-		printk(KERN_INFO "%s: device hasn't been assigned before, "
-		  "so cannot be deassigned\n", __func__);
-		r = -EINVAL;
-		goto out;
-	}
-
-	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
-		kvm_deassign_device(kvm, match);
-
-	kvm_free_assigned_device(kvm, match);
-
-out:
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-#endif
-
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -1824,88 +1216,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
-#ifdef __KVM_HAVE_MSIX
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
-				    struct kvm_assigned_msix_nr *entry_nr)
-{
-	int r = 0;
-	struct kvm_assigned_dev_kernel *adev;
-
-	mutex_lock(&kvm->lock);
-
-	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      entry_nr->assigned_dev_id);
-	if (!adev) {
-		r = -EINVAL;
-		goto msix_nr_out;
-	}
-
-	if (adev->entries_nr == 0) {
-		adev->entries_nr = entry_nr->entry_nr;
-		if (adev->entries_nr == 0 ||
-		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
-			r = -EINVAL;
-			goto msix_nr_out;
-		}
-
-		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
-						entry_nr->entry_nr,
-						GFP_KERNEL);
-		if (!adev->host_msix_entries) {
-			r = -ENOMEM;
-			goto msix_nr_out;
-		}
-		adev->guest_msix_entries = kzalloc(
-				sizeof(struct kvm_guest_msix_entry) *
-				entry_nr->entry_nr, GFP_KERNEL);
-		if (!adev->guest_msix_entries) {
-			kfree(adev->host_msix_entries);
-			r = -ENOMEM;
-			goto msix_nr_out;
-		}
-	} else /* Not allowed set MSI-X number twice */
-		r = -EINVAL;
-msix_nr_out:
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
-				       struct kvm_assigned_msix_entry *entry)
-{
-	int r = 0, i;
-	struct kvm_assigned_dev_kernel *adev;
-
-	mutex_lock(&kvm->lock);
-
-	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      entry->assigned_dev_id);
-
-	if (!adev) {
-		r = -EINVAL;
-		goto msix_entry_out;
-	}
-
-	for (i = 0; i < adev->entries_nr; i++)
-		if (adev->guest_msix_entries[i].vector == 0 ||
-		    adev->guest_msix_entries[i].entry == entry->entry) {
-			adev->guest_msix_entries[i].entry = entry->entry;
-			adev->guest_msix_entries[i].vector = entry->gsi;
-			adev->host_msix_entries[i].entry = entry->entry;
-			break;
-		}
-	if (i == adev->entries_nr) {
-		r = -ENOSPC;
-		goto msix_entry_out;
-	}
-
-msix_entry_out:
-	mutex_unlock(&kvm->lock);
-
-	return r;
-}
-#endif
-
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2163,112 +1473,6 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-#endif
-#ifdef KVM_CAP_DEVICE_ASSIGNMENT
-	case KVM_ASSIGN_PCI_DEVICE: {
-		struct kvm_assigned_pci_dev assigned_dev;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-			goto out;
-		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
-		if (r)
-			goto out;
-		break;
-	}
-	case KVM_ASSIGN_IRQ: {
-		r = -EOPNOTSUPP;
-		break;
-	}
-#ifdef KVM_CAP_ASSIGN_DEV_IRQ
-	case KVM_ASSIGN_DEV_IRQ: {
-		struct kvm_assigned_irq assigned_irq;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
-			goto out;
-		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
-		if (r)
-			goto out;
-		break;
-	}
-	case KVM_DEASSIGN_DEV_IRQ: {
-		struct kvm_assigned_irq assigned_irq;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
-			goto out;
-		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
-		if (r)
-			goto out;
-		break;
-	}
-#endif
-#endif
-#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
-	case KVM_DEASSIGN_PCI_DEVICE: {
-		struct kvm_assigned_pci_dev assigned_dev;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-			goto out;
-		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
-		if (r)
-			goto out;
-		break;
-	}
-#endif
-#ifdef KVM_CAP_IRQ_ROUTING
-	case KVM_SET_GSI_ROUTING: {
-		struct kvm_irq_routing routing;
-		struct kvm_irq_routing __user *urouting;
-		struct kvm_irq_routing_entry *entries;
-
-		r = -EFAULT;
-		if (copy_from_user(&routing, argp, sizeof(routing)))
-			goto out;
-		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
-			goto out;
-		if (routing.flags)
-			goto out;
-		r = -ENOMEM;
-		entries = vmalloc(routing.nr * sizeof(*entries));
-		if (!entries)
-			goto out;
-		r = -EFAULT;
-		urouting = argp;
-		if (copy_from_user(entries, urouting->entries,
-				   routing.nr * sizeof(*entries)))
-			goto out_free_irq_routing;
-		r = kvm_set_irq_routing(kvm, entries, routing.nr,
-					routing.flags);
-	out_free_irq_routing:
-		vfree(entries);
-		break;
-	}
-#endif /* KVM_CAP_IRQ_ROUTING */
-#ifdef __KVM_HAVE_MSIX
-	case KVM_ASSIGN_SET_MSIX_NR: {
-		struct kvm_assigned_msix_nr entry_nr;
-		r = -EFAULT;
-		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
-			goto out;
-		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
-		if (r)
-			goto out;
-		break;
-	}
-	case KVM_ASSIGN_SET_MSIX_ENTRY: {
-		struct kvm_assigned_msix_entry entry;
-		r = -EFAULT;
-		if (copy_from_user(&entry, argp, sizeof entry))
-			goto out;
-		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
-		if (r)
-			goto out;
-		break;
-	}
 #endif
 	case KVM_IRQFD: {
 		struct kvm_irqfd data;
@@ -2301,6 +1505,8 @@ static long kvm_vm_ioctl(struct file *filp,
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
+		if (r == -ENOTTY)
+			r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;
-- 
cgit v1.2.3


From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 15 Sep 2009 11:37:46 +0200
Subject: KVM: Activate Virtualization On Demand

X86 CPUs need to have some magic happening to enable the virtualization
extensions on them. This magic can result in unpleasant results for
users, like blocking other VMMs from working (vmx) or using invalid TLB
entries (svm).

Currently KVM activates virtualization when the respective kernel module
is loaded. This blocks us from autoloading KVM modules without breaking
other VMMs.

To circumvent this problem at least a bit, this patch introduces on
demand activation of virtualization. This means, that instead
virtualization is enabled on creation of the first virtual machine
and disabled on destruction of the last one.

So using this, KVM can be easily autoloaded, while keeping other
hypervisors usable.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        |  8 ++--
 arch/powerpc/kvm/powerpc.c      |  3 +-
 arch/s390/kvm/kvm-s390.c        |  3 +-
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              | 13 ++++--
 arch/x86/kvm/vmx.c              | 11 +++--
 arch/x86/kvm/x86.c              |  4 +-
 include/linux/kvm_host.h        |  2 +-
 virt/kvm/kvm_main.c             | 90 +++++++++++++++++++++++++++++++++++------
 9 files changed, 108 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f6471c882667..5fdeec5fddcf 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	long  status;
 	long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
 	if (slot < 0)
-		return;
+		return -EINVAL;
 
 	spin_lock(&vp_lock);
 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return ;
+		return -EINVAL;
 	}
 
 	if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
 	}
 	spin_unlock(&vp_lock);
 	ia64_ptr_entry(0x3, slot);
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95af62217b6b..5902bbc2411e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 00e2ce8e91f5..544505893c9f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	/* every s390 is virtualization enabled ;-) */
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a46e2dd9aca8..295c7c4d9c90 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -459,7 +459,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	void (*hardware_enable)(void *dummy);      /* __init */
+	int (*hardware_enable)(void *dummy);
 	void (*hardware_disable)(void *dummy);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f54c4f9d2865..59fe4d54da11 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *svm_data;
@@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		return -EINVAL;
 	}
 	svm_data = per_cpu(svm_data, me);
 
 	if (!svm_data) {
 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	svm_data->asid_generation = 1;
@@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 73cb5dd960cf..a187570e4837 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
 	/* locked but not enabled */
 }
 
-static void hardware_enable(void *garbage)
+static int hardware_enable(void *garbage)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	if (read_cr4() & X86_CR4_VMXE)
+		return -EBUSY;
+
 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage)
 	asm volatile (ASM_VMX_VMXON_RAX
 		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
+
+	ept_sync_global();
+
+	return 0;
 }
 
 static void vmclear_local_vcpus(void)
@@ -4040,8 +4047,6 @@ static int __init vmx_init(void)
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
-	ept_sync_global();
-
 	return 0;
 
 out3:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 829e3063e2ab..3d83de8bcbf4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4691,9 +4691,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-	kvm_x86_ops->hardware_enable(garbage);
+	return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c0a1cc35f080..b985a29d8175 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -345,7 +345,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
-void kvm_arch_hardware_enable(void *garbage);
+int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 38e4d2c34ac1..70c8cbea0a99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
+static int kvm_usage_count = 0;
+static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
+static int hardware_enable_all(void);
+static void hardware_disable_all(void);
 
 static bool kvm_rebooting;
 
@@ -339,6 +343,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 
 static struct kvm *kvm_create_vm(void)
 {
+	int r = 0;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -346,6 +351,11 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+
+	r = hardware_enable_all();
+	if (r)
+		goto out_err_nodisable;
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
@@ -354,8 +364,8 @@ static struct kvm *kvm_create_vm(void)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
+		r = -ENOMEM;
+		goto out_err;
 	}
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -363,15 +373,13 @@ static struct kvm *kvm_create_vm(void)
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	{
-		int err;
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (err) {
+		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (r) {
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
-			kfree(kvm);
-			return ERR_PTR(err);
+			goto out_err;
 		}
 	}
 #endif
@@ -395,6 +403,12 @@ static struct kvm *kvm_create_vm(void)
 #endif
 out:
 	return kvm;
+
+out_err:
+	hardware_disable_all();
+out_err_nodisable:
+	kfree(kvm);
+	return ERR_PTR(r);
 }
 
 /*
@@ -453,6 +467,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	hardware_disable_all();
 	mmdrop(mm);
 }
 
@@ -1644,11 +1659,21 @@ static struct miscdevice kvm_dev = {
 static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
+	int r;
 
 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
+
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_enable(NULL);
+
+	r = kvm_arch_hardware_enable(NULL);
+
+	if (r) {
+		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+		atomic_inc(&hardware_enable_failed);
+		printk(KERN_INFO "kvm: enabling virtualization on "
+				 "CPU%d failed\n", cpu);
+	}
 }
 
 static void hardware_disable(void *junk)
@@ -1661,11 +1686,52 @@ static void hardware_disable(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable_all_nolock(void)
+{
+	BUG_ON(!kvm_usage_count);
+
+	kvm_usage_count--;
+	if (!kvm_usage_count)
+		on_each_cpu(hardware_disable, NULL, 1);
+}
+
+static void hardware_disable_all(void)
+{
+	spin_lock(&kvm_lock);
+	hardware_disable_all_nolock();
+	spin_unlock(&kvm_lock);
+}
+
+static int hardware_enable_all(void)
+{
+	int r = 0;
+
+	spin_lock(&kvm_lock);
+
+	kvm_usage_count++;
+	if (kvm_usage_count == 1) {
+		atomic_set(&hardware_enable_failed, 0);
+		on_each_cpu(hardware_enable, NULL, 1);
+
+		if (atomic_read(&hardware_enable_failed)) {
+			hardware_disable_all_nolock();
+			r = -EBUSY;
+		}
+	}
+
+	spin_unlock(&kvm_lock);
+
+	return r;
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 			   void *v)
 {
 	int cpu = (long)v;
 
+	if (!kvm_usage_count)
+		return NOTIFY_OK;
+
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
@@ -1868,13 +1934,15 @@ static void kvm_exit_debug(void)
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
-	hardware_disable(NULL);
+	if (kvm_usage_count)
+		hardware_disable(NULL);
 	return 0;
 }
 
 static int kvm_resume(struct sys_device *dev)
 {
-	hardware_enable(NULL);
+	if (kvm_usage_count)
+		hardware_enable(NULL);
 	return 0;
 }
 
@@ -1949,7 +2017,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1999,7 +2066,6 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0a:
-- 
cgit v1.2.3


From d255f4f2bac81eb798fcf76938147f1f6c756ae2 Mon Sep 17 00:00:00 2001
From: "Zhai, Edwin" <edwin.zhai@intel.com>
Date: Fri, 9 Oct 2009 18:03:20 +0800
Subject: KVM: introduce kvm_vcpu_on_spin

Introduce kvm_vcpu_on_spin, to be used by VMX/SVM to yield processing
once the cpu detects pause-based looping.

Signed-off-by: "Zhai, Edwin" <edwin.zhai@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b985a29d8175..bd5a616d9373 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -286,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70c8cbea0a99..cac69c4415df 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1108,6 +1108,21 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
+{
+	ktime_t expires;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+
+	/* Sleep for 100 us, and hope lock-holder got scheduled */
+	expires = ktime_add_ns(ktime_get(), 100000UL);
+	schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+
+	finish_wait(&vcpu->wq, &wait);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
+
 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
-- 
cgit v1.2.3


From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001
From: Ed Swierk <eswierk@aristanetworks.com>
Date: Thu, 15 Oct 2009 15:21:43 -0700
Subject: KVM: Xen PV-on-HVM guest support

Support for Xen PV-on-HVM guests can be implemented almost entirely in
userspace, except for handling one annoying MSR that maps a Xen
hypercall blob into guest address space.

A generic mechanism to delegate MSR writes to userspace seems overkill
and risks encouraging similar MSR abuse in the future.  Thus this patch
adds special support for the Xen HVM MSR.

I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell
KVM which MSR the guest will write to, as well as the starting address
and size of the hypercall blobs (one each for 32-bit and 64-bit) that
userspace has loaded from files.  When the guest writes to the MSR, KVM
copies one page of the blob from userspace to the guest.

I've tested this patch with a hacked-up version of Gerd's userspace
code, booting a number of guests (CentOS 5.3 i386 and x86_64, and
FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices.

[jan: fix i386 build warning]
[avi: future proof abi with a flags field]

Signed-off-by: Ed Swierk <eswierk@aristanetworks.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/kvm/api.txt       | 24 +++++++++++++++++++++
 arch/x86/include/asm/kvm.h      |  1 +
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 46 +++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm.h             | 16 ++++++++++++++
 5 files changed, 89 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 5a4bc8cf6d04..3e8684e48506 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -593,6 +593,30 @@ struct kvm_irqchip {
 	} chip;
 };
 
+4.27 KVM_XEN_HVM_CONFIG
+
+Capability: KVM_CAP_XEN_HVM
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_xen_hvm_config (in)
+Returns: 0 on success, -1 on error
+
+Sets the MSR that the Xen HVM guest uses to initialize its hypercall
+page, and provides the starting address and size of the hypercall
+blobs in userspace.  When the guest writes the MSR, kvm copies one
+page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
+memory.
+
+struct kvm_xen_hvm_config {
+	__u32 flags;
+	__u32 msr;
+	__u64 blob_addr_32;
+	__u64 blob_addr_64;
+	__u8 blob_size_32;
+	__u8 blob_size_64;
+	__u8 pad2[30];
+};
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f02e87a5206f..ef9b4b73cce4 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -19,6 +19,7 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 179a919f53a4..36f3b53f5c27 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -410,6 +410,8 @@ struct kvm_arch{
 
 	unsigned long irq_sources_bitmap;
 	u64 vm_init_tsc;
+
+	struct kvm_xen_hvm_config xen_hvm_config;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d450cc6f841..bb842db3ee7c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -857,6 +857,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	return 0;
 }
 
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int lm = is_long_mode(vcpu);
+	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
+		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
+	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
+		: kvm->arch.xen_hvm_config.blob_size_32;
+	u32 page_num = data & ~PAGE_MASK;
+	u64 page_addr = data & PAGE_MASK;
+	u8 *page;
+	int r;
+
+	r = -E2BIG;
+	if (page_num >= blob_size)
+		goto out;
+	r = -ENOMEM;
+	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!page)
+		goto out;
+	r = -EFAULT;
+	if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
+		goto out_free;
+	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+		goto out_free;
+	r = 0;
+out_free:
+	kfree(page);
+out:
+	return r;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
@@ -972,6 +1004,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			"0x%x data 0x%llx\n", msr, data);
 		break;
 	default:
+		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
+			return xen_hvm_config(vcpu, data);
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				msr, data);
@@ -1246,6 +1280,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT2:
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
+	case KVM_CAP_XEN_HVM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2441,6 +2476,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_XEN_HVM_CONFIG: {
+		r = -EFAULT;
+		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
+				   sizeof(struct kvm_xen_hvm_config)))
+			goto out;
+		r = -EINVAL;
+		if (kvm->arch.xen_hvm_config.flags)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f8f8900fc5ec..b694c1d2f918 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -436,6 +436,9 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 38
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -488,6 +491,18 @@ struct kvm_x86_mce {
 };
 #endif
 
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+	__u32 flags;
+	__u32 msr;
+	__u64 blob_addr_32;
+	__u64 blob_addr_64;
+	__u8 blob_size_32;
+	__u8 blob_size_64;
+	__u8 pad2[30];
+};
+#endif
+
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
 
 struct kvm_irqfd {
@@ -546,6 +561,7 @@ struct kvm_irqfd {
 #define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG        _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.2.3


From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Fri, 16 Oct 2009 15:28:36 -0400
Subject: KVM: allow userspace to adjust kvmclock offset

When we migrate a kvm guest that uses pvclock between two hosts, we may
suffer a large skew. This is because there can be significant differences
between the monotonic clock of the hosts involved. When a new host with
a much larger monotonic time starts running the guest, the view of time
will be significantly impacted.

Situation is much worse when we do the opposite, and migrate to a host with
a smaller monotonic clock.

This proposed ioctl will allow userspace to inform us what is the monotonic
clock value in the source host, so we can keep the time skew short, and
more importantly, never goes backwards. Userspace may also need to trigger
the current data, since from the first migration onwards, it won't be
reflected by a simple call to clock_gettime() anymore.

[marcelo: future-proof abi with a flags field]
[jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it]

Signed-off-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/kvm/api.txt       | 36 +++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 42 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/kvm.h             | 10 ++++++++++
 4 files changed, 88 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 3e8684e48506..36594ba57723 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -617,6 +617,42 @@ struct kvm_xen_hvm_config {
 	__u8 pad2[30];
 };
 
+4.27 KVM_GET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (out)
+Returns: 0 on success, -1 on error
+
+Gets the current timestamp of kvmclock as seen by the current guest. In
+conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
+4.28 KVM_SET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (in)
+Returns: 0 on success, -1 on error
+
+Sets the current timestamp of kvmclock to the valued specific in its parameter.
+In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4d994ad5051a..0558ff8c32ae 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,6 +413,7 @@ struct kvm_arch{
 
 	unsigned long irq_sources_bitmap;
 	u64 vm_init_tsc;
+	s64 kvmclock_offset;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 13f30aac460b..e16cdc9ec0c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -680,7 +680,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	/* With all the info we got, fill in the values */
 
 	vcpu->hv_clock.system_time = ts.tv_nsec +
-				     (NSEC_PER_SEC * (u64)ts.tv_sec);
+				     (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
+
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
@@ -1262,6 +1263,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 	case KVM_CAP_XEN_HVM:
+	case KVM_CAP_ADJUST_CLOCK:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2468,6 +2470,44 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_SET_CLOCK: {
+		struct timespec now;
+		struct kvm_clock_data user_ns;
+		u64 now_ns;
+		s64 delta;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
+			goto out;
+
+		r = -EINVAL;
+		if (user_ns.flags)
+			goto out;
+
+		r = 0;
+		ktime_get_ts(&now);
+		now_ns = timespec_to_ns(&now);
+		delta = user_ns.clock - now_ns;
+		kvm->arch.kvmclock_offset = delta;
+		break;
+	}
+	case KVM_GET_CLOCK: {
+		struct timespec now;
+		struct kvm_clock_data user_ns;
+		u64 now_ns;
+
+		ktime_get_ts(&now);
+		now_ns = timespec_to_ns(&now);
+		user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+		user_ns.flags = 0;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
+			goto out;
+		r = 0;
+		break;
+	}
+
 	default:
 		;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index b694c1d2f918..6ed1a12ed526 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -439,6 +439,7 @@ struct kvm_ioeventfd {
 #ifdef __KVM_HAVE_XEN_HVM
 #define KVM_CAP_XEN_HVM 38
 #endif
+#define KVM_CAP_ADJUST_CLOCK 39
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -512,6 +513,12 @@ struct kvm_irqfd {
 	__u8  pad[20];
 };
 
+struct kvm_clock_data {
+	__u64 clock;
+	__u32 flags;
+	__u32 pad[9];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -562,6 +569,9 @@ struct kvm_irqfd {
 #define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
+#define KVM_SET_CLOCK             _IOW(KVMIO, 0x7b, struct kvm_clock_data)
+#define KVM_GET_CLOCK             _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.2.3


From c54d2aba27f0c505d61700d656c5943e96982e60 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 2 Nov 2009 17:20:28 +0100
Subject: KVM: Reorder IOCTLs in main kvm.h

Obviously, people tend to extend this header at the bottom - more or
less blindly. Ensure that deprecated stuff gets its own corner again by
moving things to the top. Also add some comments and reindent IOCTLs to
make them more readable and reduce the risk of number collisions.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 235 ++++++++++++++++++++++++++--------------------------
 1 file changed, 117 insertions(+), 118 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6ed1a12ed526..ca62b8e056f9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -14,12 +14,76 @@
 
 #define KVM_API_VERSION 12
 
-/* for KVM_TRACE_ENABLE, deprecated */
+/* *** Deprecated interfaces *** */
+
+#define KVM_TRC_SHIFT           16
+
+#define KVM_TRC_ENTRYEXIT       (1 << KVM_TRC_SHIFT)
+#define KVM_TRC_HANDLER         (1 << (KVM_TRC_SHIFT + 1))
+
+#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
+#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
+#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
+
+#define KVM_TRC_HEAD_SIZE       12
+#define KVM_TRC_CYCLE_SIZE      8
+#define KVM_TRC_EXTRA_MAX       7
+
+#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
+#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
+#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
+#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
+#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
+#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
+#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
+#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
+#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
+#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
+#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
+#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
+#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
+#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
+#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
+#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
+#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
+#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
+#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
+#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
+#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
+#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
+#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
+#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
+
 struct kvm_user_trace_setup {
-	__u32 buf_size; /* sub_buffer size of each per-cpu */
-	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
+	__u32 buf_size;
+	__u32 buf_nr;
+};
+
+#define __KVM_DEPRECATED_MAIN_W_0x06 \
+	_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
+#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
+#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
+
+#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
+
+struct kvm_breakpoint {
+	__u32 enabled;
+	__u32 padding;
+	__u64 address;
+};
+
+struct kvm_debug_guest {
+	__u32 enabled;
+	__u32 pad;
+	struct kvm_breakpoint breakpoints[4];
+	__u32 singlestep;
 };
 
+#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+
+/* *** End of deprecated interfaces *** */
+
+
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
 	__u32 slot;
@@ -329,24 +393,6 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
-#define KVM_TRC_SHIFT           16
-/*
- * kvm trace categories
- */
-#define KVM_TRC_ENTRYEXIT       (1 << KVM_TRC_SHIFT)
-#define KVM_TRC_HANDLER         (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */
-
-/*
- * kvm trace action
- */
-#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
-#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
-#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
-
-#define KVM_TRC_HEAD_SIZE       12
-#define KVM_TRC_CYCLE_SIZE      8
-#define KVM_TRC_EXTRA_MAX       7
-
 #define KVMIO 0xAE
 
 /*
@@ -367,12 +413,10 @@ struct kvm_ioeventfd {
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
 #define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
-/*
- * ioctls for kvm trace
- */
-#define KVM_TRACE_ENABLE          _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
-#define KVM_TRACE_PAUSE           _IO(KVMIO,  0x07)
-#define KVM_TRACE_DISABLE         _IO(KVMIO,  0x08)
+#define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
+#define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
+#define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
+
 /*
  * Extension capability list.
  */
@@ -522,56 +566,57 @@ struct kvm_clock_data {
 /*
  * ioctls for VM fds
  */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
  */
-#define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
-#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
-#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
-#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+#define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
+#define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
+#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
 					struct kvm_userspace_memory_region)
-#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
-#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
 /* Device model IOC */
-#define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
-#define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
-#define KVM_GET_IRQCHIP		  _IOWR(KVMIO, 0x62, struct kvm_irqchip)
-#define KVM_SET_IRQCHIP		  _IOR(KVMIO,  0x63, struct kvm_irqchip)
-#define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
-#define KVM_GET_PIT		  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
-#define KVM_SET_PIT		  _IOR(KVMIO,  0x66, struct kvm_pit_state)
-#define KVM_IRQ_LINE_STATUS	  _IOWR(KVMIO, 0x67, struct kvm_irq_level)
+#define KVM_CREATE_IRQCHIP        _IO(KVMIO,   0x60)
+#define KVM_IRQ_LINE              _IOW(KVMIO,  0x61, struct kvm_irq_level)
+#define KVM_GET_IRQCHIP           _IOWR(KVMIO, 0x62, struct kvm_irqchip)
+#define KVM_SET_IRQCHIP           _IOR(KVMIO,  0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT            _IO(KVMIO,   0x64)
+#define KVM_GET_PIT               _IOWR(KVMIO, 0x65, struct kvm_pit_state)
+#define KVM_SET_PIT               _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_IRQ_LINE_STATUS       _IOWR(KVMIO, 0x67, struct kvm_irq_level)
 #define KVM_REGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
 #define KVM_UNREGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
-				   struct kvm_assigned_pci_dev)
-#define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+#define KVM_ASSIGN_PCI_DEVICE     _IOR(KVMIO,  0x69, \
+				       struct kvm_assigned_pci_dev)
+#define KVM_SET_GSI_ROUTING       _IOW(KVMIO,  0x6a, struct kvm_irq_routing)
 /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
-#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
-			    struct kvm_assigned_irq)
-#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
-#define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
-#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
-				     struct kvm_assigned_pci_dev)
-#define KVM_ASSIGN_SET_MSIX_NR \
-			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
-#define KVM_ASSIGN_SET_MSIX_ENTRY \
-			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
-#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
-#define KVM_IRQFD                  _IOW(KVMIO, 0x76, struct kvm_irqfd)
-#define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
-#define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
-#define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
-#define KVM_XEN_HVM_CONFIG        _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
-#define KVM_SET_CLOCK             _IOW(KVMIO, 0x7b, struct kvm_clock_data)
-#define KVM_GET_CLOCK             _IOR(KVMIO, 0x7c, struct kvm_clock_data)
-
+#define KVM_ASSIGN_IRQ            __KVM_DEPRECATED_VM_R_0x70
+#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO,  0x70, struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL      _IO(KVMIO,   0x71)
+#define KVM_DEASSIGN_PCI_DEVICE   _IOW(KVMIO,  0x72, \
+				       struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR    _IOW(KVMIO,  0x73, \
+				       struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO,  0x74, \
+				       struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ      _IOW(KVMIO,  0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD                 _IOW(KVMIO,  0x76, struct kvm_irqfd)
+#define KVM_CREATE_PIT2		  _IOW(KVMIO,  0x77, struct kvm_pit_config)
+#define KVM_SET_BOOT_CPU_ID       _IO(KVMIO,   0x78)
+#define KVM_IOEVENTFD             _IOW(KVMIO,  0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
+#define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
+#define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_PIT_STATE2 */
+#define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
 
 /*
  * ioctls for vcpu fds
@@ -584,7 +629,7 @@ struct kvm_clock_data {
 #define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
 #define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
 /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_DEBUG_GUEST
+#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_VCPU_W_0x87
 #define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
 #define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
 #define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
@@ -596,7 +641,7 @@ struct kvm_clock_data {
 #define KVM_SET_CPUID2            _IOW(KVMIO,  0x90, struct kvm_cpuid2)
 #define KVM_GET_CPUID2            _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
 /* Available with KVM_CAP_VAPIC */
-#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
+#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
 /* Available with KVM_CAP_VAPIC */
 #define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)
 /* valid for virtual machine (for floating interrupt)_and_ vcpu */
@@ -608,67 +653,21 @@ struct kvm_clock_data {
 /* initial ipl psw for s390 */
 #define KVM_S390_SET_INITIAL_PSW  _IOW(KVMIO,  0x96, struct kvm_s390_psw)
 /* initial reset for s390 */
-#define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
+#define KVM_S390_INITIAL_RESET    _IO(KVMIO,   0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
 /* Available with KVM_CAP_NMI */
-#define KVM_NMI                   _IO(KVMIO,  0x9a)
+#define KVM_NMI                   _IO(KVMIO,   0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
 /* MCE for x86 */
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-
-/*
- * Deprecated interfaces
- */
-struct kvm_breakpoint {
-	__u32 enabled;
-	__u32 padding;
-	__u64 address;
-};
-
-struct kvm_debug_guest {
-	__u32 enabled;
-	__u32 pad;
-	struct kvm_breakpoint breakpoints[4];
-	__u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO,  0x87, struct kvm_debug_guest)
-
+/* IA64 stack access */
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
-#define KVM_GET_PIT2   _IOR(KVMIO,   0x9f, struct kvm_pit_state2)
-#define KVM_SET_PIT2   _IOW(KVMIO,   0xa0, struct kvm_pit_state2)
-
-#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
-#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
-#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
-#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
-#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
-
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
 struct kvm_assigned_pci_dev {
@@ -722,4 +721,4 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
-#endif
+#endif /* __LINUX_KVM_H */
-- 
cgit v1.2.3


From a9c7399d6cda0a092b347f8ee49bbe44f6e1fe66 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 4 Nov 2009 11:54:59 +0200
Subject: KVM: Allow internal errors reported to userspace to carry extra data

Usually userspace will freeze the guest so we can inspect it, but some
internal state is not available.  Add extra data to internal error
reporting so we can expose it to the debugger.  Extra data is specific
to the suberror.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c  | 1 +
 arch/x86/kvm/vmx.c  | 1 +
 include/linux/kvm.h | 4 ++++
 virt/kvm/kvm_main.c | 1 +
 4 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a9024797b21f..4c3e5b2314cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	case EMULATE_FAIL:
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
 		return 0;
 	default:
 		BUG();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c9cc9596e1a6..c0e66dd58a47 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3352,6 +3352,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 			kvm_report_emulation_failure(vcpu, "emulation failure");
 			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+			vcpu->run->internal.ndata = 0;
 			ret = 0;
 			goto out;
 		}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ca62b8e056f9..172639e94392 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -251,6 +251,9 @@ struct kvm_run {
 		} dcr;
 		struct {
 			__u32 suberror;
+			/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
+			__u32 ndata;
+			__u64 data[16];
 		} internal;
 		/* Fix the size of the union. */
 		char padding[256];
@@ -484,6 +487,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_XEN_HVM 38
 #endif
 #define KVM_CAP_ADJUST_CLOCK 39
+#define KVM_CAP_INTERNAL_ERROR_DATA 40
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bd44fb48ac43..f92ba138007a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1653,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	case KVM_CAP_SET_BOOT_CPU_ID:
 #endif
+	case KVM_CAP_INTERNAL_ERROR_DATA:
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	case KVM_CAP_IRQ_ROUTING:
-- 
cgit v1.2.3


From 65ac7264043740572ba804edca03c374d70427c9 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 4 Nov 2009 11:59:01 +0200
Subject: KVM: VMX: Report unexpected simultaneous exceptions as internal
 errors

These happen when we trap an exception when another exception is being
delivered; we only expect these with MCEs and page faults.  If something
unexpected happens, things probably went south and we're better off reporting
an internal error and freezing.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c  | 11 ++++++++---
 include/linux/kvm.h |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c0e66dd58a47..22fcd27a0b58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2744,9 +2744,14 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return handle_machine_check(vcpu);
 
 	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
-						!is_page_fault(intr_info))
-		printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
-		       "intr info 0x%x\n", __func__, vect_info, intr_info);
+	    !is_page_fault(intr_info)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.data[0] = vect_info;
+		vcpu->run->internal.data[1] = intr_info;
+		return 0;
+	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 172639e94392..976f4d181858 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@ struct kvm_pit_config {
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
+#define KVM_INTERNAL_ERROR_SIMUL_EX 2
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
-- 
cgit v1.2.3


From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Thu, 12 Nov 2009 01:04:25 +0100
Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS

This new IOCTL exports all yet user-invisible states related to
exceptions, interrupts, and NMIs. Together with appropriate user space
changes, this fixes sporadic problems of vmsave/restore, live migration
and system reset.

[avi: future-proof abi by adding a flags field]

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/kvm/api.txt       | 49 ++++++++++++++++++++++++++
 arch/x86/include/asm/kvm.h      | 28 +++++++++++++++
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm.c              | 22 ++++++++++++
 arch/x86/kvm/vmx.c              | 30 ++++++++++++++++
 arch/x86/kvm/x86.c              | 77 +++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm.h             |  6 ++++
 7 files changed, 214 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 36594ba57723..e1a114161027 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -653,6 +653,55 @@ struct kvm_clock_data {
 	__u32 pad[9];
 };
 
+4.29 KVM_GET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_vcpu_event (out)
+Returns: 0 on success, -1 on error
+
+Gets currently pending exceptions, interrupts, and NMIs as well as related
+states of the vcpu.
+
+struct kvm_vcpu_events {
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 has_error_code;
+		__u8 pad;
+		__u32 error_code;
+	} exception;
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 soft;
+		__u8 pad;
+	} interrupt;
+	struct {
+		__u8 injected;
+		__u8 pending;
+		__u8 masked;
+		__u8 pad;
+	} nmi;
+	__u32 sipi_vector;
+	__u32 flags;   /* must be zero */
+};
+
+4.30 KVM_SET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_vcpu_event (in)
+Returns: 0 on success, -1 on error
+
+Set pending exceptions, interrupts, and NMIs as well as related states of the
+vcpu.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index ef9b4b73cce4..950df434763f 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -20,6 +20,7 @@
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
 #define __KVM_HAVE_XEN_HVM
+#define __KVM_HAVE_VCPU_EVENTS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -252,4 +253,31 @@ struct kvm_reinject_control {
 	__u8 pit_reinject;
 	__u8 reserved[31];
 };
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 has_error_code;
+		__u8 pad;
+		__u32 error_code;
+	} exception;
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 soft;
+		__u8 pad;
+	} interrupt;
+	struct {
+		__u8 injected;
+		__u8 pending;
+		__u8 masked;
+		__u8 pad;
+	} nmi;
+	__u32 sipi_vector;
+	__u32 flags;
+	__u32 reserved[10];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 26a74b7bb6bc..06e085614dad 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -523,6 +523,8 @@ struct kvm_x86_ops {
 				bool has_error_code, u32 error_code);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 34b700f9e498..3de0b37ec038 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (masked) {
+		svm->vcpu.arch.hflags |= HF_NMI_MASK;
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	} else {
+		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	}
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.interrupt_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
+	.get_nmi_mask = svm_get_nmi_mask,
+	.set_nmi_mask = svm_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 22fcd27a0b58..778f059ae423 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2639,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 				GUEST_INTR_STATE_NMI));
 }
 
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_virtual_nmis())
+		return to_vmx(vcpu)->soft_vnmi_blocked;
+	else
+		return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			  GUEST_INTR_STATE_NMI);
+}
+
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!cpu_has_virtual_nmis()) {
+		if (vmx->soft_vnmi_blocked != masked) {
+			vmx->soft_vnmi_blocked = masked;
+			vmx->vnmi_blocked_time = 0;
+		}
+	} else {
+		if (masked)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+					GUEST_INTR_STATE_NMI);
+	}
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -3985,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.queue_exception = vmx_queue_exception,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.nmi_allowed = vmx_nmi_allowed,
+	.get_nmi_mask = vmx_get_nmi_mask,
+	.set_nmi_mask = vmx_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ba8958dca3c4..35eea30821d6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1342,6 +1342,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 	case KVM_CAP_XEN_HVM:
 	case KVM_CAP_ADJUST_CLOCK:
+	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1883,6 +1884,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+					       struct kvm_vcpu_events *events)
+{
+	vcpu_load(vcpu);
+
+	events->exception.injected = vcpu->arch.exception.pending;
+	events->exception.nr = vcpu->arch.exception.nr;
+	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
+	events->exception.error_code = vcpu->arch.exception.error_code;
+
+	events->interrupt.injected = vcpu->arch.interrupt.pending;
+	events->interrupt.nr = vcpu->arch.interrupt.nr;
+	events->interrupt.soft = vcpu->arch.interrupt.soft;
+
+	events->nmi.injected = vcpu->arch.nmi_injected;
+	events->nmi.pending = vcpu->arch.nmi_pending;
+	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+
+	events->sipi_vector = vcpu->arch.sipi_vector;
+
+	events->flags = 0;
+
+	vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
+					      struct kvm_vcpu_events *events)
+{
+	if (events->flags)
+		return -EINVAL;
+
+	vcpu_load(vcpu);
+
+	vcpu->arch.exception.pending = events->exception.injected;
+	vcpu->arch.exception.nr = events->exception.nr;
+	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
+	vcpu->arch.exception.error_code = events->exception.error_code;
+
+	vcpu->arch.interrupt.pending = events->interrupt.injected;
+	vcpu->arch.interrupt.nr = events->interrupt.nr;
+	vcpu->arch.interrupt.soft = events->interrupt.soft;
+	if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
+		kvm_pic_clear_isr_ack(vcpu->kvm);
+
+	vcpu->arch.nmi_injected = events->nmi.injected;
+	vcpu->arch.nmi_pending = events->nmi.pending;
+	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
+
+	vcpu->arch.sipi_vector = events->sipi_vector;
+
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2040,6 +2096,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
 		break;
 	}
+	case KVM_GET_VCPU_EVENTS: {
+		struct kvm_vcpu_events events;
+
+		kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
+			break;
+		r = 0;
+		break;
+	}
+	case KVM_SET_VCPU_EVENTS: {
+		struct kvm_vcpu_events events;
+
+		r = -EFAULT;
+		if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
+			break;
+
+		r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 976f4d181858..92045a92d714 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -489,6 +489,9 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_ADJUST_CLOCK 39
 #define KVM_CAP_INTERNAL_ERROR_DATA 40
+#ifdef __KVM_HAVE_VCPU_EVENTS
+#define KVM_CAP_VCPU_EVENTS 41
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -672,6 +675,9 @@ struct kvm_clock_data {
 /* IA64 stack access */
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
+/* Available with KVM_CAP_VCPU_EVENTS */
+#define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
+#define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
cgit v1.2.3


From d7b0b5eb3000c6fb902f08c619fcd673a23d8fab Mon Sep 17 00:00:00 2001
From: Carsten Otte <carsteno@de.ibm.com>
Date: Thu, 19 Nov 2009 14:21:16 +0100
Subject: KVM: s390: Make psw available on all exits, not just a subset

This patch moves s390 processor status word into the base kvm_run
struct and keeps it up-to date on all userspace exits.

The userspace ABI is broken by this, however there are no applications
in the wild using this.  A capability check is provided so users can
verify the updated API exists.

Cc: stable@kernel.org
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/include/asm/kvm.h |  3 ++-
 arch/s390/kvm/kvm-s390.c    | 25 +++++++++++++++++--------
 include/linux/kvm.h         |  8 ++++++--
 3 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index 3dfcaeb5d7f4..82b32a100c7d 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -1,6 +1,5 @@
 #ifndef __LINUX_KVM_S390_H
 #define __LINUX_KVM_S390_H
-
 /*
  * asm-s390/kvm.h - KVM s390 specific structures and definitions
  *
@@ -15,6 +14,8 @@
  */
 #include <linux/types.h>
 
+#define __KVM_S390
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 544505893c9f..f8bcaefd7d34 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -117,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 
 int kvm_dev_ioctl_check_extension(long ext)
 {
+	int r;
+
 	switch (ext) {
+	case KVM_CAP_S390_PSW:
+		r = 1;
+		break;
 	default:
-		return 0;
+		r = 0;
 	}
+	return r;
 }
 
 /* Section: vm related */
@@ -420,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
 	vcpu_load(vcpu);
 	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
 		rc = -EBUSY;
-	else
-		vcpu->arch.sie_block->gpsw = psw;
+	else {
+		vcpu->run->psw_mask = psw.mask;
+		vcpu->run->psw_addr = psw.addr;
+	}
 	vcpu_put(vcpu);
 	return rc;
 }
@@ -509,9 +517,6 @@ rerun_vcpu:
 
 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
-		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
-		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
-		break;
 	case KVM_EXIT_UNKNOWN:
 	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
@@ -520,6 +525,9 @@ rerun_vcpu:
 		BUG();
 	}
 
+	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+
 	might_fault();
 
 	do {
@@ -539,8 +547,6 @@ rerun_vcpu:
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
-		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
-		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
 		rc = 0;
@@ -552,6 +558,9 @@ rerun_vcpu:
 		rc = 0;
 	}
 
+	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
+	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 92045a92d714..2d241da07236 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -181,6 +181,11 @@ struct kvm_run {
 	__u64 cr8;
 	__u64 apic_base;
 
+#ifdef __KVM_S390
+	/* the processor status word for s390 */
+	__u64 psw_mask; /* psw upper half */
+	__u64 psw_addr; /* psw lower half */
+#endif
 	union {
 		/* KVM_EXIT_UNKNOWN */
 		struct {
@@ -232,8 +237,6 @@ struct kvm_run {
 		/* KVM_EXIT_S390_SIEIC */
 		struct {
 			__u8 icptcode;
-			__u64 mask; /* psw upper half */
-			__u64 addr; /* psw lower half */
 			__u16 ipa;
 			__u32 ipb;
 		} s390_sieic;
@@ -492,6 +495,7 @@ struct kvm_ioeventfd {
 #ifdef __KVM_HAVE_VCPU_EVENTS
 #define KVM_CAP_VCPU_EVENTS 41
 #endif
+#define KVM_CAP_S390_PSW 42
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 6013efd8860bf15c1f86f365332642cfe557152f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 19 Nov 2009 15:36:45 +0900
Subject: libata: retry failed FLUSH if device didn't fail it

If ATA device failed FLUSH, it means that the device failed to write
out some amount of data and the error needs to be reported to upper
layers. As retries can't recover the lost data, FLUSH failures need to
be reported immediately in general.

However, if FLUSH fails due to transmission errors, the FLUSH needs to
be retried; otherwise, filesystems may switch to RO mode and/or raid
array may drop a drive for a random transmission glitch.

This condition can be rather easily reproduced on certain ahci
controllers which go through a PHY event after powersave mode switch +
ext4 combination.  Powersave mode switch is often closely followed by
flush from the filesystem failing the FLUSH with ATA bus error which
makes the filesystem code believe that data is lost and drop to RO
mode.  This was reported in the following bugzilla bug.

  http://bugzilla.kernel.org/show_bug.cgi?id=14543

This patch makes libata EH retry FLUSH if it wasn't failed by the
device.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrey Vihrov <andrey.vihrov@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-eh.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h  |  2 +-
 2 files changed, 95 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bba2ae5df1c2..0ea97c942ced 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -110,6 +110,13 @@ static const unsigned long ata_eh_identify_timeouts[] = {
 	ULONG_MAX,
 };
 
+static const unsigned long ata_eh_flush_timeouts[] = {
+	15000,	/* be generous with flush */
+	15000,  /* ditto */
+	30000,	/* and even more generous */
+	ULONG_MAX,
+};
+
 static const unsigned long ata_eh_other_timeouts[] = {
 	 5000,	/* same rationale as identify timeout */
 	10000,	/* ditto */
@@ -147,6 +154,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
 	  .timeouts = ata_eh_other_timeouts, },
 	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
 	  .timeouts = ata_eh_other_timeouts, },
+	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
+	  .timeouts = ata_eh_flush_timeouts },
 };
 #undef CMDS
 
@@ -3112,6 +3121,82 @@ static int atapi_eh_clear_ua(struct ata_device *dev)
 	return 0;
 }
 
+/**
+ *	ata_eh_maybe_retry_flush - Retry FLUSH if necessary
+ *	@dev: ATA device which may need FLUSH retry
+ *
+ *	If @dev failed FLUSH, it needs to be reported upper layer
+ *	immediately as it means that @dev failed to remap and already
+ *	lost at least a sector and further FLUSH retrials won't make
+ *	any difference to the lost sector.  However, if FLUSH failed
+ *	for other reasons, for example transmission error, FLUSH needs
+ *	to be retried.
+ *
+ *	This function determines whether FLUSH failure retry is
+ *	necessary and performs it if so.
+ *
+ *	RETURNS:
+ *	0 if EH can continue, -errno if EH needs to be repeated.
+ */
+static int ata_eh_maybe_retry_flush(struct ata_device *dev)
+{
+	struct ata_link *link = dev->link;
+	struct ata_port *ap = link->ap;
+	struct ata_queued_cmd *qc;
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+	int rc = 0;
+
+	/* did flush fail for this device? */
+	if (!ata_tag_valid(link->active_tag))
+		return 0;
+
+	qc = __ata_qc_from_tag(ap, link->active_tag);
+	if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
+			       qc->tf.command != ATA_CMD_FLUSH))
+		return 0;
+
+	/* if the device failed it, it should be reported to upper layers */
+	if (qc->err_mask & AC_ERR_DEV)
+		return 0;
+
+	/* flush failed for some other reason, give it another shot */
+	ata_tf_init(dev, &tf);
+
+	tf.command = qc->tf.command;
+	tf.flags |= ATA_TFLAG_DEVICE;
+	tf.protocol = ATA_PROT_NODATA;
+
+	ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n",
+		       tf.command, qc->err_mask);
+
+	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+	if (!err_mask) {
+		/*
+		 * FLUSH is complete but there's no way to
+		 * successfully complete a failed command from EH.
+		 * Making sure retry is allowed at least once and
+		 * retrying it should do the trick - whatever was in
+		 * the cache is already on the platter and this won't
+		 * cause infinite loop.
+		 */
+		qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
+	} else {
+		ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n",
+			       err_mask);
+		rc = -EIO;
+
+		/* if device failed it, report it to upper layers */
+		if (err_mask & AC_ERR_DEV) {
+			qc->err_mask |= AC_ERR_DEV;
+			qc->result_tf = tf;
+			if (!(ap->pflags & ATA_PFLAG_FROZEN))
+				rc = 0;
+		}
+	}
+	return rc;
+}
+
 static int ata_link_nr_enabled(struct ata_link *link)
 {
 	struct ata_device *dev;
@@ -3455,6 +3540,15 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 			}
 		}
 
+		/* retry flush if necessary */
+		ata_for_each_dev(dev, link, ALL) {
+			if (dev->class != ATA_DEV_ATA)
+				continue;
+			rc = ata_eh_maybe_retry_flush(dev);
+			if (rc)
+				goto dev_fail;
+		}
+
 		/* configure link power saving */
 		if (ehc->i.action & ATA_EH_LPM)
 			ata_for_each_dev(dev, link, ALL)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 87698640c091..ba07e84c9840 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -365,7 +365,7 @@ enum {
 	/* This should match the actual table size of
 	 * ata_eh_cmd_timeout_table in libata-eh.c.
 	 */
-	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,
+	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
 
 	/* Horkage types. May be set by libata or controller on drives
 	   (some horkage may be drive/controller pair dependant */
-- 
cgit v1.2.3


From 18f0f97850059303ed73b1f02084f55ca330a80c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 17 Nov 2009 10:00:47 -0500
Subject: libata: add translation for SCSI WRITE SAME (aka TRIM support)

Add support for the ATA TRIM command in libata.  We translate a WRITE SAME 16
command with the unmap bit set into an ATA TRIM command and export enough
information in READ CAPACITY 16 and the block limits EVPD page so that the new
SCSI layer discard support will driver this for us.

Note that I hardcode the WRITE_SAME_16 opcode for now as the patch to introduce
the symbolic is not in 2.6.32 yet but only in the SCSI tree - as soon as it is
merged we can fix it up to properly use the symbolic name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-scsi.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ata.h       | 13 +++++++
 2 files changed, 111 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 512a3ee8caf6..340a616d226b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -47,6 +47,7 @@
 #include <linux/hdreg.h>
 #include <linux/uaccess.h>
 #include <linux/suspend.h>
+#include <asm/unaligned.h>
 
 #include "libata.h"
 
@@ -1963,6 +1964,7 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
 		0x80,	/* page 0x80, unit serial no page */
 		0x83,	/* page 0x83, device ident page */
 		0x89,	/* page 0x89, ata info page */
+		0xb0,	/* page 0xb0, block limits page */
 		0xb1,	/* page 0xb1, block device characteristics page */
 	};
 
@@ -2084,6 +2086,41 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
 	return 0;
 }
 
+static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
+{
+	u32 min_io_sectors;
+
+	rbuf[1] = 0xb0;
+	rbuf[3] = 0x3c;		/* required VPD size with unmap support */
+
+	/*
+	 * Optimal transfer length granularity.
+	 *
+	 * This is always one physical block, but for disks with a smaller
+	 * logical than physical sector size we need to figure out what the
+	 * latter is.
+	 */
+	if (ata_id_has_large_logical_sectors(args->id))
+		min_io_sectors = ata_id_logical_per_physical_sectors(args->id);
+	else
+		min_io_sectors = 1;
+	put_unaligned_be16(min_io_sectors, &rbuf[6]);
+
+	/*
+	 * Optimal unmap granularity.
+	 *
+	 * The ATA spec doesn't even know about a granularity or alignment
+	 * for the TRIM command.  We can leave away most of the unmap related
+	 * VPD page entries, but we have specifify a granularity to signal
+	 * that we support some form of unmap - in thise case via WRITE SAME
+	 * with the unmap bit set.
+	 */
+	if (ata_id_has_trim(args->id))
+		put_unaligned_be32(1, &rbuf[28]);
+
+	return 0;
+}
+
 static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
 {
 	int form_factor = ata_id_form_factor(args->id);
@@ -2373,6 +2410,9 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
 		rbuf[13] = log_per_phys;
 		rbuf[14] = (lowest_aligned >> 8) & 0x3f;
 		rbuf[15] = lowest_aligned;
+
+		if (ata_id_has_trim(args->id))
+			rbuf[14] |= 0x80;
 	}
 
 	return 0;
@@ -2895,6 +2935,58 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
 	return 1;
 }
 
+static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
+{
+	struct ata_taskfile *tf = &qc->tf;
+	struct scsi_cmnd *scmd = qc->scsicmd;
+	struct ata_device *dev = qc->dev;
+	const u8 *cdb = scmd->cmnd;
+	u64 block;
+	u32 n_block;
+	u32 size;
+	void *buf;
+
+	/* we may not issue DMA commands if no DMA mode is set */
+	if (unlikely(!dev->dma_mode))
+		goto invalid_fld;
+
+	if (unlikely(scmd->cmd_len < 16))
+		goto invalid_fld;
+	scsi_16_lba_len(cdb, &block, &n_block);
+
+	/* for now we only support WRITE SAME with the unmap bit set */
+	if (unlikely(!(cdb[1] & 0x8)))
+		goto invalid_fld;
+
+	/*
+	 * WRITE SAME always has a sector sized buffer as payload, this
+	 * should never be a multiple entry S/G list.
+	 */
+	if (!scsi_sg_count(scmd))
+		goto invalid_fld;
+
+	buf = page_address(sg_page(scsi_sglist(scmd)));
+	size = ata_set_lba_range_entries(buf, 512 / 8, block, n_block);
+
+	tf->protocol = ATA_PROT_DMA;
+	tf->hob_feature = 0;
+	tf->feature = ATA_DSM_TRIM;
+	tf->hob_nsect = (size / 512) >> 8;
+	tf->nsect = size / 512;
+	tf->command = ATA_CMD_DSM;
+	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
+		     ATA_TFLAG_WRITE;
+
+	ata_qc_set_pc_nbytes(qc);
+
+	return 0;
+
+ invalid_fld:
+	ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x00);
+	/* "Invalid field in cdb" */
+	return 1;
+}
+
 /**
  *	ata_get_xlat_func - check if SCSI to ATA translation is possible
  *	@dev: ATA device
@@ -2919,6 +3011,9 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 	case WRITE_16:
 		return ata_scsi_rw_xlat;
 
+	case 0x93 /*WRITE_SAME_16*/:
+		return ata_scsi_write_same_xlat;
+
 	case SYNCHRONIZE_CACHE:
 		if (ata_try_flush_cache(dev))
 			return ata_scsi_flush_xlat;
@@ -3108,6 +3203,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 		case 0x89:
 			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
 			break;
+		case 0xb0:
+			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0);
+			break;
 		case 0xb1:
 			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
 			break;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 4fb357312b3b..e2595e877e44 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -87,6 +87,7 @@ enum {
 	ATA_ID_HW_CONFIG	= 93,
 	ATA_ID_SPG		= 98,
 	ATA_ID_LBA_CAPACITY_2	= 100,
+	ATA_ID_SECTOR_SIZE	= 106,
 	ATA_ID_LAST_LUN		= 126,
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
@@ -638,6 +639,18 @@ static inline int ata_id_flush_ext_enabled(const u16 *id)
 	return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400;
 }
 
+static inline int ata_id_has_large_logical_sectors(const u16 *id)
+{
+	if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000)
+		return 0;
+	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
+}
+
+static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+{
+	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+}
+
 static inline int ata_id_has_lba48(const u16 *id)
 {
 	if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
-- 
cgit v1.2.3


From 98262f2762f0067375f83824d81ea929e37e6bfe Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Thu, 3 Dec 2009 09:24:48 +0100
Subject: block: Allow devices to indicate whether discarded blocks are zeroed

The discard ioctl is used by mkfs utilities to clear a block device
prior to putting metadata down.  However, not all devices return zeroed
blocks after a discard.  Some drives return stale data, potentially
containing old superblocks.  It is therefore important to know whether
discarded blocks are properly zeroed.

Both ATA and SCSI drives have configuration bits that indicate whether
zeroes are returned after a discard operation.  Implement a block level
interface that allows this information to be bubbled up the stack and
queried via a new block device ioctl.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   |  2 ++
 block/blk-sysfs.c      | 11 +++++++++++
 block/compat_ioctl.c   |  2 ++
 block/ioctl.c          |  2 ++
 include/linux/blkdev.h | 14 ++++++++++++++
 include/linux/fs.h     |  1 +
 6 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1ebc1fdb9144..dd1f1e0e196f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -101,6 +101,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
+	lim->discard_zeroes_data = -1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -544,6 +545,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	t->io_min = max(t->io_min, b->io_min);
 	t->no_cluster |= b->no_cluster;
+	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Bottom device offset aligned? */
 	if (offset &&
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3147145edc15..8606c9543fdd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -136,6 +136,11 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 	return queue_var_show(q->limits.max_discard_sectors << 9, page);
 }
 
+static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_discard_zeroes_data(q), page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -313,6 +318,11 @@ static struct queue_sysfs_entry queue_discard_max_entry = {
 	.show = queue_discard_max_show,
 };
 
+static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
+	.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+	.show = queue_discard_zeroes_data_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nonrot_show,
@@ -350,6 +360,7 @@ static struct attribute *default_attrs[] = {
 	&queue_io_opt_entry.attr,
 	&queue_discard_granularity_entry.attr,
 	&queue_discard_max_entry.attr,
+	&queue_discard_zeroes_data_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 9bd086c1a4d5..4eb8e9ea4af5 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -747,6 +747,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		return compat_put_uint(arg, bdev_io_opt(bdev));
 	case BLKALIGNOFF:
 		return compat_put_int(arg, bdev_alignment_offset(bdev));
+	case BLKDISCARDZEROES:
+		return compat_put_uint(arg, bdev_discard_zeroes_data(bdev));
 	case BLKFLSBUF:
 	case BLKROSET:
 	case BLKDISCARD:
diff --git a/block/ioctl.c b/block/ioctl.c
index 1f4d1de12b09..be48ea51faee 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -280,6 +280,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_uint(arg, bdev_io_opt(bdev));
 	case BLKALIGNOFF:
 		return put_int(arg, bdev_alignment_offset(bdev));
+	case BLKDISCARDZEROES:
+		return put_uint(arg, bdev_discard_zeroes_data(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e727f6c44c44..784a919aa0d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -322,6 +322,7 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		no_cluster;
+	signed char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -1150,6 +1151,19 @@ static inline int queue_sector_discard_alignment(struct request_queue *q,
 		& (q->limits.discard_granularity - 1);
 }
 
+static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
+{
+	if (q->limits.discard_zeroes_data == 1)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
+{
+	return queue_discard_zeroes_data(bdev_get_queue(bdev));
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79cea8051736..891f7d642e5c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -304,6 +304,7 @@ struct inodes_stat_t {
 #define BLKIOOPT _IO(0x12,121)
 #define BLKALIGNOFF _IO(0x12,122)
 #define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
-- 
cgit v1.2.3


From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 29 Sep 2009 12:27:23 +0100
Subject: VFS: Add forget_all_cached_acls()

This is required for cluster filesystems which want to use
cached ACLs so that they can invalidate the cache when
required.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Alexander Viro <aviro@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
---
 include/linux/posix_acl.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 065a3652a3ea..67608161df6b 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type)
 	if (old != ACL_NOT_CACHED)
 		posix_acl_release(old);
 }
+
+static inline void forget_all_cached_acls(struct inode *inode)
+{
+	struct posix_acl *old_access, *old_default;
+	spin_lock(&inode->i_lock);
+	old_access = inode->i_acl;
+	old_default = inode->i_default_acl;
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+	spin_unlock(&inode->i_lock);
+	if (old_access != ACL_NOT_CACHED)
+		posix_acl_release(old_access);
+	if (old_default != ACL_NOT_CACHED)
+		posix_acl_release(old_default);
+}
 #endif
 
 static inline void cache_no_acl(struct inode *inode)
-- 
cgit v1.2.3


From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 28 Sep 2009 12:35:17 +0100
Subject: VFS: Export dquot_send_warning

Sending a message to userspace in a generic format to warn
of events (e.g. quota exceeded) in the quota subsystem is
a generically useful feature. This patch makes some minor
changes to the send_message function from dquot.c renaming
it quota_send_message, moving it to quota.c and exporting it
for use by filesystems which do not use the dquot code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/quota/Kconfig      |  2 +-
 fs/quota/dquot.c      | 93 +++++----------------------------------------------
 fs/quota/quota.c      | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/quota.h | 11 ++++++
 4 files changed, 114 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 8047e01ef46b..353e78a9ebee 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -17,7 +17,7 @@ config QUOTA
 
 config QUOTA_NETLINK_INTERFACE
 	bool "Report quota messages through netlink interface"
-	depends on QUOTA && NET
+	depends on QUOTACTL && NET
 	help
 	  If you say Y here, quota warnings (about exceeding softlimit, reaching
 	  hardlimit, etc.) will be reported through netlink interface. If unsure,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39b49c42a7ed..9b6ad908dcb2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -77,10 +77,6 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/writeback.h> /* for inode_lock, oddly enough.. */
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#endif
 
 #include <asm/uaccess.h>
 
@@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype)
 }
 #endif
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-
-/* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = 0,
-	.name = "VFS_DQUOT",
-	.version = 1,
-	.maxattr = QUOTA_NL_A_MAX,
-};
-
-/* Send warning to userspace about user which exceeded quota */
-static void send_warning(const struct dquot *dquot, const char warntype)
-{
-	static atomic_t seq;
-	struct sk_buff *skb;
-	void *msg_head;
-	int ret;
-	int msg_size = 4 * nla_total_size(sizeof(u32)) +
-		       2 * nla_total_size(sizeof(u64));
-
-	/* We have to allocate using GFP_NOFS as we are called from a
-	 * filesystem performing write and thus further recursion into
-	 * the fs to free some data could cause deadlocks. */
-	skb = genlmsg_new(msg_size, GFP_NOFS);
-	if (!skb) {
-		printk(KERN_ERR
-		  "VFS: Not enough memory to send quota warning.\n");
-		return;
-	}
-	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
-			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
-	if (!msg_head) {
-		printk(KERN_ERR
-		  "VFS: Cannot store netlink header in quota warning.\n");
-		goto err_out;
-	}
-	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
-		MAJOR(dquot->dq_sb->s_dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
-		MINOR(dquot->dq_sb->s_dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
-	if (ret)
-		goto attr_err_out;
-	genlmsg_end(skb, msg_head);
-
-	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
-	return;
-attr_err_out:
-	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
-err_out:
-	kfree_skb(skb);
-}
-#endif
 /*
  * Write warnings to the console and send warning messages over netlink.
  *
@@ -1145,18 +1074,20 @@ err_out:
  */
 static void flush_warnings(struct dquot *const *dquots, char *warntype)
 {
+	struct dquot *dq;
 	int i;
 
-	for (i = 0; i < MAXQUOTAS; i++)
-		if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN &&
-		    !warning_issued(dquots[i], warntype[i])) {
+	for (i = 0; i < MAXQUOTAS; i++) {
+		dq = dquots[i];
+		if (dq && warntype[i] != QUOTA_NL_NOWARN &&
+		    !warning_issued(dq, warntype[i])) {
 #ifdef CONFIG_PRINT_QUOTA_WARNING
-			print_warning(dquots[i], warntype[i]);
-#endif
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-			send_warning(dquots[i], warntype[i]);
+			print_warning(dq, warntype[i]);
 #endif
+			quota_send_warning(dq->dq_type, dq->dq_id,
+					   dq->dq_sb->s_dev, warntype[i]);
 		}
+	}
 }
 
 static int ignore_hardlimit(struct dquot *dquot)
@@ -2607,12 +2538,6 @@ static int __init dquot_init(void)
 
 	register_shrinker(&dqcache_shrinker);
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-	if (genl_register_family(&quota_genl_family) != 0)
-		printk(KERN_ERR
-		       "VFS: Failed to create quota netlink interface.\n");
-#endif
-
 	return 0;
 }
 module_init(dquot_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95c5b42384b2..ee91e2756950 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,8 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
 
 /* Check validity of generic quotactl commands */
 static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
@@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 	return ret;
 }
 #endif
+
+
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+
+/* Netlink family structure for quota */
+static struct genl_family quota_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "VFS_DQUOT",
+	.version = 1,
+	.maxattr = QUOTA_NL_A_MAX,
+};
+
+/**
+ * quota_send_warning - Send warning to userspace about exceeded quota
+ * @type: The quota type: USRQQUOTA, GRPQUOTA,...
+ * @id: The user or group id of the quota that was exceeded
+ * @dev: The device on which the fs is mounted (sb->s_dev)
+ * @warntype: The type of the warning: QUOTA_NL_...
+ *
+ * This can be used by filesystems (including those which don't use
+ * dquot) to send a message to userspace relating to quota limits.
+ *
+ */
+
+void quota_send_warning(short type, unsigned int id, dev_t dev,
+			const char warntype)
+{
+	static atomic_t seq;
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+	int msg_size = 4 * nla_total_size(sizeof(u32)) +
+		       2 * nla_total_size(sizeof(u64));
+
+	/* We have to allocate using GFP_NOFS as we are called from a
+	 * filesystem performing write and thus further recursion into
+	 * the fs to free some data could cause deadlocks. */
+	skb = genlmsg_new(msg_size, GFP_NOFS);
+	if (!skb) {
+		printk(KERN_ERR
+		  "VFS: Not enough memory to send quota warning.\n");
+		return;
+	}
+	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
+			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
+	if (!msg_head) {
+		printk(KERN_ERR
+		  "VFS: Cannot store netlink header in quota warning.\n");
+		goto err_out;
+	}
+	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
+	if (ret)
+		goto attr_err_out;
+	genlmsg_end(skb, msg_head);
+
+	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
+	return;
+attr_err_out:
+	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
+err_out:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL(quota_send_warning);
+
+static int __init quota_init(void)
+{
+	if (genl_register_family(&quota_genl_family) != 0)
+		printk(KERN_ERR
+		       "VFS: Failed to create quota netlink interface.\n");
+	return 0;
+};
+
+module_init(quota_init);
+#endif
+
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 78c48895b12a..ce9a9b2e5cd4 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
 	return flags >> _DQUOT_STATE_FLAGS;
 }
 
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+extern void quota_send_warning(short type, unsigned int id, dev_t dev,
+			       const char warntype);
+#else
+static inline void quota_send_warning(short type, unsigned int id, dev_t dev,
+				      const char warntype)
+{
+	return;
+}
+#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */
+
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 	struct mutex dqio_mutex;		/* lock device while I/O in progress */
-- 
cgit v1.2.3


From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 6 Nov 2009 16:20:51 +0000
Subject: GFS2: Tag all metadata with jid

There are two spare field in the header common to all GFS2
metadata. One is just the right size to fit a journal id
in it, and this patch updates the journal code so that each
time a metadata block is modified, we tag it with the journal
id of the node which is performing the modification.

The reason for this is that it should make it much easier to
debug issues which arise if we can tell which node was the
last to modify a particular metadata block.

Since the field is updated before the block is written into
the journal, each journal should only contain metadata which
is tagged with its own journal id. The one exception to this
is the journal header block, which might have a different node's
id in it, if that journal was recovered by another node in the
cluster.

Thus each journal will contain a record of which nodes recovered
it, via the journal header.

The other field in the metadata header could potentially be
used to hold information about what kind of operation was
performed, but for the time being we just zero it on each
transaction so that if we use it for that in future, we'll
know that the information (where it exists) is reliable.

I did consider using the other field to hold the journal
sequence number, however since in GFS2's journaling we write
the modified data into the journal and not the original
data, this gives no information as to what action caused the
modification, so I think we can probably come up with a better
use for those 64 bits in the future.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c             | 2 --
 fs/gfs2/log.c               | 2 ++
 fs/gfs2/lops.c              | 4 ++++
 fs/gfs2/recovery.c          | 2 ++
 include/linux/gfs2_ondisk.h | 6 +++++-
 5 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6380cd9314b0..26ba2a4c4a2d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-	str->di_header.__pad0 = 0;
 	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-	str->di_header.__pad1 = 0;
 	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 13c6237c5f67..4511b08fc451 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	memset(lh, 0, sizeof(struct gfs2_log_header));
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.__pad0 = cpu_to_be64(0);
 	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
 	lh->lh_flags = cpu_to_be32(flags);
 	lh->lh_tail = cpu_to_be32(tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9969ff062c5b..de97632ba32f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
 static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
 	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+	struct gfs2_meta_header *mh;
 	struct gfs2_trans *tr;
 
 	lock_buffer(bd->bd_bh);
@@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
 	gfs2_meta_check(sdp, bd->bd_bh);
 	gfs2_pin(sdp, bd->bd_bh);
+	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+	mh->__pad0 = cpu_to_be64(0);
+	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	sdp->sd_log_num_buf++;
 	list_add(&le->le_list, &sdp->sd_log_le_buf);
 	tr->tr_num_buf_new++;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 09fa31965576..4b9bece3d437 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
 	memset(lh, 0, sizeof(struct gfs2_log_header));
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.__pad0 = cpu_to_be64(0);
 	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
 	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
 	lh->lh_blkno = cpu_to_be32(lblock);
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index b80c88dedbbb..81f90a59cda6 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -81,7 +81,11 @@ struct gfs2_meta_header {
 	__be32 mh_type;
 	__be64 __pad0;		/* Was generation number in gfs1 */
 	__be32 mh_format;
-	__be32 __pad1;		/* Was incarnation number in gfs1 */
+	/* This union is to keep userspace happy */
+	union {
+		__be32 mh_jid;		/* Was incarnation number in gfs1 */
+		__be32 __pad1;
+	};
 };
 
 /*
-- 
cgit v1.2.3


From b17621fed6aa039387e35f9b4d34d98f213e5673 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@gmail.com>
Date: Thu, 3 Dec 2009 13:54:25 +0100
Subject: writeback: introduce wbc.for_background

It will lower the flush priority for NFS, and maybe more in future.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fs-writeback.c         | 1 +
 fs/nfs/write.c            | 2 +-
 include/linux/writeback.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0306c8e7d6b5..0793961f7699 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -738,6 +738,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		.sync_mode		= args->sync_mode,
 		.older_than_this	= NULL,
 		.for_kupdate		= args->for_kupdate,
+		.for_background		= args->for_background,
 		.range_cyclic		= args->range_cyclic,
 	};
 	unsigned long oldest_jif;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53eb26c16b50..c84b5cc1a943 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc)
 {
 	if (wbc->for_reclaim)
 		return FLUSH_HIGHPRI | FLUSH_STABLE;
-	if (wbc->for_kupdate)
+	if (wbc->for_kupdate || wbc->for_background)
 		return FLUSH_LOWPRI;
 	return 0;
 }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 66ebddcff664..705f01fe413a 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -49,6 +49,7 @@ struct writeback_control {
 	unsigned nonblocking:1;		/* Don't get stuck on request queues */
 	unsigned encountered_congestion:1; /* An output: a queue is full */
 	unsigned for_kupdate:1;		/* A kupdate writeback */
+	unsigned for_background:1;	/* A background writeback */
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned more_io:1;		/* more io to be dispatched */
-- 
cgit v1.2.3


From 31e4c28d95e64f2d5d3c497a3ecf37c62de635b4 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Thu, 3 Dec 2009 12:59:42 -0500
Subject: blkio: Introduce blkio controller cgroup interface

o This is basic implementation of blkio controller cgroup interface. This is
  the common interface visible to user space and should be used by different
  IO control policies as we implement those.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/Kconfig                 |  13 ++++
 block/Kconfig.iosched         |   1 +
 block/Makefile                |   1 +
 block/blk-cgroup.c            | 177 ++++++++++++++++++++++++++++++++++++++++++
 block/blk-cgroup.h            |  58 ++++++++++++++
 include/linux/cgroup_subsys.h |   6 ++
 include/linux/iocontext.h     |   4 +
 7 files changed, 260 insertions(+)
 create mode 100644 block/blk-cgroup.c
 create mode 100644 block/blk-cgroup.h

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 9be0b56eaee1..6ba1a8e3388b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,6 +77,19 @@ config BLK_DEV_INTEGRITY
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
+config BLK_CGROUP
+	bool
+	depends on CGROUPS
+	default n
+	---help---
+	Generic block IO controller cgroup interface. This is the common
+	cgroup interface which should be used by various IO controlling
+	policies.
+
+	Currently, CFQ IO scheduler uses it to recognize task groups and
+	control disk bandwidth allocation (proportional time slice allocation)
+	to such task groups.
+
 endif # BLOCK
 
 config BLOCK_COMPAT
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 8bd105115a69..be0280deec29 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
 
 config IOSCHED_CFQ
 	tristate "CFQ I/O scheduler"
+	select BLK_CGROUP
 	default y
 	---help---
 	  The CFQ I/O scheduler tries to distribute bandwidth equally
diff --git a/block/Makefile b/block/Makefile
index 7914108952f2..cb2d515ebd6e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
+obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
new file mode 100644
index 000000000000..4f6afd76ec59
--- /dev/null
+++ b/block/blk-cgroup.c
@@ -0,0 +1,177 @@
+/*
+ * Common Block IO controller cgroup interface
+ *
+ * Based on ideas and code from CFQ, CFS and BFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
+ * 	              Nauman Rafique <nauman@google.com>
+ */
+#include <linux/ioprio.h>
+#include "blk-cgroup.h"
+
+struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
+
+struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
+{
+	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
+			    struct blkio_cgroup, css);
+}
+
+void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
+				struct blkio_group *blkg, void *key)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkcg->lock, flags);
+	rcu_assign_pointer(blkg->key, key);
+	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
+	spin_unlock_irqrestore(&blkcg->lock, flags);
+}
+
+int blkiocg_del_blkio_group(struct blkio_group *blkg)
+{
+	/* Implemented later */
+	return 0;
+}
+
+/* called under rcu_read_lock(). */
+struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+	void *__key;
+
+	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		__key = blkg->key;
+		if (__key == key)
+			return blkg;
+	}
+
+	return NULL;
+}
+
+#define SHOW_FUNCTION(__VAR)						\
+static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
+				       struct cftype *cftype)		\
+{									\
+	struct blkio_cgroup *blkcg;					\
+									\
+	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
+	return (u64)blkcg->__VAR;					\
+}
+
+SHOW_FUNCTION(weight);
+#undef SHOW_FUNCTION
+
+static int
+blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
+{
+	struct blkio_cgroup *blkcg;
+
+	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
+		return -EINVAL;
+
+	blkcg = cgroup_to_blkio_cgroup(cgroup);
+	blkcg->weight = (unsigned int)val;
+	return 0;
+}
+
+struct cftype blkio_files[] = {
+	{
+		.name = "weight",
+		.read_u64 = blkiocg_weight_read,
+		.write_u64 = blkiocg_weight_write,
+	},
+};
+
+static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+{
+	return cgroup_add_files(cgroup, subsys, blkio_files,
+				ARRAY_SIZE(blkio_files));
+}
+
+static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+{
+	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+
+	free_css_id(&blkio_subsys, &blkcg->css);
+	kfree(blkcg);
+}
+
+static struct cgroup_subsys_state *
+blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+{
+	struct blkio_cgroup *blkcg, *parent_blkcg;
+
+	if (!cgroup->parent) {
+		blkcg = &blkio_root_cgroup;
+		goto done;
+	}
+
+	/* Currently we do not support hierarchy deeper than two level (0,1) */
+	parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent);
+	if (css_depth(&parent_blkcg->css) > 0)
+		return ERR_PTR(-EINVAL);
+
+	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
+	if (!blkcg)
+		return ERR_PTR(-ENOMEM);
+
+	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
+done:
+	spin_lock_init(&blkcg->lock);
+	INIT_HLIST_HEAD(&blkcg->blkg_list);
+
+	return &blkcg->css;
+}
+
+/*
+ * We cannot support shared io contexts, as we have no mean to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main cic data structures.  For now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc.
+ */
+static int blkiocg_can_attach(struct cgroup_subsys *subsys,
+				struct cgroup *cgroup, struct task_struct *tsk,
+				bool threadgroup)
+{
+	struct io_context *ioc;
+	int ret = 0;
+
+	/* task_lock() is needed to avoid races with exit_io_context() */
+	task_lock(tsk);
+	ioc = tsk->io_context;
+	if (ioc && atomic_read(&ioc->nr_tasks) > 1)
+		ret = -EINVAL;
+	task_unlock(tsk);
+
+	return ret;
+}
+
+static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
+				struct cgroup *prev, struct task_struct *tsk,
+				bool threadgroup)
+{
+	struct io_context *ioc;
+
+	task_lock(tsk);
+	ioc = tsk->io_context;
+	if (ioc)
+		ioc->cgroup_changed = 1;
+	task_unlock(tsk);
+}
+
+struct cgroup_subsys blkio_subsys = {
+	.name = "blkio",
+	.create = blkiocg_create,
+	.can_attach = blkiocg_can_attach,
+	.attach = blkiocg_attach,
+	.destroy = blkiocg_destroy,
+	.populate = blkiocg_populate,
+	.subsys_id = blkio_subsys_id,
+	.use_id = 1,
+};
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
new file mode 100644
index 000000000000..ba5703f69b42
--- /dev/null
+++ b/block/blk-cgroup.h
@@ -0,0 +1,58 @@
+#ifndef _BLK_CGROUP_H
+#define _BLK_CGROUP_H
+/*
+ * Common Block IO controller cgroup interface
+ *
+ * Based on ideas and code from CFQ, CFS and BFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
+ * 	              Nauman Rafique <nauman@google.com>
+ */
+
+#include <linux/cgroup.h>
+
+struct blkio_cgroup {
+	struct cgroup_subsys_state css;
+	unsigned int weight;
+	spinlock_t lock;
+	struct hlist_head blkg_list;
+};
+
+struct blkio_group {
+	/* An rcu protected unique identifier for the group */
+	void *key;
+	struct hlist_node blkcg_node;
+};
+
+#define BLKIO_WEIGHT_MIN	100
+#define BLKIO_WEIGHT_MAX	1000
+#define BLKIO_WEIGHT_DEFAULT	500
+
+#ifdef CONFIG_BLK_CGROUP
+extern struct blkio_cgroup blkio_root_cgroup;
+extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
+extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
+				struct blkio_group *blkg, void *key);
+extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
+extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
+						void *key);
+#else
+static inline struct blkio_cgroup *
+cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
+
+static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
+			struct blkio_group *blkg, void *key)
+{
+}
+
+static inline int
+blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
+
+static inline struct blkio_group *
+blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
+#endif
+#endif /* _BLK_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c8d31bacf46..ccefff02b6cb 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
 #endif
 
 /* */
+
+#ifdef CONFIG_BLK_CGROUP
+SUBSYS(blkio)
+#endif
+
+/* */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index eb73632440f1..d61b0b8b5cd1 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -68,6 +68,10 @@ struct io_context {
 	unsigned short ioprio;
 	unsigned short ioprio_changed;
 
+#ifdef CONFIG_BLK_CGROUP
+	unsigned short cgroup_changed;
+#endif
+
 	/*
 	 * For request batching
 	 */
-- 
cgit v1.2.3


From 8e182a90f91456335756d2ce304ad470795d98e1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 30 Nov 2009 13:23:11 +0000
Subject: pata_piccolo: Driver for old Toshiba chipsets

We were never able to get docs for this out of Toshiba for years. Dave
Barnes produced a NetBSD driver however and from that we can fill in the
needed tables.

As we correct the PCI identifiers a bit also update the old ide generic driver
at the same time so it stays compiling.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/Kconfig           |   9 +++
 drivers/ata/Makefile          |   1 +
 drivers/ata/ata_generic.c     |   5 +-
 drivers/ata/pata_piccolo.c    | 140 ++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide-pci-generic.c |   3 +-
 include/linux/pci_ids.h       |   7 ++-
 6 files changed, 160 insertions(+), 5 deletions(-)
 create mode 100644 drivers/ata/pata_piccolo.c

(limited to 'include/linux')

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a47ca75aa078..676f08b004b3 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -667,6 +667,15 @@ config PATA_SIS
 
 	  If unsure, say N.
 
+config PATA_TOSHIBA
+	tristate "Toshiba Piccolo support (Experimental)"
+	depends on PCI && EXPERIMENTAL
+	help
+	  Support for the Toshiba Piccolo controllers. Currently only the
+	  primary channel is supported by this driver.
+
+	  If unsure, say N.
+
 config PATA_VIA
 	tristate "VIA PATA support"
 	depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 01e126f343b3..d909435e9d81 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_PATA_RZ1000)	+= pata_rz1000.o
 obj-$(CONFIG_PATA_SC1200)	+= pata_sc1200.o
 obj-$(CONFIG_PATA_SERVERWORKS)	+= pata_serverworks.o
 obj-$(CONFIG_PATA_SIL680)	+= pata_sil680.o
+obj-$(CONFIG_PATA_TOSHIBA)	+= pata_piccolo.o
 obj-$(CONFIG_PATA_VIA)		+= pata_via.o
 obj-$(CONFIG_PATA_WINBOND)	+= pata_sl82c105.o
 obj-$(CONFIG_PATA_WINBOND_VLB)	+= pata_winbond.o
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index ecfd22b4f1ce..12e26c3c68e3 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -168,9 +168,12 @@ static struct pci_device_id ata_generic[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), },
-	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO), },
+#if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE)
 	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3),  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5),  },
+#endif	
 	/* Must come last. If you add entries adjust this table appropriately */
 	{ PCI_ANY_ID,		PCI_ANY_ID,			   PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 1},
 	{ 0, },
diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c
new file mode 100644
index 000000000000..bfe0180f3efa
--- /dev/null
+++ b/drivers/ata/pata_piccolo.c
@@ -0,0 +1,140 @@
+/*
+ *  pata_piccolo.c - Toshiba Piccolo PATA/SATA controller driver.
+ *
+ *  This is basically an update to ata_generic.c to add Toshiba Piccolo support
+ *  then split out to keep ata_generic "clean".
+ *
+ *  Copyright 2005 Red Hat Inc, all rights reserved.
+ *
+ *  Elements from ide/pci/generic.c
+ *	    Copyright (C) 2001-2002	Andre Hedrick <andre@linux-ide.org>
+ *	    Portions (C) Copyright 2002  Red Hat Inc <alan@redhat.com>
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ *
+ *  The timing data tables/programming info are courtesy of the NetBSD driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <scsi/scsi_host.h>
+#include <linux/libata.h>
+
+#define DRV_NAME "pata_piccolo"
+#define DRV_VERSION "0.0.1"
+
+
+
+static void tosh_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+	static const u16 pio[6] = {	/* For reg 0x50 low word & E088 */
+		0x0566, 0x0433, 0x0311, 0x0201, 0x0200, 0x0100
+	};
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	u16 conf;
+	pci_read_config_word(pdev, 0x50, &conf);
+	conf &= 0xE088;
+	conf |= pio[adev->pio_mode - XFER_PIO_0];
+	pci_write_config_word(pdev, 0x50, conf);
+}
+
+static void tosh_set_dmamode(struct ata_port *ap, struct ata_device *adev)
+{
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	u32 conf;
+	pci_read_config_dword(pdev, 0x5C, &conf);
+	conf &= 0x78FFE088;	/* Keep the other bits */
+	if (adev->dma_mode >= XFER_UDMA_0) {
+		int udma = adev->dma_mode - XFER_UDMA_0;
+		conf |= 0x80000000;
+		conf |= (udma + 2) << 28;
+		conf |= (2 - udma) * 0x111;	/* spread into three nibbles */
+	} else {
+		static const u32 mwdma[4] = {
+			0x0655, 0x0200, 0x0200, 0x0100
+		};
+		conf |= mwdma[adev->dma_mode - XFER_MW_DMA_0];
+	}
+	pci_write_config_dword(pdev, 0x5C, conf);
+}
+
+
+static struct scsi_host_template tosh_sht = {
+	ATA_BMDMA_SHT(DRV_NAME),
+};
+
+static struct ata_port_operations tosh_port_ops = {
+	.inherits	= &ata_bmdma_port_ops,
+	.cable_detect	= ata_cable_unknown,
+	.set_piomode	= tosh_set_piomode,
+	.set_dmamode	= tosh_set_dmamode
+};
+
+/**
+ *	ata_tosh_init		-	attach generic IDE
+ *	@dev: PCI device found
+ *	@id: match entry
+ *
+ *	Called each time a matching IDE interface is found. We check if the
+ *	interface is one we wish to claim and if so we perform any chip
+ *	specific hacks then let the ATA layer do the heavy lifting.
+ */
+
+static int ata_tosh_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	static const struct ata_port_info info = {
+		.flags = ATA_FLAG_SLAVE_POSS,
+		.pio_mask = ATA_PIO5,
+		.mwdma_mask = ATA_MWDMA2,
+		.udma_mask = ATA_UDMA2,
+		.port_ops = &tosh_port_ops
+	};
+	const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
+	/* Just one port for the moment */
+	return ata_pci_sff_init_one(dev, ppi, &tosh_sht, NULL);
+}
+
+static struct pci_device_id ata_tosh[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3),  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5),  },
+	{ 0, },
+};
+
+static struct pci_driver ata_tosh_pci_driver = {
+	.name 		= DRV_NAME,
+	.id_table	= ata_tosh,
+	.probe 		= ata_tosh_init_one,
+	.remove		= ata_pci_remove_one,
+#ifdef CONFIG_PM
+	.suspend	= ata_pci_device_suspend,
+	.resume		= ata_pci_device_resume,
+#endif
+};
+
+static int __init ata_tosh_init(void)
+{
+	return pci_register_driver(&ata_tosh_pci_driver);
+}
+
+
+static void __exit ata_tosh_exit(void)
+{
+	pci_unregister_driver(&ata_tosh_pci_driver);
+}
+
+
+MODULE_AUTHOR("Alan Cox");
+MODULE_DESCRIPTION("Low level driver for Toshiba Piccolo ATA");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ata_tosh);
+MODULE_VERSION(DRV_VERSION);
+
+module_init(ata_tosh_init);
+module_exit(ata_tosh_exit);
+
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index 39d4e01f5c9c..a743e68a8903 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -162,9 +162,10 @@ static const struct pci_device_id generic_pci_tbl[] = {
 #ifdef CONFIG_BLK_DEV_IDE_SATA
 	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_8237_SATA),		 5 },
 #endif
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO),		 4 },
 	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_1),	 4 },
 	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),	 4 },
+	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_3),	 4 },
+	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_5),	 4 },
 	{ PCI_VDEVICE(NETCELL,	PCI_DEVICE_ID_REVOLUTION),		 6 },
 	/*
 	 * Must come last.  If you add entries adjust
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 84cf1f3b7838..9ca483be68d5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1496,9 +1496,10 @@
 #define PCI_DEVICE_ID_SBE_WANXL400	0x0104
 
 #define PCI_VENDOR_ID_TOSHIBA		0x1179
-#define PCI_DEVICE_ID_TOSHIBA_PICCOLO	0x0102
-#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_1	0x0103
-#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_2	0x0105
+#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_1	0x0101
+#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_2	0x0102
+#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_3	0x0103
+#define PCI_DEVICE_ID_TOSHIBA_PICCOLO_5	0x0105
 #define PCI_DEVICE_ID_TOSHIBA_TOPIC95	0x060a
 #define PCI_DEVICE_ID_TOSHIBA_TOPIC97	0x060f
 #define PCI_DEVICE_ID_TOSHIBA_TOPIC100	0x0617
-- 
cgit v1.2.3


From 95514fd8ff0f30de7815950edfd84ef1e19fb1c8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Wed, 25 Nov 2009 18:12:48 +0100
Subject: libata: add private driver field to struct ata_device

This brings struct ata_device in-line with struct ata_{port,host}.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index ba07e84c9840..a5b3dc71e819 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -595,6 +595,7 @@ struct ata_device {
 	unsigned int		horkage;	/* List of broken features */
 	unsigned long		flags;		/* ATA_DFLAG_xxx */
 	struct scsi_device	*sdev;		/* attached SCSI device */
+	void			*private_data;
 #ifdef CONFIG_ATA_ACPI
 	acpi_handle		acpi_handle;
 	union acpi_object	*gtf_cache;
-- 
cgit v1.2.3


From 491deb24bf5bf7124141287aaf02c3219783ceab Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 3 Dec 2009 01:25:54 +0000
Subject: net 02/05: fib_rules: rename ifindex/ifname/FRA_IFNAME to
 iifindex/iifname/FRA_IIFNAME

commit 229e77eec406ad68662f18e49fda8b5d366768c5
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Dec 3 12:05:23 2009 +0100

    net: fib_rules: rename ifindex/ifname/FRA_IFNAME to iifindex/iifname/FRA_IIFNAME

    The next patch will add oif classification, rename interface related members
    and attributes to reflect that they're used for iif classification.

    Signed-off-by: Patrick McHardy <kaber@trash.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  6 ++++--
 include/net/fib_rules.h   |  6 +++---
 net/core/fib_rules.c      | 36 ++++++++++++++++++------------------
 3 files changed, 25 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index c7e5b700bb91..7e11bb2fa655 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -8,7 +8,8 @@
 #define FIB_RULE_PERMANENT	0x00000001
 #define FIB_RULE_INVERT		0x00000002
 #define FIB_RULE_UNRESOLVED	0x00000004
-#define FIB_RULE_DEV_DETACHED	0x00000008
+#define FIB_RULE_IIF_DETACHED	0x00000008
+#define FIB_RULE_DEV_DETACHED	FIB_RULE_IIF_DETACHED
 
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
@@ -31,7 +32,8 @@ enum {
 	FRA_UNSPEC,
 	FRA_DST,	/* destination address */
 	FRA_SRC,	/* source address */
-	FRA_IFNAME,	/* interface name */
+	FRA_IIFNAME,	/* interface name */
+#define FRA_IFNAME	FRA_IIFNAME
 	FRA_GOTO,	/* target to jump to (FR_ACT_GOTO) */
 	FRA_UNUSED2,
 	FRA_PRIORITY,	/* priority/preference */
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 22fb323cd85e..62bebcb2a51c 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -10,7 +10,7 @@
 struct fib_rule {
 	struct list_head	list;
 	atomic_t		refcnt;
-	int			ifindex;
+	int			iifindex;
 	u32			mark;
 	u32			mark_mask;
 	u32			pref;
@@ -19,7 +19,7 @@ struct fib_rule {
 	u8			action;
 	u32			target;
 	struct fib_rule *	ctarget;
-	char			ifname[IFNAMSIZ];
+	char			iifname[IFNAMSIZ];
 	struct rcu_head		rcu;
 	struct net *		fr_net;
 };
@@ -67,7 +67,7 @@ struct fib_rules_ops {
 };
 
 #define FRA_GENERIC_POLICY \
-	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
+	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_PRIORITY]	= { .type = NLA_U32 }, \
 	[FRA_FWMARK]	= { .type = NLA_U32 }, \
 	[FRA_FWMASK]	= { .type = NLA_U32 }, \
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd309384f8b8..8e8028cdc87f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -135,7 +135,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->ifindex && (rule->ifindex != fl->iif))
+	if (rule->iifindex && (rule->iifindex != fl->iif))
 		goto out;
 
 	if ((rule->mark ^ fl->mark) & rule->mark_mask)
@@ -248,14 +248,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (tb[FRA_PRIORITY])
 		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
 
-	if (tb[FRA_IFNAME]) {
+	if (tb[FRA_IIFNAME]) {
 		struct net_device *dev;
 
-		rule->ifindex = -1;
-		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
-		dev = __dev_get_by_name(net, rule->ifname);
+		rule->iifindex = -1;
+		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->iifname);
 		if (dev)
-			rule->ifindex = dev->ifindex;
+			rule->iifindex = dev->ifindex;
 	}
 
 	if (tb[FRA_FWMARK]) {
@@ -388,8 +388,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
 			continue;
 
-		if (tb[FRA_IFNAME] &&
-		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+		if (tb[FRA_IIFNAME] &&
+		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
 			continue;
 
 		if (tb[FRA_FWMARK] &&
@@ -447,7 +447,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 					 struct fib_rule *rule)
 {
 	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
-			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
 			 + nla_total_size(4) /* FRA_FWMARK */
@@ -481,11 +481,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
-	if (rule->ifname[0]) {
-		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+	if (rule->iifname[0]) {
+		NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);
 
-		if (rule->ifindex == -1)
-			frh->flags |= FIB_RULE_DEV_DETACHED;
+		if (rule->iifindex == -1)
+			frh->flags |= FIB_RULE_IIF_DETACHED;
 	}
 
 	if (rule->pref)
@@ -600,9 +600,9 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
 	struct fib_rule *rule;
 
 	list_for_each_entry(rule, rules, list) {
-		if (rule->ifindex == -1 &&
-		    strcmp(dev->name, rule->ifname) == 0)
-			rule->ifindex = dev->ifindex;
+		if (rule->iifindex == -1 &&
+		    strcmp(dev->name, rule->iifname) == 0)
+			rule->iifindex = dev->ifindex;
 	}
 }
 
@@ -611,8 +611,8 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
 	struct fib_rule *rule;
 
 	list_for_each_entry(rule, rules, list)
-		if (rule->ifindex == dev->ifindex)
-			rule->ifindex = -1;
+		if (rule->iifindex == dev->ifindex)
+			rule->iifindex = -1;
 }
 
 
-- 
cgit v1.2.3


From 1b038a5e60c7812f19818e8a5df96d029e49c38f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 3 Dec 2009 01:25:56 +0000
Subject: net 03/05: fib_rules: add oif classification

commit 68144d350f4f6c348659c825cde6a82b34c27a91
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Dec 3 12:05:25 2009 +0100

    net: fib_rules: add oif classification

    Support routing table lookup based on the flow's oif. This is useful to
    classify packets originating from sockets bound to interfaces differently.

    The route cache already includes the oif and needs no changes.

    Signed-off-by: Patrick McHardy <kaber@trash.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  2 ++
 include/net/fib_rules.h   |  3 +++
 net/core/fib_rules.c      | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 7e11bb2fa655..51da65b68b85 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -10,6 +10,7 @@
 #define FIB_RULE_UNRESOLVED	0x00000004
 #define FIB_RULE_IIF_DETACHED	0x00000008
 #define FIB_RULE_DEV_DETACHED	FIB_RULE_IIF_DETACHED
+#define FIB_RULE_OIF_DETACHED	0x00000010
 
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
@@ -47,6 +48,7 @@ enum {
 	FRA_UNUSED8,
 	FRA_TABLE,	/* Extended table id */
 	FRA_FWMASK,	/* mask for netfilter mark */
+	FRA_OIFNAME,
 	__FRA_MAX
 };
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 62bebcb2a51c..d4e875a58f8b 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -11,6 +11,7 @@ struct fib_rule {
 	struct list_head	list;
 	atomic_t		refcnt;
 	int			iifindex;
+	int			oifindex;
 	u32			mark;
 	u32			mark_mask;
 	u32			pref;
@@ -20,6 +21,7 @@ struct fib_rule {
 	u32			target;
 	struct fib_rule *	ctarget;
 	char			iifname[IFNAMSIZ];
+	char			oifname[IFNAMSIZ];
 	struct rcu_head		rcu;
 	struct net *		fr_net;
 };
@@ -68,6 +70,7 @@ struct fib_rules_ops {
 
 #define FRA_GENERIC_POLICY \
 	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
+	[FRA_OIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_PRIORITY]	= { .type = NLA_U32 }, \
 	[FRA_FWMARK]	= { .type = NLA_U32 }, \
 	[FRA_FWMASK]	= { .type = NLA_U32 }, \
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8e8028cdc87f..d1a70ad4b544 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -138,6 +138,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 	if (rule->iifindex && (rule->iifindex != fl->iif))
 		goto out;
 
+	if (rule->oifindex && (rule->oifindex != fl->oif))
+		goto out;
+
 	if ((rule->mark ^ fl->mark) & rule->mark_mask)
 		goto out;
 
@@ -258,6 +261,16 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 			rule->iifindex = dev->ifindex;
 	}
 
+	if (tb[FRA_OIFNAME]) {
+		struct net_device *dev;
+
+		rule->oifindex = -1;
+		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->oifname);
+		if (dev)
+			rule->oifindex = dev->ifindex;
+	}
+
 	if (tb[FRA_FWMARK]) {
 		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
 		if (rule->mark)
@@ -392,6 +405,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
 			continue;
 
+		if (tb[FRA_OIFNAME] &&
+		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
+			continue;
+
 		if (tb[FRA_FWMARK] &&
 		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
 			continue;
@@ -448,6 +465,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 {
 	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
 			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
+			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
 			 + nla_total_size(4) /* FRA_FWMARK */
@@ -488,6 +506,13 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 			frh->flags |= FIB_RULE_IIF_DETACHED;
 	}
 
+	if (rule->oifname[0]) {
+		NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);
+
+		if (rule->oifindex == -1)
+			frh->flags |= FIB_RULE_OIF_DETACHED;
+	}
+
 	if (rule->pref)
 		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
 
@@ -603,6 +628,9 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
 		if (rule->iifindex == -1 &&
 		    strcmp(dev->name, rule->iifname) == 0)
 			rule->iifindex = dev->ifindex;
+		if (rule->oifindex == -1 &&
+		    strcmp(dev->name, rule->oifname) == 0)
+			rule->oifindex = dev->ifindex;
 	}
 }
 
@@ -610,9 +638,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
 {
 	struct fib_rule *rule;
 
-	list_for_each_entry(rule, rules, list)
+	list_for_each_entry(rule, rules, list) {
 		if (rule->iifindex == dev->ifindex)
 			rule->iifindex = -1;
+		if (rule->oifindex == dev->ifindex)
+			rule->oifindex = -1;
+	}
 }
 
 
-- 
cgit v1.2.3


From 8153a10c08f1312af563bb92532002e46d3f504a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 3 Dec 2009 01:25:58 +0000
Subject: ipv4 05/05: add sysctl to accept packets with local source addresses

commit 8ec1e0ebe26087bfc5c0394ada5feb5758014fc8
Author: Patrick McHardy <kaber@trash.net>
Date:   Thu Dec 3 12:16:35 2009 +0100

    ipv4: add sysctl to accept packets with local source addresses

    Change fib_validate_source() to accept packets with a local source address when
    the "accept_local" sysctl is set for the incoming inet device. Combined with the
    previous patches, this allows to communicate between multiple local interfaces
    over the wire.

    Signed-off-by: Patrick McHardy <kaber@trash.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  6 ++++++
 include/linux/inetdevice.h             |  1 +
 include/linux/sysctl.h                 |  1 +
 kernel/sysctl_check.c                  |  1 +
 net/ipv4/devinet.c                     |  1 +
 net/ipv4/fib_frontend.c                | 11 +++++++----
 6 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 989f5538b8dd..006b39dec87d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -731,6 +731,12 @@ accept_source_route - BOOLEAN
 	default TRUE (router)
 		FALSE (host)
 
+accept_local - BOOLEAN
+	Accept packets with local source addresses. In combination with
+	suitable routing, this can be used to direct packets between two
+	local interfaces over the wire and have them accepted properly.
+	default FALSE
+
 rp_filter - INTEGER
 	0 - No source validation.
 	1 - Strict mode as defined in RFC3704 Strict Reverse Path
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index eecfa559bfb4..699e85c01a4d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -83,6 +83,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
 						       ACCEPT_SOURCE_ROUTE)
+#define IN_DEV_ACCEPT_LOCAL(in_dev)	IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL)
 #define IN_DEV_BOOTP_RELAY(in_dev)	IN_DEV_ANDCONF((in_dev), BOOTP_RELAY)
 
 #define IN_DEV_LOG_MARTIANS(in_dev)	IN_DEV_ORCONF((in_dev), LOG_MARTIANS)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1e4743ee6831..9f047d73a216 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -490,6 +490,7 @@ enum
 	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
+	NET_IPV4_CONF_ACCEPT_LOCAL=23,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index b6e7aaea4604..f1d676e4b368 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -220,6 +220,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
 	{ NET_IPV4_CONF_PROMOTE_SECONDARIES,	"promote_secondaries" },
 	{ NET_IPV4_CONF_ARP_ACCEPT,		"arp_accept" },
 	{ NET_IPV4_CONF_ARP_NOTIFY,		"arp_notify" },
+	{ NET_IPV4_CONF_ACCEPT_LOCAL,		"accept_local" },
 	{}
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c100709d6ddf..e3126612fcbb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1468,6 +1468,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
 					"accept_source_route"),
+		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3b373a8b0473..3323168ee52d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -241,16 +241,17 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 			    .iif = oif };
 
 	struct fib_result res;
-	int no_addr, rpf;
+	int no_addr, rpf, accept_local;
 	int ret;
 	struct net *net;
 
-	no_addr = rpf = 0;
+	no_addr = rpf = accept_local = 0;
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
+		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
 	}
 	rcu_read_unlock();
 
@@ -260,8 +261,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	net = dev_net(dev);
 	if (fib_lookup(net, &fl, &res))
 		goto last_resort;
-	if (res.type != RTN_UNICAST)
-		goto e_inval_res;
+	if (res.type != RTN_UNICAST) {
+		if (res.type != RTN_LOCAL || !accept_local)
+			goto e_inval_res;
+	}
 	*spec_dst = FIB_RES_PREFSRC(res);
 	fib_combine_itag(itag, &res);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-- 
cgit v1.2.3


From a01878aac57eac6eb4bf194788ab2cc440490d0f Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFS: reorder nfs4_sequence_regs to remove 8 bytes of padding on 64
 bits

reorder nfs4_sequence_args to remove 8 bytes of padding on 64 bit
builds.

The size of this structure drops to 24 bytes from 32 and reduces the
text size of nfs.ko.
On my x86_64 size reports

		text       data     bss
2.6.32-rc5 	200996	   8512	    432	 209940	  33414	nfs.ko
+patch 		200884	   8512	    432	 209828	  333a4	nfs.ko


Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 62f63fb0c4c8..8c880372536e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -170,8 +170,8 @@ struct nfs4_sequence_args {
 struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
 	u8			sr_slotid;	/* slot used to send request */
-	unsigned long		sr_renewal_time;
 	int			sr_status;	/* sequence operation status */
+	unsigned long		sr_renewal_time;
 };
 
 struct nfs4_get_lease_time_args {
-- 
cgit v1.2.3


From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Allow RPCs to fail quickly if the server is unreachable

The kernel sometimes makes RPC calls to services that aren't running.
Because the kernel's RPC client always assumes the hard retry semantic
when reconnecting a connection-oriented RPC transport, the underlying
reconnect logic takes a long while to time out, even though the remote
may have responded immediately with ECONNREFUSED.

In certain cases, like upcalls to our local rpcbind daemon, or for NFS
mount requests, we'd like the kernel to fail immediately if the remote
service isn't reachable.  This allows another transport to be tried
immediately, or the pending request can be abandoned quickly.

Introduce a per-request flag which controls how call_transmit_status()
behaves when request transmission fails because the server cannot be
reached.

We don't want soft connection semantics to apply to other errors.  The
default case of the switch statement in call_transmit_status() no
longer falls through; the fall through code is copied to the default
case, and a "break;" is added.

The transport's connection re-establishment timeout is also ignored for
such requests.  We want the request to fail immediately, so the
reconnect delay is skipped.  Additionally, we don't want a connect
failure here to further increase the reconnect timeout value, since
this request will not be retried.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  2 ++
 net/sunrpc/clnt.c            | 11 +++++++++--
 net/sunrpc/xprtsock.c        |  2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 401097781fc0..1906782ec86b 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -130,12 +130,14 @@ struct rpc_task_setup {
 #define RPC_TASK_DYNAMIC	0x0080		/* task was kmalloc'ed */
 #define RPC_TASK_KILLED		0x0100		/* task was killed */
 #define RPC_TASK_SOFT		0x0200		/* Use soft timeouts */
+#define RPC_TASK_SOFTCONN	0x0400		/* Fail if can't connect */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
 #define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)
+#define RPC_IS_SOFTCONN(t)	((t)->tk_flags & RPC_TASK_SOFTCONN)
 
 #define RPC_TASK_RUNNING	0
 #define RPC_TASK_QUEUED		1
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7bcd931e06ee..68a23583f44c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task)
 	default:
 		dprint_status(task);
 		xprt_end_transmit(task);
+		rpc_task_force_reencode(task);
+		break;
 		/*
 		 * Special cases: if we've been waiting on the
 		 * socket's write_space() callback, or if the
@@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task)
 		 * then hold onto the transport lock.
 		 */
 	case -ECONNREFUSED:
-	case -ECONNRESET:
-	case -ENOTCONN:
 	case -EHOSTDOWN:
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
+		if (RPC_IS_SOFTCONN(task)) {
+			xprt_end_transmit(task);
+			rpc_exit(task, task->tk_status);
+			break;
+		}
+	case -ECONNRESET:
+	case -ENOTCONN:
 	case -EPIPE:
 		rpc_task_force_reencode(task);
 	}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 37c5475ba258..ff312f8b018d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task)
 	if (xprt_test_and_set_connecting(xprt))
 		return;
 
-	if (transport->sock != NULL) {
+	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
 				"seconds\n",
 				xprt, xprt->reestablish_timeout / HZ);
-- 
cgit v1.2.3


From 141518c95870228da4e050fbe31a8f0c9df82c72 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Thu, 3 Dec 2009 08:36:22 +0000
Subject: tg3: Add some VPD preprocessor constants

This patch cleans up the VPD code by creating preprocessor definitions
and using them in the place of hardcoded constants.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c       | 15 ++++++++-------
 drivers/net/tg3.h       | 10 ++++++++--
 include/linux/ethtool.h |  3 ++-
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 36f2e1b8cbb1..2576effca845 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12433,7 +12433,7 @@ skip_phy_reset:
 
 static void __devinit tg3_read_partno(struct tg3 *tp)
 {
-	unsigned char vpd_data[256];   /* in little-endian format */
+	unsigned char vpd_data[TG3_NVM_VPD_LEN];   /* in little-endian format */
 	unsigned int i;
 	u32 magic;
 
@@ -12442,14 +12442,14 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 		goto out_not_found;
 
 	if (magic == TG3_EEPROM_MAGIC) {
-		for (i = 0; i < 256; i += 4) {
+		for (i = 0; i < TG3_NVM_VPD_LEN; i += 4) {
 			u32 tmp;
 
 			/* The data is in little-endian format in NVRAM.
 			 * Use the big-endian read routines to preserve
 			 * the byte order as it exists in NVRAM.
 			 */
-			if (tg3_nvram_read_be32(tp, 0x100 + i, &tmp))
+			if (tg3_nvram_read_be32(tp, TG3_NVM_VPD_OFF + i, &tmp))
 				goto out_not_found;
 
 			memcpy(&vpd_data[i], &tmp, sizeof(tmp));
@@ -12458,7 +12458,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 		int vpd_cap;
 
 		vpd_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_VPD);
-		for (i = 0; i < 256; i += 4) {
+		for (i = 0; i < TG3_NVM_VPD_LEN; i += 4) {
 			u32 tmp, j = 0;
 			__le32 v;
 			u16 tmp16;
@@ -12483,7 +12483,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 	}
 
 	/* Now parse and find the part number. */
-	for (i = 0; i < 254; ) {
+	for (i = 0; i < TG3_NVM_VPD_LEN - 2; ) {
 		unsigned char val = vpd_data[i];
 		unsigned int block_end;
 
@@ -12502,7 +12502,7 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 			      (vpd_data[i + 2] << 8)));
 		i += 3;
 
-		if (block_end > 256)
+		if (block_end > TG3_NVM_VPD_LEN)
 			goto out_not_found;
 
 		while (i < (block_end - 2)) {
@@ -12511,7 +12511,8 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 				int partno_len = vpd_data[i + 2];
 
 				i += 3;
-				if (partno_len > 24 || (partno_len + i) > 256)
+				if (partno_len > TG3_BPN_SIZE ||
+				    (partno_len + i) > TG3_NVM_VPD_LEN)
 					goto out_not_found;
 
 				memcpy(tp->board_part_number,
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 6a2c31071caf..cd30889650f8 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1821,6 +1821,11 @@
 
 #define TG3_OTP_DEFAULT			0x286c1640
 
+
+/* Hardware Legacy NVRAM layout */
+#define TG3_NVM_VPD_OFF			0x100
+#define TG3_NVM_VPD_LEN			256
+
 /* Hardware Selfboot NVRAM layout */
 #define TG3_NVM_HWSB_CFG1		0x00000004
 #define  TG3_NVM_HWSB_CFG1_MAJMSK	0xf8000000
@@ -2893,8 +2898,9 @@ struct tg3 {
 	u32				led_ctrl;
 	u32				phy_otp;
 
-	char				board_part_number[24];
-#define TG3_VER_SIZE 32
+#define TG3_BPN_SIZE			24
+	char				board_part_number[TG3_BPN_SIZE];
+#define TG3_VER_SIZE			ETHTOOL_FWVERS_LEN
 	char				fw_ver[TG3_VER_SIZE];
 	u32				nic_sram_data_cfg;
 	u32				pci_clock_ctrl;
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index bcaa0e0a54d3..ef4a2d84d922 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -49,13 +49,14 @@ static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep)
 	return (ep->speed_hi << 16) | ep->speed;
 }
 
+#define ETHTOOL_FWVERS_LEN	32
 #define ETHTOOL_BUSINFO_LEN	32
 /* these strings are set to whatever the driver author decides... */
 struct ethtool_drvinfo {
 	__u32	cmd;
 	char	driver[32];	/* driver short name, "tulip", "eepro100" */
 	char	version[32];	/* driver version string */
-	char	fw_version[32];	/* firmware version string, if applicable */
+	char	fw_version[ETHTOOL_FWVERS_LEN];	/* firmware version string */
 	char	bus_info[ETHTOOL_BUSINFO_LEN];	/* Bus info for this IF. */
 				/* For PCI devices, use pci_name(pci_dev). */
 	char	reserved1[32];
-- 
cgit v1.2.3


From e78db4dfb1355a895f7ea50133b702b55b8ed184 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Thu, 26 Nov 2009 22:46:03 -0500
Subject: libata: Report zeroed read after TRIM and max discard size

Our current TRIM payload is a single sector that can accommodate 64 *
65535 blocks being unmapped.  Report this value in the Block Limits
Maximum Unmap LBA count field.

If a storage device supports TRIM and the DRAT and RZAT bits are set,
report TPRZ=1 in Read Capacity(16).

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-scsi.c | 12 +++++++++---
 include/linux/ata.h       | 11 +++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 340a616d226b..e1e186b9dfcc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2115,8 +2115,10 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
 	 * that we support some form of unmap - in thise case via WRITE SAME
 	 * with the unmap bit set.
 	 */
-	if (ata_id_has_trim(args->id))
+	if (ata_id_has_trim(args->id)) {
+		put_unaligned_be32(65535 * 512 / 8, &rbuf[20]);
 		put_unaligned_be32(1, &rbuf[28]);
+	}
 
 	return 0;
 }
@@ -2411,8 +2413,12 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
 		rbuf[14] = (lowest_aligned >> 8) & 0x3f;
 		rbuf[15] = lowest_aligned;
 
-		if (ata_id_has_trim(args->id))
-			rbuf[14] |= 0x80;
+		if (ata_id_has_trim(args->id)) {
+			rbuf[14] |= 0x80; /* TPE */
+
+			if (ata_id_has_zero_after_trim(args->id))
+				rbuf[14] |= 0x40; /* TPRZ */
+		}
 	}
 
 	return 0;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index e2595e877e44..dfa2298bf488 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -75,6 +75,7 @@ enum {
 	ATA_ID_EIDE_DMA_TIME	= 66,
 	ATA_ID_EIDE_PIO		= 67,
 	ATA_ID_EIDE_PIO_IORDY	= 68,
+	ATA_ID_ADDITIONAL_SUPP	= 69,
 	ATA_ID_QUEUE_DEPTH	= 75,
 	ATA_ID_MAJOR_VER	= 80,
 	ATA_ID_COMMAND_SET_1	= 82,
@@ -816,6 +817,16 @@ static inline int ata_id_has_trim(const u16 *id)
 	return 0;
 }
 
+static inline int ata_id_has_zero_after_trim(const u16 *id)
+{
+	/* DSM supported, deterministic read, and read zero after trim set */
+	if (ata_id_has_trim(id) &&
+	    (id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020)
+		return 1;
+
+	return 0;
+}
+
 static inline int ata_id_current_chs_valid(const u16 *id)
 {
 	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
-- 
cgit v1.2.3


From d0634c4aea0b80447cbdc4c0db285004b860c455 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Thu, 26 Nov 2009 12:00:43 -0500
Subject: libata: Clarify ata_set_lba_range_entries function

ata_set_lba_range_entries used the variable max for two different things
which was confusing.  Make the function take a buffer size in bytes as
argument and return the used buffer size upon completion.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-scsi.c |  2 +-
 include/linux/ata.h       | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e1e186b9dfcc..62e6b9ea96af 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2972,7 +2972,7 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 		goto invalid_fld;
 
 	buf = page_address(sg_page(scsi_sglist(scmd)));
-	size = ata_set_lba_range_entries(buf, 512 / 8, block, n_block);
+	size = ata_set_lba_range_entries(buf, 512, block, n_block);
 
 	tf->protocol = ATA_PROT_DMA;
 	tf->hob_feature = 0;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index dfa2298bf488..38a6948ce0c2 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -982,17 +982,17 @@ static inline void ata_id_to_hd_driveid(u16 *id)
 }
 
 /*
- * Write up to 'max' LBA Range Entries to the buffer that will cover the
- * extent from sector to sector + count.  This is used for TRIM and for
- * ADD LBA(S) TO NV CACHE PINNED SET.
+ * Write LBA Range Entries to the buffer that will cover the extent from
+ * sector to sector + count.  This is used for TRIM and for ADD LBA(S)
+ * TO NV CACHE PINNED SET.
  */
-static inline unsigned ata_set_lba_range_entries(void *_buffer, unsigned max,
-						u64 sector, unsigned long count)
+static inline unsigned ata_set_lba_range_entries(void *_buffer,
+		unsigned buf_size, u64 sector, unsigned long count)
 {
 	__le64 *buffer = _buffer;
-	unsigned i = 0;
+	unsigned i = 0, used_bytes;
 
-	while (i < max) {
+	while (i < buf_size / 8 ) { /* 6-byte LBA + 2-byte range per entry */
 		u64 entry = sector |
 			((u64)(count > 0xffff ? 0xffff : count) << 48);
 		buffer[i++] = __cpu_to_le64(entry);
@@ -1002,9 +1002,9 @@ static inline unsigned ata_set_lba_range_entries(void *_buffer, unsigned max,
 		sector += 0xffff;
 	}
 
-	max = ALIGN(i * 8, 512);
-	memset(buffer + i, 0, max - i * 8);
-	return max;
+	used_bytes = ALIGN(i * 8, 512);
+	memset(buffer + i, 0, used_bytes - i * 8);
+	return used_bytes;
 }
 
 static inline int is_multi_taskfile(struct ata_taskfile *tf)
-- 
cgit v1.2.3


From 69ee472f2706371ca639de49b06df91615c07d8d Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 3 Dec 2009 15:31:18 -0800
Subject: usbnet & cdc-ether: Autosuspend for online devices

Using remote wakeup and delayed transmission to allow
online device to go into usb autosuspend.
Minimal alternate support for devices that don't support
remote wakeup.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c |   8 +++
 drivers/net/usb/usbnet.c    | 157 ++++++++++++++++++++++++++++++++++++--------
 include/linux/usb/usbnet.h  |   6 ++
 3 files changed, 142 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 197912d9c04a..21e183a83b99 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -411,6 +411,12 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	return 0;
 }
 
+static int cdc_manage_power(struct usbnet *dev, int on)
+{
+	dev->intf->needs_remote_wakeup = on;
+	return 0;
+}
+
 static const struct driver_info	cdc_info = {
 	.description =	"CDC Ethernet Device",
 	.flags =	FLAG_ETHER | FLAG_LINK_INTR,
@@ -418,6 +424,7 @@ static const struct driver_info	cdc_info = {
 	.bind =		cdc_bind,
 	.unbind =	usbnet_cdc_unbind,
 	.status =	cdc_status,
+	.manage_power =	cdc_manage_power,
 };
 
 static const struct driver_info mbm_info = {
@@ -619,6 +626,7 @@ static struct usb_driver cdc_driver = {
 	.suspend =	usbnet_suspend,
 	.resume =	usbnet_resume,
 	.reset_resume =	usbnet_resume,
+	.supports_autosuspend = 1,
 };
 
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 511e7bdc4573..035fab04c0a0 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -353,7 +353,8 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 
 	if (netif_running (dev->net) &&
 	    netif_device_present (dev->net) &&
-	    !test_bit (EVENT_RX_HALT, &dev->flags)) {
+	    !test_bit (EVENT_RX_HALT, &dev->flags) &&
+	    !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) {
 		switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) {
 		case -EPIPE:
 			usbnet_defer_kevent (dev, EVENT_RX_HALT);
@@ -611,15 +612,39 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs);
 /*-------------------------------------------------------------------------*/
 
 // precondition: never called in_interrupt
+static void usbnet_terminate_urbs(struct usbnet *dev)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(unlink_wakeup);
+	DECLARE_WAITQUEUE(wait, current);
+	int temp;
+
+	/* ensure there are no more active urbs */
+	add_wait_queue(&unlink_wakeup, &wait);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	dev->wait = &unlink_wakeup;
+	temp = unlink_urbs(dev, &dev->txq) +
+		unlink_urbs(dev, &dev->rxq);
+
+	/* maybe wait for deletions to finish. */
+	while (!skb_queue_empty(&dev->rxq)
+		&& !skb_queue_empty(&dev->txq)
+		&& !skb_queue_empty(&dev->done)) {
+			schedule_timeout(UNLINK_TIMEOUT_MS);
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (netif_msg_ifdown(dev))
+				devdbg(dev, "waited for %d urb completions",
+					temp);
+	}
+	set_current_state(TASK_RUNNING);
+	dev->wait = NULL;
+	remove_wait_queue(&unlink_wakeup, &wait);
+}
 
 int usbnet_stop (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 	struct driver_info	*info = dev->driver_info;
-	int			temp;
 	int			retval;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK (unlink_wakeup);
-	DECLARE_WAITQUEUE (wait, current);
 
 	netif_stop_queue (net);
 
@@ -641,25 +666,8 @@ int usbnet_stop (struct net_device *net)
 				info->description);
 	}
 
-	if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) {
-		/* ensure there are no more active urbs */
-		add_wait_queue(&unlink_wakeup, &wait);
-		dev->wait = &unlink_wakeup;
-		temp = unlink_urbs(dev, &dev->txq) +
-			unlink_urbs(dev, &dev->rxq);
-
-		/* maybe wait for deletions to finish. */
-		while (!skb_queue_empty(&dev->rxq) &&
-		       !skb_queue_empty(&dev->txq) &&
-		       !skb_queue_empty(&dev->done)) {
-			msleep(UNLINK_TIMEOUT_MS);
-			if (netif_msg_ifdown(dev))
-				devdbg(dev, "waited for %d urb completions",
-					temp);
-		}
-		dev->wait = NULL;
-		remove_wait_queue(&unlink_wakeup, &wait);
-	}
+	if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
+		usbnet_terminate_urbs(dev);
 
 	usb_kill_urb(dev->interrupt);
 
@@ -672,7 +680,10 @@ int usbnet_stop (struct net_device *net)
 	dev->flags = 0;
 	del_timer_sync (&dev->delay);
 	tasklet_kill (&dev->bh);
-	usb_autopm_put_interface(dev->intf);
+	if (info->manage_power)
+		info->manage_power(dev, 0);
+	else
+		usb_autopm_put_interface(dev->intf);
 
 	return 0;
 }
@@ -753,6 +764,12 @@ int usbnet_open (struct net_device *net)
 
 	// delay posting reads until we're fully open
 	tasklet_schedule (&dev->bh);
+	if (info->manage_power) {
+		retval = info->manage_power(dev, 1);
+		if (retval < 0)
+			goto done;
+		usb_autopm_put_interface(dev->intf);
+	}
 	return retval;
 done:
 	usb_autopm_put_interface(dev->intf);
@@ -881,11 +898,16 @@ kevent (struct work_struct *work)
 	/* usb_clear_halt() needs a thread context */
 	if (test_bit (EVENT_TX_HALT, &dev->flags)) {
 		unlink_urbs (dev, &dev->txq);
+		status = usb_autopm_get_interface(dev->intf);
+		if (status < 0)
+			goto fail_pipe;
 		status = usb_clear_halt (dev->udev, dev->out);
+		usb_autopm_put_interface(dev->intf);
 		if (status < 0 &&
 		    status != -EPIPE &&
 		    status != -ESHUTDOWN) {
 			if (netif_msg_tx_err (dev))
+fail_pipe:
 				deverr (dev, "can't clear tx halt, status %d",
 					status);
 		} else {
@@ -896,11 +918,16 @@ kevent (struct work_struct *work)
 	}
 	if (test_bit (EVENT_RX_HALT, &dev->flags)) {
 		unlink_urbs (dev, &dev->rxq);
+		status = usb_autopm_get_interface(dev->intf);
+		if (status < 0)
+			goto fail_halt;
 		status = usb_clear_halt (dev->udev, dev->in);
+		usb_autopm_put_interface(dev->intf);
 		if (status < 0 &&
 		    status != -EPIPE &&
 		    status != -ESHUTDOWN) {
 			if (netif_msg_rx_err (dev))
+fail_halt:
 				deverr (dev, "can't clear rx halt, status %d",
 					status);
 		} else {
@@ -919,7 +946,12 @@ kevent (struct work_struct *work)
 			clear_bit (EVENT_RX_MEMORY, &dev->flags);
 		if (urb != NULL) {
 			clear_bit (EVENT_RX_MEMORY, &dev->flags);
+			status = usb_autopm_get_interface(dev->intf);
+			if (status < 0)
+				goto fail_lowmem;
 			rx_submit (dev, urb, GFP_KERNEL);
+			usb_autopm_put_interface(dev->intf);
+fail_lowmem:
 			tasklet_schedule (&dev->bh);
 		}
 	}
@@ -929,11 +961,18 @@ kevent (struct work_struct *work)
 		int			retval = 0;
 
 		clear_bit (EVENT_LINK_RESET, &dev->flags);
+		status = usb_autopm_get_interface(dev->intf);
+		if (status < 0)
+			goto skip_reset;
 		if(info->link_reset && (retval = info->link_reset(dev)) < 0) {
+			usb_autopm_put_interface(dev->intf);
+skip_reset:
 			devinfo(dev, "link reset failed (%d) usbnet usb-%s-%s, %s",
 				retval,
 				dev->udev->bus->bus_name, dev->udev->devpath,
 				info->description);
+		} else {
+			usb_autopm_put_interface(dev->intf);
 		}
 	}
 
@@ -971,6 +1010,7 @@ static void tx_complete (struct urb *urb)
 		case -EPROTO:
 		case -ETIME:
 		case -EILSEQ:
+			usb_mark_last_busy(dev->udev);
 			if (!timer_pending (&dev->delay)) {
 				mod_timer (&dev->delay,
 					jiffies + THROTTLE_JIFFIES);
@@ -987,6 +1027,7 @@ static void tx_complete (struct urb *urb)
 		}
 	}
 
+	usb_autopm_put_interface_async(dev->intf);
 	urb->dev = NULL;
 	entry->state = tx_done;
 	defer_bh(dev, skb, &dev->txq);
@@ -1057,14 +1098,34 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		}
 	}
 
-	spin_lock_irqsave (&dev->txq.lock, flags);
+	spin_lock_irqsave(&dev->txq.lock, flags);
+	retval = usb_autopm_get_interface_async(dev->intf);
+	if (retval < 0) {
+		spin_unlock_irqrestore(&dev->txq.lock, flags);
+		goto drop;
+	}
+
+#ifdef CONFIG_PM
+	/* if this triggers the device is still a sleep */
+	if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) {
+		/* transmission will be done in resume */
+		usb_anchor_urb(urb, &dev->deferred);
+		/* no use to process more packets */
+		netif_stop_queue(net);
+		spin_unlock_irqrestore(&dev->txq.lock, flags);
+		devdbg(dev, "Delaying transmission for resumption");
+		goto deferred;
+	}
+#endif
 
 	switch ((retval = usb_submit_urb (urb, GFP_ATOMIC))) {
 	case -EPIPE:
 		netif_stop_queue (net);
 		usbnet_defer_kevent (dev, EVENT_TX_HALT);
+		usb_autopm_put_interface_async(dev->intf);
 		break;
 	default:
+		usb_autopm_put_interface_async(dev->intf);
 		if (netif_msg_tx_err (dev))
 			devdbg (dev, "tx: submit urb err %d", retval);
 		break;
@@ -1088,6 +1149,9 @@ drop:
 		devdbg (dev, "> tx, len %d, type 0x%x",
 			length, skb->protocol);
 	}
+#ifdef CONFIG_PM
+deferred:
+#endif
 	return NETDEV_TX_OK;
 }
 EXPORT_SYMBOL_GPL(usbnet_start_xmit);
@@ -1263,6 +1327,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	dev->bh.func = usbnet_bh;
 	dev->bh.data = (unsigned long) dev;
 	INIT_WORK (&dev->kevent, kevent);
+	init_usb_anchor(&dev->deferred);
 	dev->delay.function = usbnet_bh;
 	dev->delay.data = (unsigned long) dev;
 	init_timer (&dev->delay);
@@ -1382,13 +1447,23 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message)
 	struct usbnet		*dev = usb_get_intfdata(intf);
 
 	if (!dev->suspend_count++) {
+		spin_lock_irq(&dev->txq.lock);
+		/* don't autosuspend while transmitting */
+		if (dev->txq.qlen && (message.event & PM_EVENT_AUTO)) {
+			spin_unlock_irq(&dev->txq.lock);
+			return -EBUSY;
+		} else {
+			set_bit(EVENT_DEV_ASLEEP, &dev->flags);
+			spin_unlock_irq(&dev->txq.lock);
+		}
 		/*
 		 * accelerate emptying of the rx and queues, to avoid
 		 * having everything error out.
 		 */
 		netif_device_detach (dev->net);
-		(void) unlink_urbs (dev, &dev->rxq);
-		(void) unlink_urbs (dev, &dev->txq);
+		usbnet_terminate_urbs(dev);
+		usb_kill_urb(dev->interrupt);
+
 		/*
 		 * reattach so runtime management can use and
 		 * wake the device
@@ -1402,10 +1477,34 @@ EXPORT_SYMBOL_GPL(usbnet_suspend);
 int usbnet_resume (struct usb_interface *intf)
 {
 	struct usbnet		*dev = usb_get_intfdata(intf);
+	struct sk_buff          *skb;
+	struct urb              *res;
+	int                     retval;
+
+	if (!--dev->suspend_count) {
+		spin_lock_irq(&dev->txq.lock);
+		while ((res = usb_get_from_anchor(&dev->deferred))) {
+
+			printk(KERN_INFO"%s has delayed data\n", __func__);
+			skb = (struct sk_buff *)res->context;
+			retval = usb_submit_urb(res, GFP_ATOMIC);
+			if (retval < 0) {
+				dev_kfree_skb_any(skb);
+				usb_free_urb(res);
+				usb_autopm_put_interface_async(dev->intf);
+			} else {
+				dev->net->trans_start = jiffies;
+				__skb_queue_tail(&dev->txq, skb);
+			}
+		}
 
-	if (!--dev->suspend_count)
+		smp_mb();
+		clear_bit(EVENT_DEV_ASLEEP, &dev->flags);
+		spin_unlock_irq(&dev->txq.lock);
+		if (!(dev->txq.qlen >= TX_QLEN(dev)))
+			netif_start_queue(dev->net);
 		tasklet_schedule (&dev->bh);
-
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usbnet_resume);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 8c84881f8478..8ce61359bf73 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -55,6 +55,7 @@ struct usbnet {
 	struct sk_buff_head	done;
 	struct sk_buff_head	rxq_pause;
 	struct urb		*interrupt;
+	struct usb_anchor	deferred;
 	struct tasklet_struct	bh;
 
 	struct work_struct	kevent;
@@ -65,6 +66,8 @@ struct usbnet {
 #		define EVENT_STS_SPLIT	3
 #		define EVENT_LINK_RESET	4
 #		define EVENT_RX_PAUSED	5
+#		define EVENT_DEV_WAKING 6
+#		define EVENT_DEV_ASLEEP 7
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
@@ -109,6 +112,9 @@ struct driver_info {
 	/* see if peer is connected ... can sleep */
 	int	(*check_connect)(struct usbnet *);
 
+	/* (dis)activate runtime power management */
+	int	(*manage_power)(struct usbnet *, int);
+
 	/* for status polling */
 	void	(*status)(struct usbnet *, struct urb *);
 
-- 
cgit v1.2.3


From fc4a7489663250360cd40d5adf06a08d1c5d54df Mon Sep 17 00:00:00 2001
From: Patrick Mullaney <pmullaney@novell.com>
Date: Thu, 3 Dec 2009 15:59:22 -0800
Subject: netdevice: provide common routine for macvlan and vlan operstate
 management

Provide common routine for the transition of operational state for a leaf
device during a root device transition.

Signed-off-by: Patrick Mullaney <pmullaney@novell.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     | 24 +++---------------------
 include/linux/netdevice.h |  3 +++
 net/8021q/vlan.c          | 29 ++++-------------------------
 net/core/dev.c            | 27 +++++++++++++++++++++++++++
 4 files changed, 37 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 93c3e6edf702..21a9c9ab4b34 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -582,25 +582,6 @@ static void macvlan_port_destroy(struct net_device *dev)
 	kfree(port);
 }
 
-static void macvlan_transfer_operstate(struct net_device *dev)
-{
-	struct macvlan_dev *vlan = netdev_priv(dev);
-	const struct net_device *lowerdev = vlan->lowerdev;
-
-	if (lowerdev->operstate == IF_OPER_DORMANT)
-		netif_dormant_on(dev);
-	else
-		netif_dormant_off(dev);
-
-	if (netif_carrier_ok(lowerdev)) {
-		if (!netif_carrier_ok(dev))
-			netif_carrier_on(dev);
-	} else {
-		if (netif_carrier_ok(dev))
-			netif_carrier_off(dev);
-	}
-}
-
 static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	if (tb[IFLA_ADDRESS]) {
@@ -693,7 +674,7 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 		return err;
 
 	list_add_tail(&vlan->list, &port->vlans);
-	macvlan_transfer_operstate(dev);
+	netif_stacked_transfer_operstate(lowerdev, dev);
 	return 0;
 }
 
@@ -768,7 +749,8 @@ static int macvlan_device_event(struct notifier_block *unused,
 	switch (event) {
 	case NETDEV_CHANGE:
 		list_for_each_entry(vlan, &port->vlans, list)
-			macvlan_transfer_operstate(vlan->dev);
+			netif_stacked_transfer_operstate(vlan->lowerdev,
+							 vlan->dev);
 		break;
 	case NETDEV_FEAT_CHANGE:
 		list_for_each_entry(vlan, &port->vlans, list) {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index daf13d367498..a3fccc85b1a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1981,6 +1981,9 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
 					unsigned long mask);
 unsigned long netdev_fix_features(unsigned long features, const char *name);
 
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+					struct net_device *dev);
+
 static inline int net_gso_ok(int features, int gso_type)
 {
 	int feature = gso_type << NETIF_F_GSO_SHIFT;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ec3769295dac..33f90e7362cc 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -184,27 +184,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	dev_put(real_dev);
 }
 
-static void vlan_transfer_operstate(const struct net_device *dev,
-				    struct net_device *vlandev)
-{
-	/* Have to respect userspace enforced dormant state
-	 * of real device, also must allow supplicant running
-	 * on VLAN device
-	 */
-	if (dev->operstate == IF_OPER_DORMANT)
-		netif_dormant_on(vlandev);
-	else
-		netif_dormant_off(vlandev);
-
-	if (netif_carrier_ok(dev)) {
-		if (!netif_carrier_ok(vlandev))
-			netif_carrier_on(vlandev);
-	} else {
-		if (netif_carrier_ok(vlandev))
-			netif_carrier_off(vlandev);
-	}
-}
-
 int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 {
 	const char *name = real_dev->name;
@@ -262,7 +241,7 @@ int register_vlan_dev(struct net_device *dev)
 	/* Account for reference in struct vlan_dev_info */
 	dev_hold(real_dev);
 
-	vlan_transfer_operstate(real_dev, dev);
+	netif_stacked_transfer_operstate(real_dev, dev);
 	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
 
 	/* So, got the sucker initialized, now lets place
@@ -453,7 +432,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!vlandev)
 				continue;
 
-			vlan_transfer_operstate(dev, vlandev);
+			netif_stacked_transfer_operstate(dev, vlandev);
 		}
 		break;
 
@@ -511,7 +490,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			vlan = vlan_dev_info(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs & ~IFF_UP);
-			vlan_transfer_operstate(dev, vlandev);
+			netif_stacked_transfer_operstate(dev, vlandev);
 		}
 		break;
 
@@ -529,7 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			vlan = vlan_dev_info(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs | IFF_UP);
-			vlan_transfer_operstate(dev, vlandev);
+			netif_stacked_transfer_operstate(dev, vlandev);
 		}
 		break;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 0913a08a87d6..c36a17aafcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4900,6 +4900,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
+/**
+ *	netif_stacked_transfer_operstate -	transfer operstate
+ *	@rootdev: the root or lower level device to transfer state from
+ *	@dev: the device to transfer operstate to
+ *
+ *	Transfer operational state from root to device. This is normally
+ *	called when a stacking relationship exists between the root
+ *	device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+					struct net_device *dev)
+{
+	if (rootdev->operstate == IF_OPER_DORMANT)
+		netif_dormant_on(dev);
+	else
+		netif_dormant_off(dev);
+
+	if (netif_carrier_ok(rootdev)) {
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	} else {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	}
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
-- 
cgit v1.2.3


From 7164bb4393cef668d3da281fa1c599a6673ea768 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 3 Dec 2009 10:31:56 -0500
Subject: fb-defio: If FBINFO_VIRTFB is defined, do not set VM_IO flag.

Most users (except sh_mobile_lcdcfb.c) get their framebuffer from
vmalloc. Setting VM_IO is not necessary as the memory obtained
from vmalloc is System RAM type and is not susceptible to PCI memory
constraints.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jaya Kumar <jayakumar.lkml@gmail.com>
---
 drivers/video/fb_defio.c | 4 +++-
 include/linux/fb.h       | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 0a7a6679ee6e..875d019bb2a4 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -144,7 +144,9 @@ static const struct address_space_operations fb_deferred_io_aops = {
 static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	vma->vm_ops = &fb_deferred_io_vm_ops;
-	vma->vm_flags |= ( VM_IO | VM_RESERVED | VM_DONTEXPAND );
+	vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND );
+	if (!(info->flags & FBINFO_VIRTFB))
+		vma->vm_flags |= VM_IO;
 	vma->vm_private_data = info;
 	return 0;
 }
diff --git a/include/linux/fb.h b/include/linux/fb.h
index f847df9e99b6..6e8ebf7e6523 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -768,6 +768,7 @@ struct fb_tile_ops {
 	 *  takes over; acceleration engine should be in a quiescent state */
 
 /* hints */
+#define FBINFO_VIRTFB		0x0004 /* FB is System RAM, not device. */
 #define FBINFO_PARTIAL_PAN_OK	0x0040 /* otw use pan only for double-buffering */
 #define FBINFO_READS_FAST	0x0080 /* soft-copy faster than rendering */
 
-- 
cgit v1.2.3


From 8a74770353d540d9b7641d22ea0401966f14cf2a Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Fri, 30 Oct 2009 18:03:39 -0200
Subject: sysctl: add missing comments

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/sysctl.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1e4743ee6831..9a3c8f777caf 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -98,8 +98,8 @@ enum
 	KERN_VERSION=4,		/* string: compile time info */
 	KERN_SECUREMASK=5,	/* struct: maximum rights mask */
 	KERN_PROF=6,		/* table: profiling information */
-	KERN_NODENAME=7,
-	KERN_DOMAINNAME=8,
+	KERN_NODENAME=7,	/* string: hostname */
+	KERN_DOMAINNAME=8,	/* string: domainname */
 
 	KERN_PANIC=15,		/* int: panic timeout */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
@@ -111,8 +111,8 @@ enum
 	KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */
 	KERN_PPC_ZEROPAGED=26,	/* turn idle page zeroing on/off on PPC */
 	KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */
-	KERN_MODPROBE=28,
-	KERN_SG_BIG_BUFF=29,
+	KERN_MODPROBE=28,	/* string: modprobe path */
+	KERN_SG_BIG_BUFF=29,	/* int: sg driver reserved buffer size */
 	KERN_ACCT=30,		/* BSD process accounting parameters */
 	KERN_PPC_L2CR=31,	/* l2cr register on PPC */
 
@@ -159,7 +159,7 @@ enum
 	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
 	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
 	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
-	KERN_MAX_LOCK_DEPTH=74,
+	KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
 };
-- 
cgit v1.2.3


From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001
From: André Goddard Rosa <andre.goddard@gmail.com>
Date: Sat, 14 Nov 2009 13:09:05 -0200
Subject: tree-wide: fix assorted typos all over the place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That is "success", "unknown", "through", "performance", "[re|un]mapping"
, "access", "default", "reasonable", "[con]currently", "temperature"
, "channel", "[un]used", "application", "example","hierarchy", "therefore"
, "[over|under]flow", "contiguous", "threshold", "enough" and others.

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/ABI/testing/procfs-diskstats         |  2 +-
 Documentation/ABI/testing/sysfs-block              |  2 +-
 Documentation/DocBook/mtdnand.tmpl                 |  2 +-
 Documentation/DocBook/v4l/videodev2.h.xml          |  2 +-
 Documentation/DocBook/writing-an-alsa-driver.tmpl  |  2 +-
 Documentation/dvb/README.dvb-usb                   |  2 +-
 Documentation/lguest/lguest.c                      |  2 +-
 Documentation/scsi/ChangeLog.megaraid_sas          |  2 +-
 Documentation/spi/spi-summary                      |  2 +-
 Documentation/sysctl/vm.txt                        |  2 +-
 Documentation/video4linux/gspca.txt                |  2 +-
 Documentation/vm/page-types.c                      |  2 +-
 arch/alpha/mm/numa.c                               |  2 +-
 arch/arm/common/scoop.c                            |  2 +-
 .../mach-bcmring/include/mach/csp/dmacHw_priv.h    |  2 +-
 arch/arm/mach-bcmring/include/mach/dma.h           |  2 +-
 arch/arm/mach-lh7a40x/include/mach/hardware.h      |  2 +-
 arch/arm/mach-orion5x/pci.c                        |  2 +-
 arch/arm/mach-pxa/include/mach/palmld.h            |  2 +-
 arch/arm/mach-pxa/include/mach/palmt5.h            |  2 +-
 arch/arm/mach-pxa/include/mach/palmtc.h            |  2 +-
 arch/arm/mach-pxa/include/mach/palmte2.h           |  2 +-
 arch/arm/mach-pxa/include/mach/palmtx.h            |  2 +-
 arch/arm/mach-pxa/include/mach/palmz72.h           |  2 +-
 arch/arm/mach-s3c6400/setup-sdhci.c                |  2 +-
 arch/arm/mach-s3c6410/setup-sdhci.c                |  2 +-
 arch/arm/mach-sa1100/dma.c                         |  2 +-
 arch/arm/plat-mxc/include/mach/iomux-mx3.h         |  2 +-
 arch/arm/plat-mxc/include/mach/iomux-mxc91231.h    |  2 +-
 arch/arm/plat-mxc/pwm.c                            |  2 +-
 arch/arm/plat-omap/dma.c                           |  2 +-
 arch/arm/plat-omap/include/mach/omap16xx.h         |  2 +-
 arch/arm/plat-s3c24xx/include/plat/map.h           |  2 +-
 arch/avr32/boards/hammerhead/Kconfig               |  2 +-
 arch/blackfin/kernel/traps.c                       |  2 +-
 .../mach-bf518/include/mach/defBF51x_base.h        |  4 +-
 .../mach-bf527/include/mach/defBF52x_base.h        |  4 +-
 arch/blackfin/mach-bf537/include/mach/defBF534.h   |  4 +-
 arch/blackfin/mach-bf548/include/mach/defBF544.h   |  4 +-
 arch/blackfin/mach-bf548/include/mach/defBF547.h   |  4 +-
 arch/blackfin/mach-bf548/include/mach/defBF548.h   |  4 +-
 arch/blackfin/mach-bf548/include/mach/defBF549.h   |  4 +-
 arch/cris/mm/fault.c                               |  2 +-
 arch/ia64/hp/common/sba_iommu.c                    |  2 +-
 arch/ia64/ia32/ia32_entry.S                        |  2 +-
 arch/ia64/include/asm/perfmon_default_smpl.h       |  2 +-
 arch/ia64/include/asm/sn/shubio.h                  |  2 +-
 arch/ia64/kernel/esi.c                             |  2 +-
 arch/ia64/kernel/perfmon.c                         |  2 +-
 arch/m68k/ifpsp060/src/fpsp.S                      | 28 +++++-----
 arch/m68k/ifpsp060/src/pfpsp.S                     | 26 ++++-----
 arch/m68k/include/asm/bootinfo.h                   |  2 +-
 arch/microblaze/lib/memcpy.c                       |  2 +-
 arch/microblaze/lib/memmove.c                      |  2 +-
 arch/microblaze/lib/memset.c                       |  2 +-
 arch/mips/include/asm/mach-pnx833x/gpio.h          |  2 +-
 arch/mips/include/asm/sgi/ioc.h                    |  2 +-
 arch/mips/include/asm/sibyte/sb1250_mac.h          |  2 +-
 arch/mips/include/asm/sn/sn0/hubio.h               |  2 +-
 arch/mips/kernel/smtc.c                            |  2 +-
 arch/mips/math-emu/dp_sub.c                        |  2 +-
 arch/mips/txx9/generic/smsc_fdc37m81x.c            |  2 +-
 arch/powerpc/include/asm/reg_fsl_emb.h             |  2 +-
 arch/powerpc/kernel/kgdb.c                         |  2 +-
 arch/powerpc/kernel/tau_6xx.c                      |  2 +-
 arch/powerpc/oprofile/op_model_cell.c              |  4 +-
 arch/powerpc/platforms/52xx/mpc52xx_pci.c          |  2 +-
 arch/powerpc/platforms/powermac/pci.c              |  2 +-
 arch/powerpc/sysdev/dart_iommu.c                   |  2 +-
 arch/s390/math-emu/math.c                          |  4 +-
 arch/x86/include/asm/desc_defs.h                   |  4 +-
 arch/x86/include/asm/mmzone_32.h                   |  2 +-
 arch/x86/include/asm/uv/uv_bau.h                   |  2 +-
 arch/x86/kernel/acpi/boot.c                        |  2 +-
 arch/x86/kernel/amd_iommu.c                        |  4 +-
 arch/x86/kernel/cpu/perf_event.c                   |  2 +-
 arch/x86/kernel/kprobes.c                          |  4 +-
 arch/x86/mm/kmmio.c                                |  4 +-
 block/blk-iopoll.c                                 |  2 +-
 drivers/ata/ata_piix.c                             |  2 +-
 drivers/ata/sata_fsl.c                             |  6 +--
 drivers/atm/iphase.c                               |  2 +-
 drivers/base/dd.c                                  |  2 +-
 drivers/bluetooth/btmrvl_sdio.c                    |  2 +-
 drivers/bluetooth/hci_ldisc.c                      |  2 +-
 drivers/char/mem.c                                 |  2 +-
 drivers/char/mspec.c                               |  2 +-
 drivers/char/n_r3964.c                             |  2 +-
 drivers/char/rio/route.h                           |  2 +-
 drivers/crypto/hifn_795x.c                         |  2 +-
 drivers/dma/at_hdmac.c                             |  2 +-
 drivers/firewire/core-topology.c                   |  2 +-
 drivers/gpu/drm/drm_crtc.c                         |  4 +-
 drivers/gpu/drm/i915/i915_gem.c                    |  2 +-
 drivers/gpu/drm/i915/intel_fb.c                    |  2 +-
 drivers/gpu/drm/i915/intel_sdvo.c                  |  2 +-
 drivers/gpu/drm/radeon/r600.c                      |  4 +-
 drivers/gpu/drm/radeon/radeon_fb.c                 |  2 +-
 drivers/gpu/drm/radeon/radeon_state.c              |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c                |  2 +-
 drivers/gpu/drm/radeon/rv770.c                     |  4 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c                  |  2 +-
 drivers/hwmon/adm1029.c                            |  2 +-
 drivers/hwmon/lm93.c                               |  2 +-
 drivers/ieee1394/dv1394.c                          |  2 +-
 drivers/infiniband/hw/ipath/ipath_iba6110.c        |  2 +-
 drivers/infiniband/hw/ipath/ipath_sd7220.c         |  4 +-
 drivers/infiniband/hw/mlx4/qp.c                    |  2 +-
 drivers/input/serio/hp_sdc.c                       |  2 +-
 drivers/input/serio/hp_sdc_mlc.c                   |  2 +-
 drivers/input/touchscreen/atmel-wm97xx.c           |  2 +-
 drivers/input/touchscreen/mainstone-wm97xx.c       |  4 +-
 drivers/input/touchscreen/zylonite-wm97xx.c        |  2 +-
 drivers/isdn/capi/capidrv.c                        |  2 +-
 drivers/isdn/hardware/eicon/di.c                   |  2 +-
 drivers/isdn/hardware/eicon/maintidi.c             |  4 +-
 drivers/isdn/hardware/mISDN/hfcsusb.c              |  2 +-
 drivers/isdn/hardware/mISDN/hfcsusb.h              |  2 +-
 drivers/isdn/hardware/mISDN/mISDNisar.c            |  2 +-
 drivers/isdn/hisax/hfc_usb.c                       |  2 +-
 drivers/isdn/i4l/isdn_ppp.c                        |  6 +--
 drivers/isdn/i4l/isdn_ttyfax.c                     |  2 +-
 drivers/isdn/mISDN/dsp_core.c                      |  2 +-
 drivers/isdn/mISDN/tei.c                           |  2 +-
 drivers/macintosh/therm_windtunnel.c               |  2 +-
 drivers/media/common/saa7146_i2c.c                 |  2 +-
 drivers/media/dvb/dvb-core/dvb_frontend.h          |  2 +-
 drivers/media/dvb/dvb-usb/anysee.c                 |  2 +-
 drivers/media/dvb/dvb-usb/dibusb-mb.c              |  2 +-
 drivers/media/dvb/dvb-usb/dvb-usb-remote.c         |  2 +-
 drivers/media/dvb/dvb-usb/usb-urb.c                |  4 +-
 drivers/media/dvb/frontends/au8522_decoder.c       |  2 +-
 drivers/media/dvb/frontends/cx24110.c              |  4 +-
 drivers/media/dvb/frontends/cx24113.c              |  2 +-
 drivers/media/dvb/frontends/dib3000mb.c            |  2 +-
 drivers/media/dvb/frontends/lgdt330x.c             |  4 +-
 drivers/media/dvb/frontends/stb0899_drv.c          |  2 +-
 drivers/media/dvb/ttpci/av7110.c                   |  4 +-
 drivers/media/dvb/ttpci/budget-patch.c             |  2 +-
 drivers/media/radio/radio-mr800.c                  |  2 +-
 drivers/media/video/cx231xx/cx231xx-avcore.c       |  8 +--
 drivers/media/video/cx23885/cx23885-dvb.c          |  2 +-
 drivers/media/video/cx88/cx88-core.c               |  2 +-
 drivers/media/video/davinci/dm355_ccdc.c           |  2 +-
 drivers/media/video/davinci/vpss.c                 |  2 +-
 drivers/media/video/gspca/sonixb.c                 |  2 +-
 drivers/media/video/gspca/spca500.c                |  2 +-
 drivers/media/video/gspca/spca501.c                |  6 +--
 drivers/media/video/gspca/sunplus.c                |  2 +-
 drivers/media/video/gspca/zc3xx.c                  |  2 +-
 drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h |  2 +-
 drivers/media/video/s2255drv.c                     |  2 +-
 drivers/media/video/zoran/zoran.h                  |  2 +-
 drivers/message/i2o/i2o_block.c                    |  2 +-
 drivers/message/i2o/iop.c                          |  4 +-
 drivers/misc/sgi-gru/grufile.c                     |  2 +-
 drivers/mmc/host/s3cmci.c                          |  2 +-
 drivers/mtd/devices/slram.c                        |  2 +-
 drivers/mtd/nand/diskonchip.c                      |  2 +-
 drivers/mtd/nand/nand_ecc.c                        |  2 +-
 drivers/mtd/nand/s3c2410.c                         |  2 +-
 drivers/net/82596.c                                |  2 +-
 drivers/net/amd8111e.c                             |  7 ++-
 drivers/net/appletalk/cops.c                       |  2 +-
 drivers/net/ariadne.h                              |  2 +-
 drivers/net/atl1c/atl1c_main.c                     |  2 +-
 drivers/net/benet/be_cmds.h                        |  2 +-
 drivers/net/benet/be_main.c                        |  2 +-
 drivers/net/bnx2x_reg.h                            |  2 +-
 drivers/net/cxgb3/sge.c                            |  2 +-
 drivers/net/ehea/ehea_ethtool.c                    |  4 +-
 drivers/net/hamradio/baycom_ser_fdx.c              |  2 +-
 drivers/net/iseries_veth.c                         |  2 +-
 drivers/net/lasi_82596.c                           |  2 +-
 drivers/net/lib82596.c                             |  2 +-
 drivers/net/mlx4/en_rx.c                           |  2 +-
 drivers/net/mlx4/en_tx.c                           |  2 +-
 drivers/net/mlx4/mlx4_en.h                         |  2 +-
 drivers/net/ps3_gelic_net.c                        |  2 +-
 drivers/net/sis900.c                               |  4 +-
 drivers/net/skfp/h/smc.h                           |  8 +--
 drivers/net/skfp/skfddi.c                          |  2 +-
 drivers/net/smsc911x.c                             |  2 +-
 drivers/net/smsc911x.h                             |  2 +-
 drivers/net/spider_net.c                           |  2 +-
 drivers/net/stmmac/gmac.c                          |  2 +-
 drivers/net/stmmac/gmac.h                          |  4 +-
 drivers/net/tokenring/smctr.c                      |  2 +-
 drivers/net/ucc_geth.c                             |  2 +-
 drivers/net/ucc_geth.h                             | 20 +++----
 drivers/net/usb/smsc95xx.c                         |  2 +-
 drivers/net/wan/lmc/lmc_main.c                     |  2 +-
 drivers/net/wimax/i2400m/rx.c                      |  2 +-
 drivers/net/wireless/ath/ath5k/phy.c               |  6 +--
 drivers/net/wireless/ath/ath9k/rc.c                |  2 +-
 drivers/net/wireless/ipw2x00/ipw2100.c             |  6 +--
 drivers/net/wireless/ipw2x00/ipw2200.c             |  8 +--
 drivers/net/wireless/ipw2x00/libipw_module.c       |  2 +-
 drivers/net/wireless/iwmc3200wifi/hal.c            |  2 +-
 drivers/net/wireless/iwmc3200wifi/rx.c             |  2 +-
 drivers/net/wireless/libertas/if_sdio.c            |  2 +-
 drivers/net/wireless/prism54/isl_ioctl.c           |  4 +-
 drivers/net/wireless/rt2x00/rt2400pci.h            |  2 +-
 drivers/net/wireless/rt2x00/rt2500pci.h            |  2 +-
 drivers/net/wireless/rt2x00/rt2500usb.h            |  2 +-
 drivers/net/wireless/rt2x00/rt61pci.h              |  2 +-
 drivers/net/wireless/rt2x00/rt73usb.h              |  2 +-
 drivers/net/wireless/wavelan_cs.c                  |  2 +-
 drivers/net/wireless/zd1211rw/zd_mac.c             |  2 +-
 drivers/parisc/ccio-dma.c                          |  2 +-
 drivers/platform/x86/thinkpad_acpi.c               |  2 +-
 drivers/pnp/pnpbios/rsparser.c                     |  8 +--
 drivers/ps3/ps3-sys-manager.c                      |  2 +-
 drivers/rtc/rtc-v3020.c                            |  2 +-
 drivers/s390/char/fs3270.c                         |  2 +-
 drivers/s390/cio/chp.c                             |  2 +-
 drivers/s390/cio/cmf.c                             |  2 +-
 drivers/sbus/char/envctrl.c                        |  4 +-
 drivers/scsi/53c700.c                              |  2 +-
 drivers/scsi/aacraid/aacraid.h                     |  6 +--
 drivers/scsi/aacraid/comminit.c                    |  2 +-
 drivers/scsi/aic7xxx/aic79xx.seq                   |  2 +-
 drivers/scsi/aic7xxx/aic79xx_core.c                |  2 +-
 drivers/scsi/aic7xxx/aic7xxx_core.c                |  2 +-
 drivers/scsi/bfa/include/defs/bfa_defs_pport.h     |  2 +-
 drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h   |  2 +-
 drivers/scsi/hptiop.c                              |  2 +-
 drivers/scsi/libfc/fc_lport.c                      |  2 +-
 drivers/scsi/libiscsi_tcp.c                        |  2 +-
 drivers/scsi/lpfc/lpfc_attr.c                      |  4 +-
 drivers/scsi/lpfc/lpfc_els.c                       |  4 +-
 drivers/scsi/lpfc/lpfc_init.c                      | 62 +++++++++++-----------
 drivers/scsi/lpfc/lpfc_sli.c                       | 10 ++--
 drivers/scsi/megaraid.h                            |  2 +-
 drivers/scsi/megaraid/mbox_defs.h                  |  2 +-
 drivers/scsi/megaraid/megaraid_mbox.c              |  2 +-
 drivers/scsi/mpt2sas/mpt2sas_scsih.c               |  2 +-
 drivers/scsi/ncr53c8xx.c                           |  2 +-
 drivers/scsi/pmcraid.c                             |  6 +--
 drivers/scsi/pmcraid.h                             |  6 +--
 drivers/scsi/scsi_netlink.c                        |  2 +-
 drivers/scsi/scsi_transport_sas.c                  |  6 +--
 drivers/scsi/sym53c8xx_2/sym_glue.c                |  2 +-
 drivers/scsi/sym53c8xx_2/sym_hipd.c                |  2 +-
 drivers/scsi/sym53c8xx_2/sym_hipd.h                |  2 +-
 drivers/serial/8250_pnp.c                          |  4 +-
 drivers/serial/pmac_zilog.h                        |  2 +-
 drivers/serial/ucc_uart.c                          |  2 +-
 drivers/telephony/ixj.c                            |  4 +-
 drivers/usb/atm/ueagle-atm.c                       |  2 +-
 drivers/usb/class/usbtmc.c                         |  2 +-
 drivers/usb/core/message.c                         |  2 +-
 drivers/usb/gadget/f_acm.c                         |  2 +-
 drivers/usb/gadget/pxa27x_udc.c                    |  2 +-
 drivers/usb/host/fhci-sched.c                      |  2 +-
 drivers/usb/wusbcore/crypto.c                      |  2 +-
 drivers/usb/wusbcore/wa-xfer.c                     |  4 +-
 drivers/uwb/i1480/dfu/usb.c                        |  2 +-
 drivers/uwb/wlp/txrx.c                             |  2 +-
 drivers/video/aty/atyfb_base.c                     |  4 +-
 drivers/video/backlight/atmel-pwm-bl.c             |  2 +-
 drivers/video/backlight/tosa_lcd.c                 |  2 +-
 drivers/video/console/sticore.c                    |  2 +-
 drivers/video/gbefb.c                              |  2 +-
 drivers/video/stifb.c                              |  4 +-
 drivers/video/tdfxfb.c                             |  2 +-
 drivers/video/via/dvi.c                            |  4 +-
 drivers/video/vt8623fb.c                           |  2 +-
 drivers/watchdog/coh901327_wdt.c                   |  2 +-
 drivers/watchdog/machzwd.c                         |  2 +-
 drivers/watchdog/wdrtas.c                          |  6 +--
 fs/binfmt_elf.c                                    |  2 +-
 fs/bio.c                                           |  2 +-
 fs/btrfs/extent_map.c                              |  2 +-
 fs/cifs/README                                     |  2 +-
 fs/cifs/cifsglob.h                                 |  2 +-
 fs/cifs/inode.c                                    |  4 +-
 fs/cifs/smbdes.c                                   |  2 +-
 fs/dlm/plock.c                                     |  2 +-
 fs/ext4/inode.c                                    |  6 +--
 fs/ext4/mballoc.c                                  |  2 +-
 fs/jffs2/compr.c                                   |  2 +-
 fs/jffs2/readinode.c                               |  2 +-
 fs/jffs2/xattr.c                                   |  2 +-
 fs/jfs/jfs_dmap.c                                  |  4 +-
 fs/ncpfs/ioctl.c                                   |  2 +-
 fs/ntfs/compress.c                                 |  2 +-
 fs/ntfs/file.c                                     |  4 +-
 fs/ntfs/logfile.c                                  |  2 +-
 fs/ocfs2/alloc.c                                   |  2 +-
 fs/ocfs2/dlm/dlmmaster.c                           |  2 +-
 fs/ocfs2/dlmglue.c                                 |  2 +-
 fs/ocfs2/journal.c                                 |  2 +-
 fs/ocfs2/refcounttree.c                            |  2 +-
 fs/omfs/bitmap.c                                   |  2 +-
 fs/ubifs/recovery.c                                |  2 +-
 fs/xfs/quota/xfs_dquot.h                           |  2 +-
 include/asm-generic/memory_model.h                 |  2 +-
 include/asm-generic/unistd.h                       |  2 +-
 include/linux/chio.h                               |  2 +-
 include/linux/mfd/ezx-pcap.h                       |  4 +-
 include/linux/pktcdvd.h                            |  2 +-
 include/linux/serial_reg.h                         |  8 +--
 include/linux/videodev2.h                          |  2 +-
 include/net/sctp/structs.h                         |  2 +-
 include/net/tcp.h                                  |  2 +-
 include/net/wimax.h                                |  2 +-
 include/sound/wm8993.h                             |  2 +-
 kernel/perf_event.c                                |  4 +-
 lib/Kconfig.debug                                  |  2 +-
 lib/decompress_bunzip2.c                           |  2 +-
 lib/dma-debug.c                                    |  2 +-
 lib/swiotlb.c                                      |  2 +-
 mm/filemap.c                                       |  2 +-
 mm/memcontrol.c                                    |  4 +-
 mm/memory-failure.c                                |  2 +-
 net/ipv4/netfilter/ipt_ECN.c                       |  2 +-
 net/irda/irlap.c                                   | 14 ++---
 net/irda/irlap_event.c                             |  2 +-
 net/irda/irlmp.c                                   |  4 +-
 net/mac80211/mesh_pathtbl.c                        |  4 +-
 net/netlabel/netlabel_domainhash.c                 |  2 +-
 net/sctp/sm_sideeffect.c                           |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c              |  2 +-
 net/wimax/op-reset.c                               |  2 +-
 scripts/kconfig/mconf.c                            |  2 +-
 security/selinux/netlabel.c                        |  2 +-
 security/selinux/ss/services.c                     |  2 +-
 sound/Kconfig                                      |  2 +-
 sound/isa/cs423x/cs4236.c                          |  2 +-
 sound/isa/opti9xx/miro.c                           |  2 +-
 sound/isa/opti9xx/opti92x-ad1848.c                 |  2 +-
 sound/oss/dmasound/dmasound_paula.c                |  2 +-
 sound/pci/ca0106/ca0106_proc.c                     |  2 +-
 sound/pci/cs46xx/imgs/cwcdma.asp                   |  9 ++--
 sound/pci/emu10k1/emu10k1x.c                       |  2 +-
 sound/pci/hda/patch_cmedia.c                       |  2 +-
 sound/pci/hda/patch_realtek.c                      |  2 +-
 sound/pci/rme9652/hdspm.c                          |  4 +-
 sound/soc/codecs/uda134x.c                         |  4 +-
 sound/soc/codecs/wm8903.c                          |  6 +--
 sound/soc/codecs/wm8993.c                          |  4 +-
 sound/soc/s3c24xx/s3c24xx_simtec.c                 |  2 +-
 sound/soc/s6000/s6000-pcm.c                        |  2 +-
 sound/sound_core.c                                 |  2 +-
 345 files changed, 516 insertions(+), 508 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index 99233902e09e..f91a973a37fe 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -8,7 +8,7 @@ Description:
 		 1 - major number
 		 2 - minor mumber
 		 3 - device name
-		 4 - reads completed succesfully
+		 4 - reads completed successfully
 		 5 - reads merged
 		 6 - sectors read
 		 7 - time spent reading (ms)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3bedaf8e35..d2f90334bb93 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -4,7 +4,7 @@ Contact:	Jerome Marchand <jmarchan@redhat.com>
 Description:
 		The /sys/block/<disk>/stat files displays the I/O
 		statistics of disk <disk>. They contain 11 fields:
-		 1 - reads completed succesfully
+		 1 - reads completed successfully
 		 2 - reads merged
 		 3 - sectors read
 		 4 - time spent reading (ms)
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index df0d089d0fb9..f508a8a27fea 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -362,7 +362,7 @@ module_exit(board_cleanup);
 	<sect1 id="Multiple_chip_control">
 		<title>Multiple chip control</title>
 		<para>
-			The nand driver can control chip arrays. Therefor the
+			The nand driver can control chip arrays. Therefore the
 			board driver must provide an own select_chip function. This
 			function must (de)select the requested chip.
 			The function pointer in the nand_chip structure must
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml
index 97002060ac4f..26303e58345f 100644
--- a/Documentation/DocBook/v4l/videodev2.h.xml
+++ b/Documentation/DocBook/v4l/videodev2.h.xml
@@ -492,7 +492,7 @@ struct <link linkend="v4l2-jpegcompression">v4l2_jpegcompression</link> {
                                  * you do, leave them untouched.
                                  * Inluding less markers will make the
                                  * resulting code smaller, but there will
-                                 * be fewer aplications which can read it.
+                                 * be fewer applications which can read it.
                                  * The presence of the APP and COM marker
                                  * is influenced by APP_len and COM_len
                                  * ONLY, not by this property! */
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
index 7a2e0e98986a..0d0f7b4d4b1a 100644
--- a/Documentation/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -5318,7 +5318,7 @@ struct _snd_pcm_runtime {
       pages of the given size and map them onto the virtually contiguous
       memory.  The virtual pointer is addressed in runtime-&gt;dma_area.
       The physical address (runtime-&gt;dma_addr) is set to zero,
-      because the buffer is physically non-contigous.
+      because the buffer is physically non-contiguous.
       The physical address table is set up in sgbuf-&gt;table.
       You can get the physical address at a certain offset via
       <function>snd_pcm_sgbuf_get_addr()</function>. 
diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb
index bf2a9cdfe7bb..c8238e44ed6b 100644
--- a/Documentation/dvb/README.dvb-usb
+++ b/Documentation/dvb/README.dvb-usb
@@ -85,7 +85,7 @@ http://www.linuxtv.org/wiki/index.php/DVB_USB
 	     - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems
 	       better settled there (added xfer_ops-struct)
 	     - created a common files for frontends (mc/p/mb)
-  2004-09-28 - added support for a new device (Unkown, vendor ID is Hyper-Paltek)
+  2004-09-28 - added support for a new device (Unknown, vendor ID is Hyper-Paltek)
   2004-09-20 - added support for a new device (Compro DVB-U2000), thanks
 	       to Amaury Demol for reporting
 	     - changed usb TS transfer method (several urbs, stopping transfer
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 098de5bce00a..42208511b5c0 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -304,7 +304,7 @@ static void *map_zeroed_pages(unsigned int num)
 	addr = mmap(NULL, getpagesize() * num,
 		    PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0);
 	if (addr == MAP_FAILED)
-		err(1, "Mmaping %u pages of /dev/zero", num);
+		err(1, "Mmapping %u pages of /dev/zero", num);
 
 	/*
 	 * One neat mmap feature is that you can close the fd, and it
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index c851ef497795..84524e0cf9c3 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -185,7 +185,7 @@ ii.	FW enables WCE bit in Mode Sense cmd for drives that are configured
 	Disks are exposed with WCE=1. User is advised to enable Write Back
 	mode only when the controller has battery backup. At this time
 	Synhronize cache is not supported by the FW. Driver will short-cycle
-	the cmd and return sucess without sending down to FW.
+	the cmd and return success without sending down to FW.
 
 1 Release Date    : Sun Jan. 14 11:21:32 PDT 2007 -
 		 Sumant Patro <Sumant.Patro@lsil.com>/Bo Yang
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index deab51ddc33e..4884cb33845d 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -538,7 +538,7 @@ SPI MESSAGE QUEUE
 The bulk of the driver will be managing the I/O queue fed by transfer().
 
 That queue could be purely conceptual.  For example, a driver used only
-for low-frequency sensor acess might be fine using synchronous PIO.
+for low-frequency sensor access might be fine using synchronous PIO.
 
 But the queue will probably be very real, using message->queue, PIO,
 often DMA (especially if the root filesystem is in SPI flash), and
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index a6e360d2055c..fc5790d36cd9 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -370,7 +370,7 @@ The default is 1 percent.
 mmap_min_addr
 
 This file indicates the amount of address space  which a user process will
-be restricted from mmaping.  Since kernel null dereference bugs could
+be restricted from mmapping.  Since kernel null dereference bugs could
 accidentally operate based on the information in the first couple of pages
 of memory userspace processes should not be allowed to write to them.  By
 default this value is set to 0 and no protections will be enforced by the
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 3f61825be499..6b29555b58b7 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -6,7 +6,7 @@ The modules are:
 
 xxxx		vend:prod
 ----
-spca501		0000:0000	MystFromOri Unknow Camera
+spca501		0000:0000	MystFromOri Unknown Camera
 m5602		0402:5602	ALi Video Camera Controller
 spca501		040a:0002	Kodak DVC-325
 spca500		040a:0300	Kodak EZ200
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 3ec4f2a22585..aa7f4d0639c4 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -301,7 +301,7 @@ static char *page_flag_name(uint64_t flags)
 		present = (flags >> i) & 1;
 		if (!page_flag_names[i]) {
 			if (present)
-				fatal("unkown flag bit %d\n", i);
+				fatal("unknown flag bit %d\n", i);
 			continue;
 		}
 		buf[j++] = present ? page_flag_names[i][0] : '_';
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 10b403554b65..7b2c56d8f930 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -197,7 +197,7 @@ setup_memory_node(int nid, void *kernel_end)
 	}
 
 	if (bootmap_start == -1)
-		panic("couldn't find a contigous place for the bootmap");
+		panic("couldn't find a contiguous place for the bootmap");
 
 	/* Allocate the bootmap and mark the whole MM as reserved.  */
 	bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index 7713a08bb10c..37bda5f3dde3 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -82,7 +82,7 @@ static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
 	struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio);
 
-	/* XXX: I'm usure,  but it seems so */
+	/* XXX: I'm unsure, but it seems so */
 	return ioread16(sdev->base + SCOOP_GPRR) & (1 << (offset + 1));
 }
 
diff --git a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h
index 375066ad0186..cbf334d1c761 100644
--- a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h
+++ b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h
@@ -83,7 +83,7 @@ typedef struct {
 *  @brief   Get next available transaction width
 *
 *
-*  @return  On sucess  : Next avail able transaction width
+*  @return  On success  : Next available transaction width
 *           On failure : dmacHw_TRANSACTION_WIDTH_8
 *
 *  @note
diff --git a/arch/arm/mach-bcmring/include/mach/dma.h b/arch/arm/mach-bcmring/include/mach/dma.h
index 847980c85c88..1f2c5319c056 100644
--- a/arch/arm/mach-bcmring/include/mach/dma.h
+++ b/arch/arm/mach-bcmring/include/mach/dma.h
@@ -651,7 +651,7 @@ int dma_map_add_region(DMA_MemMap_t *memMap,	/* Stores state information about t
 /**
 *   Creates a descriptor ring from a memory mapping.
 *
-*   @return 0 on sucess, error code otherwise.
+*   @return 0 on success, error code otherwise.
 */
 /****************************************************************************/
 
diff --git a/arch/arm/mach-lh7a40x/include/mach/hardware.h b/arch/arm/mach-lh7a40x/include/mach/hardware.h
index 48e827d2fa56..59d2ace35217 100644
--- a/arch/arm/mach-lh7a40x/include/mach/hardware.h
+++ b/arch/arm/mach-lh7a40x/include/mach/hardware.h
@@ -31,7 +31,7 @@
 /*
  * This __REG() version gives the same results as the one above,  except
  * that we are fooling gcc somehow so it generates far better and smaller
- * assembly code for access to contigous registers.  It's a shame that gcc
+ * assembly code for access to contiguous registers.  It's a shame that gcc
  * doesn't guess this by itself.
  */
 #include <asm/types.h>
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index 36dc5413cc97..bdf96eb523bc 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -463,7 +463,7 @@ static void __init orion5x_setup_pci_wins(struct mbus_dram_target_info *dram)
 	writel(win_enable, PCI_BAR_ENABLE);
 
 	/*
-	 * Disable automatic update of address remaping when writing to BARs.
+	 * Disable automatic update of address remapping when writing to BARs.
 	 */
 	orion5x_setbits(PCI_ADDR_DECODE_CTRL, 1);
 }
diff --git a/arch/arm/mach-pxa/include/mach/palmld.h b/arch/arm/mach-pxa/include/mach/palmld.h
index 8721b8010221..ae536e86d8e8 100644
--- a/arch/arm/mach-pxa/include/mach/palmld.h
+++ b/arch/arm/mach-pxa/include/mach/palmld.h
@@ -91,7 +91,7 @@
 /* BATTERY */
 #define PALMLD_BAT_MAX_VOLTAGE		4000	/* 4.00V maximum voltage */
 #define PALMLD_BAT_MIN_VOLTAGE		3550	/* 3.55V critical voltage */
-#define PALMLD_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMLD_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMLD_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMLD_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMLD_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-pxa/include/mach/palmt5.h b/arch/arm/mach-pxa/include/mach/palmt5.h
index d15662aba008..6baf7469d4ec 100644
--- a/arch/arm/mach-pxa/include/mach/palmt5.h
+++ b/arch/arm/mach-pxa/include/mach/palmt5.h
@@ -66,7 +66,7 @@
 /* BATTERY */
 #define PALMT5_BAT_MAX_VOLTAGE		4000	/* 4.00v current voltage */
 #define PALMT5_BAT_MIN_VOLTAGE		3550	/* 3.55v critical voltage */
-#define PALMT5_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMT5_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMT5_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMT5_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMT5_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h
index 3dc9b074ab46..3f9dd3fd4638 100644
--- a/arch/arm/mach-pxa/include/mach/palmtc.h
+++ b/arch/arm/mach-pxa/include/mach/palmtc.h
@@ -68,7 +68,7 @@
 /* BATTERY */
 #define PALMTC_BAT_MAX_VOLTAGE		4000	/* 4.00V maximum voltage */
 #define PALMTC_BAT_MIN_VOLTAGE		3550	/* 3.55V critical voltage */
-#define PALMTC_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMTC_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMTC_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMTC_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMTC_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-pxa/include/mach/palmte2.h b/arch/arm/mach-pxa/include/mach/palmte2.h
index 12361341f9d8..f89e989a7637 100644
--- a/arch/arm/mach-pxa/include/mach/palmte2.h
+++ b/arch/arm/mach-pxa/include/mach/palmte2.h
@@ -59,7 +59,7 @@
 /* BATTERY */
 #define PALMTE2_BAT_MAX_VOLTAGE		4000	/* 4.00v current voltage */
 #define PALMTE2_BAT_MIN_VOLTAGE		3550	/* 3.55v critical voltage */
-#define PALMTE2_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMTE2_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMTE2_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMTE2_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMTE2_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h
index 1be0db6ed55e..10abc4f2e8e4 100644
--- a/arch/arm/mach-pxa/include/mach/palmtx.h
+++ b/arch/arm/mach-pxa/include/mach/palmtx.h
@@ -94,7 +94,7 @@
 /* BATTERY */
 #define PALMTX_BAT_MAX_VOLTAGE		4000	/* 4.00v current voltage */
 #define PALMTX_BAT_MIN_VOLTAGE		3550	/* 3.55v critical voltage */
-#define PALMTX_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMTX_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMTX_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMTX_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMTX_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-pxa/include/mach/palmz72.h b/arch/arm/mach-pxa/include/mach/palmz72.h
index 2806ef69ba5a..2bbcf70dd935 100644
--- a/arch/arm/mach-pxa/include/mach/palmz72.h
+++ b/arch/arm/mach-pxa/include/mach/palmz72.h
@@ -49,7 +49,7 @@
 /* Battery */
 #define PALMZ72_BAT_MAX_VOLTAGE		4000	/* 4.00v current voltage */
 #define PALMZ72_BAT_MIN_VOLTAGE		3550	/* 3.55v critical voltage */
-#define PALMZ72_BAT_MAX_CURRENT		0	/* unknokn */
+#define PALMZ72_BAT_MAX_CURRENT		0	/* unknown */
 #define PALMZ72_BAT_MIN_CURRENT		0	/* unknown */
 #define PALMZ72_BAT_MAX_CHARGE		1	/* unknown */
 #define PALMZ72_BAT_MIN_CHARGE		1	/* unknown */
diff --git a/arch/arm/mach-s3c6400/setup-sdhci.c b/arch/arm/mach-s3c6400/setup-sdhci.c
index b93dafbee1f4..1039937403be 100644
--- a/arch/arm/mach-s3c6400/setup-sdhci.c
+++ b/arch/arm/mach-s3c6400/setup-sdhci.c
@@ -30,7 +30,7 @@ char *s3c6400_hsmmc_clksrcs[4] = {
 	[0] = "hsmmc",
 	[1] = "hsmmc",
 	[2] = "mmc_bus",
-	/* [3] = "48m", - note not succesfully used yet */
+	/* [3] = "48m", - note not successfully used yet */
 };
 
 void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
diff --git a/arch/arm/mach-s3c6410/setup-sdhci.c b/arch/arm/mach-s3c6410/setup-sdhci.c
index 20666f3bd478..816d2d9f9ef8 100644
--- a/arch/arm/mach-s3c6410/setup-sdhci.c
+++ b/arch/arm/mach-s3c6410/setup-sdhci.c
@@ -30,7 +30,7 @@ char *s3c6410_hsmmc_clksrcs[4] = {
 	[0] = "hsmmc",
 	[1] = "hsmmc",
 	[2] = "mmc_bus",
-	/* [3] = "48m", - note not succesfully used yet */
+	/* [3] = "48m", - note not successfully used yet */
 };
 
 
diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c
index cb4521a6f42d..ad660350c296 100644
--- a/arch/arm/mach-sa1100/dma.c
+++ b/arch/arm/mach-sa1100/dma.c
@@ -65,7 +65,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id)
 
 
 /**
- *	sa1100_request_dma - allocate one of the SA11x0's DMA chanels
+ *	sa1100_request_dma - allocate one of the SA11x0's DMA channels
  *	@device: The SA11x0 peripheral targeted by this request
  *	@device_id: An ascii name for the claiming device
  *	@callback: Function to be called when the DMA completes
diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx3.h b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
index 446f86763816..0c7802bbeccb 100644
--- a/arch/arm/plat-mxc/include/mach/iomux-mx3.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
@@ -112,7 +112,7 @@ enum iomux_gp_func {
  * setups a single pin:
  * 	- reserves the pin so that it is not claimed by another driver
  * 	- setups the iomux according to the configuration
- * 	- if the pin is configured as a GPIO, we claim it throug kernel gpiolib
+ * 	- if the pin is configured as a GPIO, we claim it through kernel gpiolib
  */
 int mxc_iomux_alloc_pin(const unsigned int pin, const char *label);
 /*
diff --git a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h
index 9f13061192c8..3887f3fe29d4 100644
--- a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h
@@ -48,7 +48,7 @@
  * setups a single pin:
  * 	- reserves the pin so that it is not claimed by another driver
  * 	- setups the iomux according to the configuration
- * 	- if the pin is configured as a GPIO, we claim it throug kernel gpiolib
+ * 	- if the pin is configured as a GPIO, we claim it through kernel gpiolib
  */
 int mxc_iomux_alloc_pin(const unsigned int pin_mode, const char *label);
 /*
diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c
index 5cdbd605ac05..4ff6dfe04283 100644
--- a/arch/arm/plat-mxc/pwm.c
+++ b/arch/arm/plat-mxc/pwm.c
@@ -94,7 +94,7 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 		 * register to follow the ratio of duty_ns vs. period_ns
 		 * accordingly.
 		 *
-		 * This is good enought for programming the brightness of
+		 * This is good enough for programming the brightness of
 		 * the LCD backlight.
 		 *
 		 * The real implementation would divide PERCLK[0] first by
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index b53125f41293..0e308913291b 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -1232,7 +1232,7 @@ static void create_dma_lch_chain(int lch_head, int lch_queue)
  * 					      OMAP_DMA_DYNAMIC_CHAIN
  * @params - Channel parameters
  *
- * @return - Succes : 0
+ * @return - Success : 0
  * 	     Failure: -EINVAL/-ENOMEM
  */
 int omap_request_dma_chain(int dev_id, const char *dev_name,
diff --git a/arch/arm/plat-omap/include/mach/omap16xx.h b/arch/arm/plat-omap/include/mach/omap16xx.h
index 0e69b504c25f..7560b4d583a3 100644
--- a/arch/arm/plat-omap/include/mach/omap16xx.h
+++ b/arch/arm/plat-omap/include/mach/omap16xx.h
@@ -124,7 +124,7 @@
 #define TIPB_SWITCH_BASE		 (0xfffbc800)
 #define OMAP16XX_MMCSD2_SSW_MPU_CONF	(TIPB_SWITCH_BASE + 0x160)
 
-/* UART3 Registers Maping through MPU bus */
+/* UART3 Registers Mapping through MPU bus */
 #define UART3_RHR               (OMAP_UART3_BASE + 0)
 #define UART3_THR               (OMAP_UART3_BASE + 0)
 #define UART3_DLL               (OMAP_UART3_BASE + 0)
diff --git a/arch/arm/plat-s3c24xx/include/plat/map.h b/arch/arm/plat-s3c24xx/include/plat/map.h
index c4d133436fc7..bd534d32b993 100644
--- a/arch/arm/plat-s3c24xx/include/plat/map.h
+++ b/arch/arm/plat-s3c24xx/include/plat/map.h
@@ -64,7 +64,7 @@
 /* the calculation for the VA of this must ensure that
  * it is the same distance apart from the UART in the
  * phsyical address space, as the initial mapping for the IO
- * is done as a 1:1 maping. This puts it (currently) at
+ * is done as a 1:1 mapping. This puts it (currently) at
  * 0xFA800000, which is not in the way of any current mapping
  * by the base system.
 */
diff --git a/arch/avr32/boards/hammerhead/Kconfig b/arch/avr32/boards/hammerhead/Kconfig
index fda2331f9789..5c13d785cc70 100644
--- a/arch/avr32/boards/hammerhead/Kconfig
+++ b/arch/avr32/boards/hammerhead/Kconfig
@@ -24,7 +24,7 @@ config BOARD_HAMMERHEAD_SND
 	bool "Atmel AC97 Sound support"
 	help
 	  This enables Sound support for the Hammerhead board. You may
-	  also go trough the ALSA settings to get it working.
+	  also go through the ALSA settings to get it working.
 
 	  Choose 'Y' here if you have ordered a Corona daugther board and
 	  want to make your board funky.
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 6b7325d634af..78cb3d38f899 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -619,7 +619,7 @@ asmlinkage notrace void trap_c(struct pt_regs *fp)
 
 /*
  * Similar to get_user, do some address checking, then dereference
- * Return true on sucess, false on bad address
+ * Return true on success, false on bad address
  */
 static bool get_instruction(unsigned short *val, unsigned short *address)
 {
diff --git a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h
index e06f4112c695..f9fd2b2a2956 100644
--- a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h
+++ b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h
@@ -542,7 +542,7 @@
 #define HMDMA0_CONTROL		0xFFC03300	/* Handshake MDMA0 Control Register					*/
 #define HMDMA0_ECINIT		0xFFC03304	/* HMDMA0 Initial Edge Count Register				*/
 #define HMDMA0_BCINIT		0xFFC03308	/* HMDMA0 Initial Block Count Register				*/
-#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshhold Register		*/
+#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshold Register		*/
 #define HMDMA0_ECOVERFLOW	0xFFC03310	/* HMDMA0 Edge Count Overflow Interrupt Register	*/
 #define HMDMA0_ECOUNT		0xFFC03314	/* HMDMA0 Current Edge Count Register				*/
 #define HMDMA0_BCOUNT		0xFFC03318	/* HMDMA0 Current Block Count Register				*/
@@ -550,7 +550,7 @@
 #define HMDMA1_CONTROL		0xFFC03340	/* Handshake MDMA1 Control Register					*/
 #define HMDMA1_ECINIT		0xFFC03344	/* HMDMA1 Initial Edge Count Register				*/
 #define HMDMA1_BCINIT		0xFFC03348	/* HMDMA1 Initial Block Count Register				*/
-#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshhold Register		*/
+#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshold Register		*/
 #define HMDMA1_ECOVERFLOW	0xFFC03350	/* HMDMA1 Edge Count Overflow Interrupt Register	*/
 #define HMDMA1_ECOUNT		0xFFC03354	/* HMDMA1 Current Edge Count Register				*/
 #define HMDMA1_BCOUNT		0xFFC03358	/* HMDMA1 Current Block Count Register				*/
diff --git a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h
index f821700716ee..b9dbb73d7ef0 100644
--- a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h
+++ b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h
@@ -544,7 +544,7 @@
 #define HMDMA0_CONTROL		0xFFC03300	/* Handshake MDMA0 Control Register					*/
 #define HMDMA0_ECINIT		0xFFC03304	/* HMDMA0 Initial Edge Count Register				*/
 #define HMDMA0_BCINIT		0xFFC03308	/* HMDMA0 Initial Block Count Register				*/
-#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshhold Register		*/
+#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshold Register		*/
 #define HMDMA0_ECOVERFLOW	0xFFC03310	/* HMDMA0 Edge Count Overflow Interrupt Register	*/
 #define HMDMA0_ECOUNT		0xFFC03314	/* HMDMA0 Current Edge Count Register				*/
 #define HMDMA0_BCOUNT		0xFFC03318	/* HMDMA0 Current Block Count Register				*/
@@ -552,7 +552,7 @@
 #define HMDMA1_CONTROL		0xFFC03340	/* Handshake MDMA1 Control Register					*/
 #define HMDMA1_ECINIT		0xFFC03344	/* HMDMA1 Initial Edge Count Register				*/
 #define HMDMA1_BCINIT		0xFFC03348	/* HMDMA1 Initial Block Count Register				*/
-#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshhold Register		*/
+#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshold Register		*/
 #define HMDMA1_ECOVERFLOW	0xFFC03350	/* HMDMA1 Edge Count Overflow Interrupt Register	*/
 #define HMDMA1_ECOUNT		0xFFC03354	/* HMDMA1 Current Edge Count Register				*/
 #define HMDMA1_BCOUNT		0xFFC03358	/* HMDMA1 Current Block Count Register				*/
diff --git a/arch/blackfin/mach-bf537/include/mach/defBF534.h b/arch/blackfin/mach-bf537/include/mach/defBF534.h
index cebb14feb1ba..a6d20ca57683 100644
--- a/arch/blackfin/mach-bf537/include/mach/defBF534.h
+++ b/arch/blackfin/mach-bf537/include/mach/defBF534.h
@@ -934,7 +934,7 @@
 #define HMDMA0_CONTROL		0xFFC03300	/* Handshake MDMA0 Control Register                                     */
 #define HMDMA0_ECINIT		0xFFC03304	/* HMDMA0 Initial Edge Count Register                           */
 #define HMDMA0_BCINIT		0xFFC03308	/* HMDMA0 Initial Block Count Register                          */
-#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshhold Register         */
+#define HMDMA0_ECURGENT		0xFFC0330C	/* HMDMA0 Urgent Edge Count Threshold Register         */
 #define HMDMA0_ECOVERFLOW	0xFFC03310	/* HMDMA0 Edge Count Overflow Interrupt Register        */
 #define HMDMA0_ECOUNT		0xFFC03314	/* HMDMA0 Current Edge Count Register                           */
 #define HMDMA0_BCOUNT		0xFFC03318	/* HMDMA0 Current Block Count Register                          */
@@ -942,7 +942,7 @@
 #define HMDMA1_CONTROL		0xFFC03340	/* Handshake MDMA1 Control Register                                     */
 #define HMDMA1_ECINIT		0xFFC03344	/* HMDMA1 Initial Edge Count Register                           */
 #define HMDMA1_BCINIT		0xFFC03348	/* HMDMA1 Initial Block Count Register                          */
-#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshhold Register         */
+#define HMDMA1_ECURGENT		0xFFC0334C	/* HMDMA1 Urgent Edge Count Threshold Register         */
 #define HMDMA1_ECOVERFLOW	0xFFC03350	/* HMDMA1 Edge Count Overflow Interrupt Register        */
 #define HMDMA1_ECOUNT		0xFFC03354	/* HMDMA1 Current Edge Count Register                           */
 #define HMDMA1_BCOUNT		0xFFC03358	/* HMDMA1 Current Block Count Register                          */
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF544.h b/arch/blackfin/mach-bf548/include/mach/defBF544.h
index dd414ae4ba4c..39f588dcd382 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF544.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF544.h
@@ -491,7 +491,7 @@
 #define                   HMDMA0_CONTROL  0xffc04500   /* Handshake MDMA0 Control Register */
 #define                    HMDMA0_ECINIT  0xffc04504   /* Handshake MDMA0 Initial Edge Count Register */
 #define                    HMDMA0_BCINIT  0xffc04508   /* Handshake MDMA0 Initial Block Count Register */
-#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshhold Register */
+#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshold Register */
 #define                HMDMA0_ECOVERFLOW  0xffc04510   /* Handshake MDMA0 Edge Count Overflow Interrupt Register */
 #define                    HMDMA0_ECOUNT  0xffc04514   /* Handshake MDMA0 Current Edge Count Register */
 #define                    HMDMA0_BCOUNT  0xffc04518   /* Handshake MDMA0 Current Block Count Register */
@@ -501,7 +501,7 @@
 #define                   HMDMA1_CONTROL  0xffc04540   /* Handshake MDMA1 Control Register */
 #define                    HMDMA1_ECINIT  0xffc04544   /* Handshake MDMA1 Initial Edge Count Register */
 #define                    HMDMA1_BCINIT  0xffc04548   /* Handshake MDMA1 Initial Block Count Register */
-#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshhold Register */
+#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshold Register */
 #define                HMDMA1_ECOVERFLOW  0xffc04550   /* Handshake MDMA1 Edge Count Overflow Interrupt Register */
 #define                    HMDMA1_ECOUNT  0xffc04554   /* Handshake MDMA1 Current Edge Count Register */
 #define                    HMDMA1_BCOUNT  0xffc04558   /* Handshake MDMA1 Current Block Count Register */
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h
index 5a9dbabe0a68..c4dcf302d9f5 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h
@@ -470,7 +470,7 @@
 #define                   HMDMA0_CONTROL  0xffc04500   /* Handshake MDMA0 Control Register */
 #define                    HMDMA0_ECINIT  0xffc04504   /* Handshake MDMA0 Initial Edge Count Register */
 #define                    HMDMA0_BCINIT  0xffc04508   /* Handshake MDMA0 Initial Block Count Register */
-#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshhold Register */
+#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshold Register */
 #define                HMDMA0_ECOVERFLOW  0xffc04510   /* Handshake MDMA0 Edge Count Overflow Interrupt Register */
 #define                    HMDMA0_ECOUNT  0xffc04514   /* Handshake MDMA0 Current Edge Count Register */
 #define                    HMDMA0_BCOUNT  0xffc04518   /* Handshake MDMA0 Current Block Count Register */
@@ -480,7 +480,7 @@
 #define                   HMDMA1_CONTROL  0xffc04540   /* Handshake MDMA1 Control Register */
 #define                    HMDMA1_ECINIT  0xffc04544   /* Handshake MDMA1 Initial Edge Count Register */
 #define                    HMDMA1_BCINIT  0xffc04548   /* Handshake MDMA1 Initial Block Count Register */
-#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshhold Register */
+#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshold Register */
 #define                HMDMA1_ECOVERFLOW  0xffc04550   /* Handshake MDMA1 Edge Count Overflow Interrupt Register */
 #define                    HMDMA1_ECOUNT  0xffc04554   /* Handshake MDMA1 Current Edge Count Register */
 #define                    HMDMA1_BCOUNT  0xffc04558   /* Handshake MDMA1 Current Block Count Register */
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF548.h b/arch/blackfin/mach-bf548/include/mach/defBF548.h
index 82cd593f7391..a5079980968c 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF548.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF548.h
@@ -853,7 +853,7 @@
 #define                   HMDMA0_CONTROL  0xffc04500   /* Handshake MDMA0 Control Register */
 #define                    HMDMA0_ECINIT  0xffc04504   /* Handshake MDMA0 Initial Edge Count Register */
 #define                    HMDMA0_BCINIT  0xffc04508   /* Handshake MDMA0 Initial Block Count Register */
-#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshhold Register */
+#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshold Register */
 #define                HMDMA0_ECOVERFLOW  0xffc04510   /* Handshake MDMA0 Edge Count Overflow Interrupt Register */
 #define                    HMDMA0_ECOUNT  0xffc04514   /* Handshake MDMA0 Current Edge Count Register */
 #define                    HMDMA0_BCOUNT  0xffc04518   /* Handshake MDMA0 Current Block Count Register */
@@ -863,7 +863,7 @@
 #define                   HMDMA1_CONTROL  0xffc04540   /* Handshake MDMA1 Control Register */
 #define                    HMDMA1_ECINIT  0xffc04544   /* Handshake MDMA1 Initial Edge Count Register */
 #define                    HMDMA1_BCINIT  0xffc04548   /* Handshake MDMA1 Initial Block Count Register */
-#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshhold Register */
+#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshold Register */
 #define                HMDMA1_ECOVERFLOW  0xffc04550   /* Handshake MDMA1 Edge Count Overflow Interrupt Register */
 #define                    HMDMA1_ECOUNT  0xffc04554   /* Handshake MDMA1 Current Edge Count Register */
 #define                    HMDMA1_BCOUNT  0xffc04558   /* Handshake MDMA1 Current Block Count Register */
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF549.h b/arch/blackfin/mach-bf548/include/mach/defBF549.h
index 6fc6e39ab61b..f7f043560c6f 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF549.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF549.h
@@ -1024,7 +1024,7 @@
 #define                   HMDMA0_CONTROL  0xffc04500   /* Handshake MDMA0 Control Register */
 #define                    HMDMA0_ECINIT  0xffc04504   /* Handshake MDMA0 Initial Edge Count Register */
 #define                    HMDMA0_BCINIT  0xffc04508   /* Handshake MDMA0 Initial Block Count Register */
-#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshhold Register */
+#define                  HMDMA0_ECURGENT  0xffc0450c   /* Handshake MDMA0 Urgent Edge Count Threshold Register */
 #define                HMDMA0_ECOVERFLOW  0xffc04510   /* Handshake MDMA0 Edge Count Overflow Interrupt Register */
 #define                    HMDMA0_ECOUNT  0xffc04514   /* Handshake MDMA0 Current Edge Count Register */
 #define                    HMDMA0_BCOUNT  0xffc04518   /* Handshake MDMA0 Current Block Count Register */
@@ -1034,7 +1034,7 @@
 #define                   HMDMA1_CONTROL  0xffc04540   /* Handshake MDMA1 Control Register */
 #define                    HMDMA1_ECINIT  0xffc04544   /* Handshake MDMA1 Initial Edge Count Register */
 #define                    HMDMA1_BCINIT  0xffc04548   /* Handshake MDMA1 Initial Block Count Register */
-#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshhold Register */
+#define                  HMDMA1_ECURGENT  0xffc0454c   /* Handshake MDMA1 Urgent Edge Count Threshold Register */
 #define                HMDMA1_ECOVERFLOW  0xffc04550   /* Handshake MDMA1 Edge Count Overflow Interrupt Register */
 #define                    HMDMA1_ECOUNT  0xffc04554   /* Handshake MDMA1 Current Edge Count Register */
 #define                    HMDMA1_BCOUNT  0xffc04558   /* Handshake MDMA1 Current Block Count Register */
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 4a7cdd9ea1ee..380df1a73a6e 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -209,7 +209,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	/* Are we prepared to handle this kernel fault?
 	 *
 	 * (The kernel has valid exception-points in the source
-	 *  when it acesses user-memory. When it fails in one
+	 *  when it accesses user-memory. When it fails in one
 	 *  of those points, we find it in a table and do a jump
 	 *  to some fixup code that loads an appropriate error
 	 *  code)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 674a8374c6d9..f332e3fe4237 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1381,7 +1381,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 #endif
 
 			/*
-			** Not virtually contigous.
+			** Not virtually contiguous.
 			** Terminate prev chunk.
 			** Start a new chunk.
 			**
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index af9405cd70e5..02d1fb732951 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -79,7 +79,7 @@ GLOBAL_ENTRY(ia32_ret_from_clone)
 (p6)	br.cond.spnt .ia32_strace_check_retval
 	;;					// prevent RAW on r8
 END(ia32_ret_from_clone)
-	// fall thrugh
+	// fall through
 GLOBAL_ENTRY(ia32_ret_from_syscall)
 	PT_REGS_UNWIND_INFO(0)
 
diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h
index 48822c0811d8..74724b24c2b7 100644
--- a/arch/ia64/include/asm/perfmon_default_smpl.h
+++ b/arch/ia64/include/asm/perfmon_default_smpl.h
@@ -67,7 +67,7 @@ typedef struct {
         unsigned long   ip;                     /* where did the overflow interrupt happened  */
         unsigned long   tstamp;                 /* ar.itc when entering perfmon intr. handler */
 
-        unsigned short  cpu;                    /* cpu on which the overfow occured */
+        unsigned short  cpu;                    /* cpu on which the overflow occured */
         unsigned short  set;                    /* event set active when overflow ocurred   */
         int    		tgid;              	/* thread group id (for NPTL, this is getpid()) */
 } pfm_default_smpl_entry_t;
diff --git a/arch/ia64/include/asm/sn/shubio.h b/arch/ia64/include/asm/sn/shubio.h
index 22a6f18a5313..6052422a22b3 100644
--- a/arch/ia64/include/asm/sn/shubio.h
+++ b/arch/ia64/include/asm/sn/shubio.h
@@ -3289,7 +3289,7 @@ typedef ii_icrb0_e_u_t icrbe_t;
 #define IIO_IIDSR_LVL_SHIFT     0
 #define IIO_IIDSR_LVL_MASK      0x000000ff
 
-/* Xtalk timeout threshhold register (IIO_IXTT) */
+/* Xtalk timeout threshold register (IIO_IXTT) */
 #define IXTT_RRSP_TO_SHFT	55	/* read response timeout */
 #define IXTT_RRSP_TO_MASK	(0x1FULL << IXTT_RRSP_TO_SHFT)
 #define IXTT_RRSP_PS_SHFT	32	/* read responsed TO prescalar */
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
index d5764a3d74af..b091111270cb 100644
--- a/arch/ia64/kernel/esi.c
+++ b/arch/ia64/kernel/esi.c
@@ -84,7 +84,7 @@ static int __init esi_init (void)
 		      case ESI_DESC_ENTRY_POINT:
 			break;
 		      default:
-			printk(KERN_WARNING "Unkown table type %d found in "
+			printk(KERN_WARNING "Unknown table type %d found in "
 			       "ESI table, ignoring rest of table\n", *p);
 			return -ENODEV;
 		}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f1782705b1f7..b3a1cb3e6b25 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -3523,7 +3523,7 @@ pfm_use_debug_registers(struct task_struct *task)
  * IA64_THREAD_DBG_VALID set. This indicates a task which was
  * able to use the debug registers for debugging purposes via
  * ptrace(). Therefore we know it was not using them for
- * perfmormance monitoring, so we only decrement the number
+ * performance monitoring, so we only decrement the number
  * of "ptraced" debug register users to keep the count up to date
  */
 int
diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S
index 6c1a9a217887..73613b5f1ee5 100644
--- a/arch/m68k/ifpsp060/src/fpsp.S
+++ b/arch/m68k/ifpsp060/src/fpsp.S
@@ -753,7 +753,7 @@ fovfl_ovfl_on:
 
 	bra.l		_real_ovfl
 
-# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
+# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 # we must jump to real_inex().
 fovfl_inex_on:
 
@@ -1015,7 +1015,7 @@ funfl_unfl_on2:
 
 	bra.l		_real_unfl
 
-# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
+# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 # we must jump to real_inex().
 funfl_inex_on:
 
@@ -2963,7 +2963,7 @@ iea_disabled:
 
 	tst.w		%d0			# is instr fmovm?
 	bmi.b		iea_dis_fmovm		# yes
-# instruction is using an extended precision immediate operand. therefore,
+# instruction is using an extended precision immediate operand. Therefore,
 # the total instruction length is 16 bytes.
 iea_dis_immed:
 	mov.l		&0x10,%d0		# 16 bytes of instruction
@@ -9624,7 +9624,7 @@ sok_dnrm:
 	bge.b		sok_norm2		# thank goodness no
 
 # the multiply factor that we're trying to create should be a denorm
-# for the multiply to work. therefore, we're going to actually do a
+# for the multiply to work. Therefore, we're going to actually do a
 # multiply with a denorm which will cause an unimplemented data type
 # exception to be put into the machine which will be caught and corrected
 # later. we don't do this with the DENORMs above because this method
@@ -12216,7 +12216,7 @@ fin_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow or inexact is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fin_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -12746,7 +12746,7 @@ fdiv_zero_load_p:
 
 #
 # The destination was In Range and the source was a ZERO. The result,
-# therefore, is an INF w/ the proper sign.
+# Therefore, is an INF w/ the proper sign.
 # So, determine the sign and return a new INF (w/ the j-bit cleared).
 #
 	global		fdiv_inf_load		# global for fsgldiv
@@ -12996,7 +12996,7 @@ fneg_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fneg_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -13611,7 +13611,7 @@ fabs_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fabs_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -14973,7 +14973,7 @@ fadd_zero_2:
 
 #
 # the ZEROes have opposite signs:
-# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
+# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
 # - -ZERO is returned in the case of RM.
 #
 fadd_zero_2_chk_rm:
@@ -15425,7 +15425,7 @@ fsub_zero_2:
 
 #
 # the ZEROes have the same signs:
-# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
+# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
 # - -ZERO is returned in the case of RM.
 #
 fsub_zero_2_chk_rm:
@@ -15693,7 +15693,7 @@ fsqrt_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fsqrt_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -21000,7 +21000,7 @@ fout_pack_type:
 	tst.l		%d0
 	bne.b		fout_pack_set
 # "mantissa" is all zero which means that the answer is zero. but, the '040
-# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
+# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
 # if the mantissa is zero, I will zero the exponent, too.
 # the question now is whether the exponents sign bit is allowed to be non-zero
 # for a zero, also...
@@ -21743,7 +21743,7 @@ denorm_set_stky:
 	rts
 
 #									#
-# dnrm_lp(): normalize exponent/mantissa to specified threshhold	#
+# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
 #									#
 # INPUT:								#
 #	%a0	   : points to the operand to be denormalized		#
@@ -22402,7 +22402,7 @@ unnorm_shift:
 	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
 
 #
-# exponent would not go < 0. therefore, number stays normalized
+# exponent would not go < 0. Therefore, number stays normalized
 #
 	sub.w		%d0, %d1		# shift exponent value
 	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S
index 51b9f7d879dd..e71ba0ab013c 100644
--- a/arch/m68k/ifpsp060/src/pfpsp.S
+++ b/arch/m68k/ifpsp060/src/pfpsp.S
@@ -752,7 +752,7 @@ fovfl_ovfl_on:
 
 	bra.l		_real_ovfl
 
-# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
+# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 # we must jump to real_inex().
 fovfl_inex_on:
 
@@ -1014,7 +1014,7 @@ funfl_unfl_on2:
 
 	bra.l		_real_unfl
 
-# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
+# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 # we must jump to real_inex().
 funfl_inex_on:
 
@@ -2962,7 +2962,7 @@ iea_disabled:
 
 	tst.w		%d0			# is instr fmovm?
 	bmi.b		iea_dis_fmovm		# yes
-# instruction is using an extended precision immediate operand. therefore,
+# instruction is using an extended precision immediate operand. Therefore,
 # the total instruction length is 16 bytes.
 iea_dis_immed:
 	mov.l		&0x10,%d0		# 16 bytes of instruction
@@ -5865,7 +5865,7 @@ denorm_set_stky:
 	rts
 
 #									#
-# dnrm_lp(): normalize exponent/mantissa to specified threshhold	#
+# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
 #									#
 # INPUT:								#
 #	%a0	   : points to the operand to be denormalized		#
@@ -6524,7 +6524,7 @@ unnorm_shift:
 	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
 
 #
-# exponent would not go < 0. therefore, number stays normalized
+# exponent would not go < 0. Therefore, number stays normalized
 #
 	sub.w		%d0, %d1		# shift exponent value
 	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
@@ -7901,7 +7901,7 @@ fout_pack_type:
 	tst.l		%d0
 	bne.b		fout_pack_set
 # "mantissa" is all zero which means that the answer is zero. but, the '040
-# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
+# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
 # if the mantissa is zero, I will zero the exponent, too.
 # the question now is whether the exponents sign bit is allowed to be non-zero
 # for a zero, also...
@@ -8647,7 +8647,7 @@ fin_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow or inexact is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fin_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -9177,7 +9177,7 @@ fdiv_zero_load_p:
 
 #
 # The destination was In Range and the source was a ZERO. The result,
-# therefore, is an INF w/ the proper sign.
+# Therefore, is an INF w/ the proper sign.
 # So, determine the sign and return a new INF (w/ the j-bit cleared).
 #
 	global		fdiv_inf_load		# global for fsgldiv
@@ -9427,7 +9427,7 @@ fneg_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fneg_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -10042,7 +10042,7 @@ fabs_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fabs_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
@@ -11404,7 +11404,7 @@ fadd_zero_2:
 
 #
 # the ZEROes have opposite signs:
-# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
+# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
 # - -ZERO is returned in the case of RM.
 #
 fadd_zero_2_chk_rm:
@@ -11856,7 +11856,7 @@ fsub_zero_2:
 
 #
 # the ZEROes have the same signs:
-# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
+# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
 # - -ZERO is returned in the case of RM.
 #
 fsub_zero_2_chk_rm:
@@ -12124,7 +12124,7 @@ fsqrt_sd_unfl_dis:
 
 #
 # operand will underflow AND underflow is enabled.
-# therefore, we must return the result rounded to extended precision.
+# Therefore, we must return the result rounded to extended precision.
 #
 fsqrt_sd_unfl_ena:
 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index fb8a06b9ab6a..67e7a78ad96b 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -145,7 +145,7 @@ struct bi_record {
 
     /*
      *  Macintosh hardware profile data - unused, see macintosh.h for
-     *  resonable type values
+     *  reasonable type values
      */
 
 #define BI_MAC_VIA1BASE		0x8010	/* Mac VIA1 base address (always present) */
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index 6a907c58a4bc..cc2108b6b260 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -9,7 +9,7 @@
  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
  *
- * Attempts were made, unsuccesfully, to contact the original
+ * Attempts were made, unsuccessfully, to contact the original
  * author of this code (Michael Morrow, Intel).  Below is the original
  * copyright notice.
  *
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index d4e9f49a71f7..0929198c5e68 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -9,7 +9,7 @@
  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
  *
- * Attempts were made, unsuccesfully, to contact the original
+ * Attempts were made, unsuccessfully, to contact the original
  * author of this code (Michael Morrow, Intel).  Below is the original
  * copyright notice.
  *
diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c
index 941dc8f94b03..4df851d41a29 100644
--- a/arch/microblaze/lib/memset.c
+++ b/arch/microblaze/lib/memset.c
@@ -9,7 +9,7 @@
  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
  *
- * Attempts were made, unsuccesfully, to contact the original
+ * Attempts were made, unsuccessfully, to contact the original
  * author of this code (Michael Morrow, Intel).  Below is the original
  * copyright notice.
  *
diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h
index 8de0eb9c98a3..ed3a88da70f6 100644
--- a/arch/mips/include/asm/mach-pnx833x/gpio.h
+++ b/arch/mips/include/asm/mach-pnx833x/gpio.h
@@ -24,7 +24,7 @@
 
 /* BIG FAT WARNING: races danger!
    No protections exist here. Current users are only early init code,
-   when locking is not needed because no cuncurency yet exists there,
+   when locking is not needed because no concurrency yet exists there,
    and GPIO IRQ dispatcher, which does locking.
    However, if many uses will ever happen, proper locking will be needed
    - including locking between different uses
diff --git a/arch/mips/include/asm/sgi/ioc.h b/arch/mips/include/asm/sgi/ioc.h
index 343ed15f8dc4..57a971904cfe 100644
--- a/arch/mips/include/asm/sgi/ioc.h
+++ b/arch/mips/include/asm/sgi/ioc.h
@@ -164,7 +164,7 @@ struct sgioc_regs {
 	u32 _unused5;
 	u8 _write[3];
 	volatile u8 write;
-#define SGIOC_WRITE_NTHRESH	0x01	/* use 4.5db threshhold */
+#define SGIOC_WRITE_NTHRESH	0x01	/* use 4.5db threshold */
 #define SGIOC_WRITE_TPSPEED	0x02	/* use 100ohm TP speed */
 #define SGIOC_WRITE_EPSEL	0x04	/* force cable mode: 1=AUI 0=TP */
 #define SGIOC_WRITE_EASEL	0x08	/* 1=autoselect 0=manual cable selection */
diff --git a/arch/mips/include/asm/sibyte/sb1250_mac.h b/arch/mips/include/asm/sibyte/sb1250_mac.h
index b6faf08ca81d..591b9061fd8e 100644
--- a/arch/mips/include/asm/sibyte/sb1250_mac.h
+++ b/arch/mips/include/asm/sibyte/sb1250_mac.h
@@ -212,7 +212,7 @@
 #define G_MAC_TXD_WEIGHT1(x)        _SB_GETVALUE(x, S_MAC_TXD_WEIGHT1, M_MAC_TXD_WEIGHT1)
 
 /*
- * MAC Fifo Threshhold registers (Table 9-14)
+ * MAC Fifo Threshold registers (Table 9-14)
  * Register: MAC_THRSH_CFG_0
  * Register: MAC_THRSH_CFG_1
  * Register: MAC_THRSH_CFG_2
diff --git a/arch/mips/include/asm/sn/sn0/hubio.h b/arch/mips/include/asm/sn/sn0/hubio.h
index d0c29d4de084..31c76c021bb6 100644
--- a/arch/mips/include/asm/sn/sn0/hubio.h
+++ b/arch/mips/include/asm/sn/sn0/hubio.h
@@ -825,7 +825,7 @@ typedef union iprb_u {
 	struct {
 	    u64	rsvd1:	15,
 		error:	1,	/* Widget rcvd wr resp pkt w/ error */
-		ovflow:	5,	/* Over flow count. perf measurement */
+		ovflow:	5,	/* Overflow count. perf measurement */
 		fire_and_forget: 1, /* Launch Write without response */
 		mode:	2,	/* Widget operation Mode	*/
 		rsvd2:	2,
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 24630fd8ef60..a38e3ee95515 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -1331,7 +1331,7 @@ void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
 		if (!((asid += ASID_INC) & ASID_MASK) ) {
 			if (cpu_has_vtag_icache)
 				flush_icache_all();
-			/* Traverse all online CPUs (hack requires contigous range) */
+			/* Traverse all online CPUs (hack requires contiguous range) */
 			for_each_online_cpu(i) {
 				/*
 				 * We don't need to worry about our own CPU, nor those of
diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index b30c5b1f1a2c..a2127d685a0d 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c
@@ -110,7 +110,7 @@ ieee754dp ieee754dp_sub(ieee754dp x, ieee754dp y)
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
-		/* FAAL THOROUGH */
+		/* FALL THROUGH */
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 		/* normalize ym,ye */
diff --git a/arch/mips/txx9/generic/smsc_fdc37m81x.c b/arch/mips/txx9/generic/smsc_fdc37m81x.c
index a2b2d62d88e3..8ebc3848f3ac 100644
--- a/arch/mips/txx9/generic/smsc_fdc37m81x.c
+++ b/arch/mips/txx9/generic/smsc_fdc37m81x.c
@@ -117,7 +117,7 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port)
 	if (chip_id == SMSC_FDC37M81X_CHIP_ID)
 		smsc_fdc37m81x_config_end();
 	else {
-		printk(KERN_WARNING "%s: unknow chip id 0x%02x\n", __func__,
+		printk(KERN_WARNING "%s: unknown chip id 0x%02x\n", __func__,
 		       chip_id);
 		g_smsc_fdc37m81x_base = 0;
 	}
diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h
index 1e180a594589..0de404dfee8b 100644
--- a/arch/powerpc/include/asm/reg_fsl_emb.h
+++ b/arch/powerpc/include/asm/reg_fsl_emb.h
@@ -39,7 +39,7 @@
 #define PMRN_PMLCB2	0x112	/* PM Local Control B2 */
 #define PMRN_PMLCB3	0x113	/* PM Local Control B3 */
 
-#define PMLCB_THRESHMUL_MASK	0x0700	/* Threshhold Multiple Field */
+#define PMLCB_THRESHMUL_MASK	0x0700	/* Threshold Multiple Field */
 #define PMLCB_THRESHMUL_SHIFT	8
 
 #define PMLCB_THRESHOLD_MASK	0x003f	/* Threshold Field */
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 641c74bb8e27..b6bd1eaa1c24 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -52,7 +52,7 @@ static struct hard_trap_info
 	{ 0x2030, 0x08 /* SIGFPE */  },		/* spe fp data */
 	{ 0x2040, 0x08 /* SIGFPE */  },		/* spe fp data */
 	{ 0x2050, 0x08 /* SIGFPE */  },		/* spe fp round */
-	{ 0x2060, 0x0e /* SIGILL */  },		/* performace monitor */
+	{ 0x2060, 0x0e /* SIGILL */  },		/* performance monitor */
 	{ 0x2900, 0x08 /* SIGFPE */  },		/* apu unavailable */
 	{ 0x3100, 0x0e /* SIGALRM */ },		/* fixed interval timer */
 	{ 0x3200, 0x02 /* SIGINT */  }, 	/* watchdog */
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index c3a56d65c5a9..a753b72efbc0 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -59,7 +59,7 @@ void set_thresholds(unsigned long cpu)
 	mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
 
 	/* setup THRM2,
-	 * threshold, valid bit, enable interrupts, interrupt when above threshhold
+	 * threshold, valid bit, enable interrupts, interrupt when above threshold
 	 */
 	mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
 #else
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 52c98edcd703..2c9e52267292 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -1594,7 +1594,7 @@ static void cell_handle_interrupt_spu(struct pt_regs *regs,
 		 * to a latch.  The new values (interrupt setting bits, reset
 		 * counter value etc.) are not copied to the actual registers
 		 * until the performance monitor is enabled.  In order to get
-		 * this to work as desired, the permormance monitor needs to
+		 * this to work as desired, the performance monitor needs to
 		 * be disabled while writing to the latches.  This is a
 		 * HW design issue.
 		 */
@@ -1668,7 +1668,7 @@ static void cell_handle_interrupt_ppu(struct pt_regs *regs,
 		 * to a latch.	The new values (interrupt setting bits, reset
 		 * counter value etc.) are not copied to the actual registers
 		 * until the performance monitor is enabled.  In order to get
-		 * this to work as desired, the permormance monitor needs to
+		 * this to work as desired, the performance monitor needs to
 		 * be disabled while writing to the latches.  This is a
 		 * HW design issue.
 		 */
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index dd43114e9684..da110bd88346 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -100,7 +100,7 @@ const struct of_device_id mpc52xx_pci_ids[] __initdata = {
 };
 
 /* ======================================================================== */
-/* PCI configuration acess                                                  */
+/* PCI configuration access                                                 */
 /* ======================================================================== */
 
 static int
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index e81403b245b5..ab2027cdf893 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -302,7 +302,7 @@ static void __init setup_chaos(struct pci_controller *hose,
  *  1 -> Skip the device but act as if the access was successfull
  *       (return 0xff's on reads, eventually, cache config space
  *       accesses in a later version)
- * -1 -> Hide the device (unsuccessful acess)
+ * -1 -> Hide the device (unsuccessful access)
  */
 static int u3_ht_skip_device(struct pci_controller *hose,
 			     struct pci_bus *bus, unsigned int devfn)
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index ae3c4db86fe8..bafc3f85360d 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -160,7 +160,7 @@ static int dart_build(struct iommu_table *tbl, long index,
 
 	dp = ((unsigned int*)tbl->it_base) + index;
 
-	/* On U3, all memory is contigous, so we can move this
+	/* On U3, all memory is contiguous, so we can move this
 	 * out of the loop.
 	 */
 	l = npages;
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index 3ee78ccb617d..cd4e9c168dd7 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -2088,7 +2088,7 @@ int math_emu_ldr(__u8 *opcode) {
         __u16 opc = *((__u16 *) opcode);
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
-                /* we got an exception therfore ry can't be in {0,2,4,6} */
+                /* we got an exception therefore ry can't be in {0,2,4,6} */
 		asm volatile(		/* load rx from fp_regs.fprs[ry] */
 			"	bras	1,0f\n"
 			"	ld	0,0(%1)\n"
@@ -2118,7 +2118,7 @@ int math_emu_ler(__u8 *opcode) {
         __u16 opc = *((__u16 *) opcode);
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
-                /* we got an exception therfore ry can't be in {0,2,4,6} */
+                /* we got an exception therefore ry can't be in {0,2,4,6} */
 		asm volatile(		/* load rx from fp_regs.fprs[ry] */
 			"	bras	1,0f\n"
 			"	le	0,0(%1)\n"
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 9d6684849fd9..278441f39856 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -12,9 +12,9 @@
 #include <linux/types.h>
 
 /*
- * FIXME: Acessing the desc_struct through its fields is more elegant,
+ * FIXME: Accessing the desc_struct through its fields is more elegant,
  * and should be the one valid thing to do. However, a lot of open code
- * still touches the a and b acessors, and doing this allow us to do it
+ * still touches the a and b accessors, and doing this allow us to do it
  * incrementally. We keep the signature as a struct, rather than an union,
  * so we can get rid of it transparently in the future -- glommer
  */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ede6998bd92c..91df7c51806c 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {}
 /*
  * generic node memory support, the following assumptions apply:
  *
- * 1) memory comes in 64Mb contigious chunks which are either present or not
+ * 1) memory comes in 64Mb contiguous chunks which are either present or not
  * 2) we will not have more than 64Gb in total
  *
  * for now assume that 64Gb is max amount of RAM for whole system
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 80e2984f521c..b414d2b401f6 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -55,7 +55,7 @@
 #define DESC_STATUS_SOURCE_TIMEOUT	3
 
 /*
- * source side threshholds at which message retries print a warning
+ * source side thresholds at which message retries print a warning
  */
 #define SOURCE_TIMEOUT_LIMIT		20
 #define DESTINATION_TIMEOUT_LIMIT	20
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 67e929b89875..1c2c4838d35c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1122,7 +1122,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	if (!acpi_sci_override_gsi)
 		acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0);
 
-	/* Fill in identity legacy mapings where no override */
+	/* Fill in identity legacy mappings where no override */
 	mp_config_acpi_legacy_irqs();
 
 	count =
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0285521e0a99..42ac5e000995 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1637,7 +1637,7 @@ retry:
 			goto out;
 
 		/*
-		 * aperture was sucessfully enlarged by 128 MB, try
+		 * aperture was successfully enlarged by 128 MB, try
 		 * allocation again
 		 */
 		goto retry;
@@ -2396,7 +2396,7 @@ int __init amd_iommu_init_passthrough(void)
 	struct pci_dev *dev = NULL;
 	u16 devid, devid2;
 
-	/* allocate passthroug domain */
+	/* allocate passthrough domain */
 	pt_domain = protection_domain_alloc();
 	if (!pt_domain)
 		return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b5801c311846..35be5802ac1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1229,7 +1229,7 @@ x86_perf_event_set_period(struct perf_event *event,
 		return 0;
 
 	/*
-	 * If we are way outside a reasoable range then just skip forward:
+	 * If we are way outside a reasonable range then just skip forward:
 	 */
 	if (unlikely(left <= -period)) {
 		left = period;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..7d377379fa4a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -514,7 +514,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
+ * remain disabled throughout this function.
  */
 static int __kprobes kprobe_handler(struct pt_regs *regs)
 {
@@ -851,7 +851,7 @@ no_change:
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
+ * remain disabled throughout this function.
  */
 static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..d16d576beebf 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -203,7 +203,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
  */
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
  */
 int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 {
@@ -302,7 +302,7 @@ no_kmmio:
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
  * This must always get called as the pair to kmmio_handler().
  */
 static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
index ca564202ed7a..58916afbbda5 100644
--- a/block/blk-iopoll.c
+++ b/block/blk-iopoll.c
@@ -28,7 +28,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
  * Description:
  *     Add this blk_iopoll structure to the pending poll list and trigger the
  *     raise of the blk iopoll softirq. The driver must already have gotten a
- *     succesful return from blk_iopoll_sched_prep() before calling this.
+ *     successful return from blk_iopoll_sched_prep() before calling this.
  **/
 void blk_iopoll_sched(struct blk_iopoll *iop)
 {
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 9ac4e378992e..3aadded05a05 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -599,7 +599,7 @@ static const struct ich_laptop ich_laptop[] = {
 	{ 0x27DF, 0x1028, 0x02b0 },	/* ICH7 on unknown Dell */
 	{ 0x27DF, 0x1043, 0x1267 },	/* ICH7 on Asus W5F */
 	{ 0x27DF, 0x103C, 0x30A1 },	/* ICH7 on HP Compaq nc2400 */
-	{ 0x27DF, 0x103C, 0x361a },	/* ICH7 on unkown HP  */
+	{ 0x27DF, 0x103C, 0x361a },	/* ICH7 on unknown HP  */
 	{ 0x27DF, 0x1071, 0xD221 },	/* ICH7 on Hercules EC-900 */
 	{ 0x27DF, 0x152D, 0x0778 },	/* ICH7 on unknown Intel */
 	{ 0x24CA, 0x1025, 0x0061 },	/* ICH4 on ACER Aspire 2023WLMi */
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index d344db42a002..0d9d2f20788a 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -43,9 +43,9 @@ enum {
 	/*
 	 * SATA-FSL host controller supports a max. of (15+1) direct PRDEs, and
 	 * chained indirect PRDEs upto a max count of 63.
-	 * We are allocating an array of 63 PRDEs contigiously, but PRDE#15 will
+	 * We are allocating an array of 63 PRDEs contiguously, but PRDE#15 will
 	 * be setup as an indirect descriptor, pointing to it's next
-	 * (contigious) PRDE. Though chained indirect PRDE arrays are
+	 * (contiguous) PRDE. Though chained indirect PRDE arrays are
 	 * supported,it will be more efficient to use a direct PRDT and
 	 * a single chain/link to indirect PRDE array/PRDT.
 	 */
@@ -314,7 +314,7 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
 	u32 ttl_dwords = 0;
 
 	/*
-	 * NOTE : direct & indirect prdt's are contigiously allocated
+	 * NOTE : direct & indirect prdt's are contiguously allocated
 	 */
 	struct prde *prd = (struct prde *)&((struct command_desc *)
 					    cmd_desc)->prdt;
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index b2c1b37ab2e4..f734b345ac71 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1132,7 +1132,7 @@ static int rx_pkt(struct atm_dev *dev)
                     IF_ERR(printk(" cause: packet time out\n");)
                 }
                 else {
-                    IF_ERR(printk(" cause: buffer over flow\n");)
+                    IF_ERR(printk(" cause: buffer overflow\n");)
                 }
 		goto out_free_desc;
 	}  
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 979d159b5cd1..ee95c76bfd3d 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -188,7 +188,7 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe);
  * @dev: device to try to bind to the driver
  *
  * This function returns -ENODEV if the device is not registered,
- * 1 if the device is bound sucessfully and 0 otherwise.
+ * 1 if the device is bound successfully and 0 otherwise.
  *
  * This function must be called with @dev->sem held.  When called for a
  * USB interface, @dev->parent->sem must be held as well.
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 5b33b85790f2..63bfc5436799 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -535,7 +535,7 @@ static int btmrvl_sdio_card_to_host(struct btmrvl_private *priv)
 		break;
 
 	default:
-		BT_ERR("Unknow packet type:%d", type);
+		BT_ERR("Unknown packet type:%d", type);
 		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, payload,
 						blksz * buf_block_len);
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 4895f0e05322..aa0919386b8c 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -214,7 +214,7 @@ static int hci_uart_send_frame(struct sk_buff *skb)
 	struct hci_uart *hu;
 
 	if (!hdev) {
-		BT_ERR("Frame for uknown device (hdev=NULL)");
+		BT_ERR("Frame for unknown device (hdev=NULL)");
 		return -ENODEV;
 	}
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index a074fceb67d3..42e65cf8ab52 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -5,7 +5,7 @@
  *
  *  Added devfs support. 
  *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
- *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
  */
 
 #include <linux/mm.h>
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index 1997270bb6f4..ecb89d798e35 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -248,7 +248,7 @@ static const struct vm_operations_struct mspec_vm_ops = {
 /*
  * mspec_mmap
  *
- * Called when mmaping the device.  Initializes the vma with a fault handler
+ * Called when mmapping the device.  Initializes the vma with a fault handler
  * and private data structure necessary to allocate, track, and free the
  * underlying pages.
  */
diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c
index 6934025a1ac1..c1d8b54c816d 100644
--- a/drivers/char/n_r3964.c
+++ b/drivers/char/n_r3964.c
@@ -602,7 +602,7 @@ static void receive_char(struct r3964_info *pInfo, const unsigned char c)
 		}
 		break;
 	case R3964_WAIT_FOR_RX_REPEAT:
-		/* FALLTROUGH */
+		/* FALLTHROUGH */
 	case R3964_IDLE:
 		if (c == STX) {
 			/* Prevent rx_queue from overflow: */
diff --git a/drivers/char/rio/route.h b/drivers/char/rio/route.h
index 20ed73f3fd7b..46e963771c30 100644
--- a/drivers/char/rio/route.h
+++ b/drivers/char/rio/route.h
@@ -67,7 +67,7 @@
 typedef struct COST_ROUTE COST_ROUTE;
 struct COST_ROUTE {
 	unsigned char cost;	/* Cost down this link */
-	unsigned char route[NODE_BYTES];	/* Nodes thorough this route */
+	unsigned char route[NODE_BYTES];	/* Nodes through this route */
 };
 
 typedef struct ROUTE_STR ROUTE_STR;
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 5f753fc08730..09ad9154d86c 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -863,7 +863,7 @@ static int hifn_init_pubrng(struct hifn_device *dev)
 		dev->dmareg |= HIFN_DMAIER_PUBDONE;
 		hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg);
 
-		dprintk("Chip %s: Public key engine has been sucessfully "
+		dprintk("Chip %s: Public key engine has been successfully "
 				"initialised.\n", dev->name);
 	}
 
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 7585c4164bd5..c52ac9efd0bf 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -99,7 +99,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
 }
 
 /**
- * atc_desc_get - get a unsused descriptor from free_list
+ * atc_desc_get - get an unused descriptor from free_list
  * @atchan: channel we want a new descriptor for
  */
 static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index fddf2b358936..d373d17257e9 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -183,7 +183,7 @@ static inline struct fw_node *fw_node(struct list_head *l)
  * This function builds the tree representation of the topology given
  * by the self IDs from the latest bus reset.  During the construction
  * of the tree, the function checks that the self IDs are valid and
- * internally consistent.  On succcess this function returns the
+ * internally consistent.  On success this function returns the
  * fw_node corresponding to the local card otherwise NULL.
  */
 static struct fw_node *build_tree(struct fw_card *card,
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 5cae0b3eee9b..3f7c500b2115 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(drm_mode_object_find);
  * functions & device file and adds it to the master fd list.
  *
  * RETURNS:
- * Zero on success, error code on falure.
+ * Zero on success, error code on failure.
  */
 int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
 			 const struct drm_framebuffer_funcs *funcs)
@@ -2328,7 +2328,7 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
 	} else if (connector->funcs->set_property)
 		ret = connector->funcs->set_property(connector, property, out_resp->value);
 
-	/* store the property value if succesful */
+	/* store the property value if successful */
 	if (!ret)
 		drm_connector_property_set_value(connector, property, out_resp->value);
 out:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index abfc27b0c2ea..a2a3fa599923 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1309,7 +1309,7 @@ out_free_list:
  * i915_gem_release_mmap - remove physical page mappings
  * @obj: obj in question
  *
- * Preserve the reservation of the mmaping with the DRM core code, but
+ * Preserve the reservation of the mmapping with the DRM core code, but
  * relinquish ownership of the pages back to the system.
  *
  * It is vital that we remove the page mapping if we have mapped a tiled
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 2b0fe54cd92c..40fcf6fdef38 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -70,7 +70,7 @@ static struct drm_fb_helper_funcs intel_fb_helper_funcs = {
 
 
 /**
- * Curretly it is assumed that the old framebuffer is reused.
+ * Currently it is assumed that the old framebuffer is reused.
  *
  * LOCKING
  * caller should hold the mode config lock.
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 083bec2e50f9..e7fa3279e2f8 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2726,7 +2726,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	/* Wrap with our custom algo which switches to DDC mode */
 	intel_output->ddc_bus->algo = &intel_sdvo_i2c_bit_algo;
 
-	/* In defaut case sdvo lvds is false */
+	/* In default case sdvo lvds is false */
 	intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps);
 
 	if (intel_sdvo_output_setup(intel_output,
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 609719490ec2..00c739c44848 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -389,11 +389,11 @@ int r600_mc_init(struct radeon_device *rdev)
 		 * AGP so that GPU can catch out of VRAM/AGP access
 		 */
 		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
-			/* Enought place before */
+			/* Enough place before */
 			rdev->mc.vram_location = rdev->mc.gtt_location -
 							rdev->mc.mc_vram_size;
 		} else if (tmp > rdev->mc.mc_vram_size) {
-			/* Enought place after */
+			/* Enough place after */
 			rdev->mc.vram_location = rdev->mc.gtt_location +
 							rdev->mc.gtt_size;
 		} else {
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index b38c4c8e2c61..d10eb43645c8 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -59,7 +59,7 @@ static struct fb_ops radeonfb_ops = {
 };
 
 /**
- * Curretly it is assumed that the old framebuffer is reused.
+ * Currently it is assumed that the old framebuffer is reused.
  *
  * LOCKING
  * caller should hold the mode config lock.
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 38537d971a3e..067167cb39ca 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -1950,7 +1950,7 @@ static void radeon_apply_surface_regs(int surf_index,
  * Note that refcount can be at most 2, since during a free refcount=3
  * might mean we have to allocate a new surface which might not always
  * be available.
- * For example : we allocate three contigous surfaces ABC. If B is
+ * For example : we allocate three contiguous surfaces ABC. If B is
  * freed, we suddenly need two surfaces to store A and C, which might
  * not always be available.
  */
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 765bd184b6fc..5a664000bf70 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -372,7 +372,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
 	     new_mem->mem_type == TTM_PL_TT)) {
-		/* bind is enought */
+		/* bind is enough */
 		radeon_move_null(bo, new_mem);
 		return 0;
 	}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 595ac638039d..9e9826ace305 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -807,11 +807,11 @@ int rv770_mc_init(struct radeon_device *rdev)
 		 * AGP so that GPU can catch out of VRAM/AGP access
 		 */
 		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
-			/* Enought place before */
+			/* Enough place before */
 			rdev->mc.vram_location = rdev->mc.gtt_location -
 							rdev->mc.mc_vram_size;
 		} else if (tmp > rdev->mc.mc_vram_size) {
-			/* Enought place after */
+			/* Enough place after */
 			rdev->mc.vram_location = rdev->mc.gtt_location +
 							rdev->mc.gtt_size;
 		} else {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index c70927ecda21..61c5572d2b91 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -427,7 +427,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 
 		/*
 		 * We need to use vmap to get the desired page protection
-		 * or to make the buffer object look contigous.
+		 * or to make the buffer object look contiguous.
 		 */
 		prot = (mem->placement & TTM_PL_FLAG_CACHED) ?
 			PAGE_KERNEL :
diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c
index 36718150b475..e845b75ccee4 100644
--- a/drivers/hwmon/adm1029.c
+++ b/drivers/hwmon/adm1029.c
@@ -432,7 +432,7 @@ static int adm1029_remove(struct i2c_client *client)
 }
 
 /*
-function that update the status of the chips (temperature for exemple)
+function that update the status of the chips (temperature for example)
 */
 static struct adm1029_data *adm1029_update_device(struct device *dev)
 {
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index fc36cadf36fb..c48a284b8314 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -928,7 +928,7 @@ static void lm93_update_client_common(struct lm93_data *data,
 	data->prochot_interval = lm93_read_byte(client,
 			LM93_REG_PROCHOT_INTERVAL);
 
-	/* Fan Boost Termperature registers */
+	/* Fan Boost Temperature registers */
 	for (i = 0; i < 4; i++)
 		data->boost[i] = lm93_read_byte(client, LM93_REG_BOOST(i));
 
diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c
index 2cd00b5b45b4..9fd4a0d3206e 100644
--- a/drivers/ieee1394/dv1394.c
+++ b/drivers/ieee1394/dv1394.c
@@ -125,7 +125,7 @@
    0 - no debugging messages
    1 - some debugging messages, but none during DMA frame transmission
    2 - lots of messages, including during DMA frame transmission
-       (will cause undeflows if your machine is too slow!)
+       (will cause underflows if your machine is too slow!)
 */
 
 #define DV1394_DEBUG_LEVEL 0
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 4bd39c8af80f..37d12e5efa49 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -381,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
 #define IPATH_GPIO_SCL \
 	(1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
-/* keep the code below somewhat more readonable; not used elsewhere */
+/* keep the code below somewhat more readable; not used elsewhere */
 #define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |	\
 				infinipath_hwe_htclnkabyte1crcerr)
 #define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |	\
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c
index aa47eb549520..2a68d9f624dd 100644
--- a/drivers/infiniband/hw/ipath/ipath_sd7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c
@@ -614,7 +614,7 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
  * @wd: Write Data - value to set in register
  * @mask: ones where data should be spliced into reg.
  *
- * Basic register read/modify/write, with un-needed acesses elided. That is,
+ * Basic register read/modify/write, with un-needed accesses elided. That is,
  * a mask of zero will prevent write, while a mask of 0xFF will prevent read.
  * returns current (presumed, if a write was done) contents of selected
  * register, or <0 if errors.
@@ -989,7 +989,7 @@ static struct rxeq_init {
 	/* Set DFELTHFDR/HDR thresholds */
 	RXEQ_VAL(7, 8,    0, 0, 0, 0), /* FDR */
 	RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
-	/* Set TLTHFDR/HDR theshold */
+	/* Set TLTHFDR/HDR threshold */
 	RXEQ_VAL(7, 9,    2, 2, 2, 2), /* FDR */
 	RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */
 	/* Set Preamp setting 2 (ZFR/ZCNT) */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 219b10397b4d..256a00c6aeea 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -352,7 +352,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	 * anymore, so we do this only if selective signaling is off.
 	 *
 	 * Further, on 32-bit platforms, we can't use vmap() to make
-	 * the QP buffer virtually contigious.  Thus we have to use
+	 * the QP buffer virtually contiguous.  Thus we have to use
 	 * constant-sized WRs to make sure a WR is always fully within
 	 * a single page-sized chunk.
 	 *
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 1c9410d1822c..bcc2d30ec245 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -955,7 +955,7 @@ static int __init hp_sdc_init_hppa(struct parisc_device *d)
 	INIT_DELAYED_WORK(&moduleloader_work, request_module_delayed);
 
 	ret = hp_sdc_init();
-	/* after sucessfull initialization give SDC some time to settle
+	/* after successfull initialization give SDC some time to settle
 	 * and then load the hp_sdc_mlc upper layer driver */
 	if (!ret)
 		schedule_delayed_work(&moduleloader_work,
diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c
index 820e51673b26..7d2b820ef58d 100644
--- a/drivers/input/serio/hp_sdc_mlc.c
+++ b/drivers/input/serio/hp_sdc_mlc.c
@@ -125,7 +125,7 @@ static void hp_sdc_mlc_isr (int irq, void *dev_id,
 		break;
 
 	default:
-		printk(KERN_WARNING PREFIX "Unkown HIL Error status (%x)!\n", data);
+		printk(KERN_WARNING PREFIX "Unknown HIL Error status (%x)!\n", data);
 		break;
 	}
 
diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c
index 35377f583e28..a12242f77e23 100644
--- a/drivers/input/touchscreen/atmel-wm97xx.c
+++ b/drivers/input/touchscreen/atmel-wm97xx.c
@@ -59,7 +59,7 @@
 #define ATMEL_WM97XX_AC97C_IRQ		(29)
 #define ATMEL_WM97XX_GPIO_DEFAULT	(32+16) /* Pin 16 on port B. */
 #else
-#error Unkown CPU, this driver only supports AT32AP700X CPUs.
+#error Unknown CPU, this driver only supports AT32AP700X CPUs.
 #endif
 
 struct continuous {
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 8fc3b08deb3b..6cdcf2a6e036 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -14,7 +14,7 @@
  *
  * Notes:
  *     This is a wm97xx extended touch driver to capture touch
- *     data in a continuous manner on the Intel XScale archictecture
+ *     data in a continuous manner on the Intel XScale architecture
  *
  *  Features:
  *       - codecs supported:- WM9705, WM9712, WM9713
@@ -131,7 +131,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm)
 	/* When the AC97 queue has been drained we need to allow time
 	 * to buffer up samples otherwise we end up spinning polling
 	 * for samples.  The controller can't have a suitably low
-	 * threashold set to use the notifications it gives.
+	 * threshold set to use the notifications it gives.
 	 */
 	schedule_timeout_uninterruptible(1);
 
diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c
index 41e4359c277c..eca54dbdf493 100644
--- a/drivers/input/touchscreen/zylonite-wm97xx.c
+++ b/drivers/input/touchscreen/zylonite-wm97xx.c
@@ -96,7 +96,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm)
 	/* When the AC97 queue has been drained we need to allow time
 	 * to buffer up samples otherwise we end up spinning polling
 	 * for samples.  The controller can't have a suitably low
-	 * threashold set to use the notifications it gives.
+	 * threshold set to use the notifications it gives.
 	 */
 	msleep(1);
 
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 3e6d17f42a98..66b7d7a86474 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -830,7 +830,7 @@ static void handle_controller(_cmsg * cmsg)
 		      case 0: break;
 		      case 1: s = "unknown class"; break;
 		      case 2: s = "unknown function"; break;
-		      default: s = "unkown error"; break;
+		      default: s = "unknown error"; break;
 		   }
 		   if (s)
 	           printk(KERN_INFO "capidrv-%d: %s from controller 0x%x function %d: %s\n",
diff --git a/drivers/isdn/hardware/eicon/di.c b/drivers/isdn/hardware/eicon/di.c
index b029d130eb21..cb14ae3e7154 100644
--- a/drivers/isdn/hardware/eicon/di.c
+++ b/drivers/isdn/hardware/eicon/di.c
@@ -806,7 +806,7 @@ static void xdi_xlog_request (byte Adapter, byte Id,
           DELIVERY - indication entered isdn_rc function
           RNR=...  - application had returned RNR=... after the
                      look ahead callback
-          RNum=0   - aplication had not returned any buffer to copy
+          RNum=0   - application had not returned any buffer to copy
                      this indication and will copy it self
           COMPLETE - XDI had copied the data to the buffers provided
                      bu the application and is about to issue the
diff --git a/drivers/isdn/hardware/eicon/maintidi.c b/drivers/isdn/hardware/eicon/maintidi.c
index 23960cb6eaab..e7cfb3b5647f 100644
--- a/drivers/isdn/hardware/eicon/maintidi.c
+++ b/drivers/isdn/hardware/eicon/maintidi.c
@@ -385,7 +385,7 @@ static int SuperTraceMessageInput (void* hLib) {
                   }
                   break;
                 default:
-                  diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind (DMA mode): %02x", Ind);
+                  diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind (DMA mode): %02x", Ind);
               }
               p += (this_ind_length+1);
               total_length -= (4 + this_ind_length);
@@ -420,7 +420,7 @@ static int SuperTraceMessageInput (void* hLib) {
             }
             break;
           default:
-            diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind: %02x", Ind);
+            diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind: %02x", Ind);
         }
       }
     }
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index fc46a26cb14f..a64bb6c67ba7 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -721,7 +721,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol)
 	switch (protocol) {
 	case (-1):	/* used for init */
 		bch->state = -1;
-		/* fall trough */
+		/* fall through */
 	case (ISDN_P_NONE):
 		if (bch->state == ISDN_P_NONE)
 			return 0; /* already in idle state */
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.h b/drivers/isdn/hardware/mISDN/hfcsusb.h
index 43efe7358fa3..369196adae03 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.h
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.h
@@ -150,7 +150,7 @@ symbolic(struct hfcusb_symbolic_list list[], const int num)
 	for (i = 0; list[i].name != NULL; i++)
 		if (list[i].num == num)
 			return list[i].name;
-	return "<unkown USB Error>";
+	return "<unknown USB Error>";
 }
 
 /* USB descriptor need to contain one of the following EndPoint combination: */
diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c
index de352a17673a..09095c747110 100644
--- a/drivers/isdn/hardware/mISDN/mISDNisar.c
+++ b/drivers/isdn/hardware/mISDN/mISDNisar.c
@@ -860,7 +860,7 @@ isar_pump_statev_modem(struct isar_ch *ch, u8 devt) {
 		pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name);
 		break;
 	default:
-		pr_info("u%s: nknown pump stev %x\n", ch->is->name, devt);
+		pr_info("u%s: unknown pump stev %x\n", ch->is->name, devt);
 		break;
 	}
 }
diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c
index 9de54202c90c..ad5831f37d84 100644
--- a/drivers/isdn/hisax/hfc_usb.c
+++ b/drivers/isdn/hisax/hfc_usb.c
@@ -1086,7 +1086,7 @@ hfc_usb_l2l1(struct hisax_if *my_hisax_if, int pr, void *arg)
 			break;
 		default:
 			DBG(HFCUSB_DBG_STATES,
-			       "HFC_USB: hfc_usb_d_l2l1: unkown state : %#x", pr);
+			       "HFC_USB: hfc_usb_d_l2l1: unknown state : %#x", pr);
 			break;
 	}
 }
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 2d14b64202a3..0f4ea7d16a15 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -836,7 +836,7 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 			unsigned short hl;
 			struct sk_buff *skb;
 			/*
-			 * we need to reserve enought space in front of
+			 * we need to reserve enough space in front of
 			 * sk_buff. old call to dev_alloc_skb only reserved
 			 * 16 bytes, now we are looking what the driver want
 			 */
@@ -1326,7 +1326,7 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
 		struct sk_buff *new_skb;
 	        unsigned short hl;
 		/*
-		 * we need to reserve enought space in front of
+		 * we need to reserve enough space in front of
 		 * sk_buff. old call to dev_alloc_skb only reserved
 		 * 16 bytes, now we are looking what the driver want.
 		 */
@@ -1685,7 +1685,7 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp,
 	 *
 	 * try to accomplish several tasks:
 	 * - reassemble any complete fragment sequence (non-null 'start'
-	 *   indicates there is a continguous sequence present)
+	 *   indicates there is a contiguous sequence present)
 	 * - discard any incomplete sequences that are below minseq -- due
 	 *   to the fact that sender always increment sequence number, if there
 	 *   is an incomplete sequence below minseq, no new fragments would
diff --git a/drivers/isdn/i4l/isdn_ttyfax.c b/drivers/isdn/i4l/isdn_ttyfax.c
index 78f7660c1d0e..4c41f191d4e2 100644
--- a/drivers/isdn/i4l/isdn_ttyfax.c
+++ b/drivers/isdn/i4l/isdn_ttyfax.c
@@ -470,7 +470,7 @@ isdn_tty_cmd_FCLASS2(char **p, modem_info * info)
 		}
 		return 0;
 	}
-	/* BADMUL=value - dummy 0=disable errorchk disabled (treshold multiplier) */
+	/* BADMUL=value - dummy 0=disable errorchk disabled (threshold multiplier) */
 	if (!strncmp(p[0], "BADMUL", 6)) {
 		p[0] += 6;
 		switch (*p[0]) {
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 77ee2867c8b4..43ff4d3b046e 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -110,7 +110,7 @@
  * crossconnections and conferences via software if not possible through
  * hardware. If hardware capability is available, hardware is used.
  *
- * Echo: Is generated by CMX and is used to check performane of hard and
+ * Echo: Is generated by CMX and is used to check performance of hard and
  * software CMX.
  *
  * The CMX has special functions for conferences with one, two and more
diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
index e04bad6c5baf..6d4da6095885 100644
--- a/drivers/isdn/mISDN/tei.c
+++ b/drivers/isdn/mISDN/tei.c
@@ -725,7 +725,7 @@ tei_id_ver_tout_net(struct FsmInst *fi, int event, void *arg)
 	if (tm->rcnt == 1) {
 		if (*debug & DEBUG_L2_TEI)
 			tm->tei_m.printdebug(fi,
-			    "check req for tei %d sucessful\n", tm->l2->tei);
+			    "check req for tei %d successful\n", tm->l2->tei);
 		mISDN_FsmChangeState(fi, ST_TEI_NOP);
 	} else if (tm->rcnt > 1) {
 		/* duplicate assignment; remove */
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index 8b9364434aa0..3fbe41b0ac07 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -15,7 +15,7 @@
  *
  *	WARNING: This driver has only been testen on Apple's
  *	1.25 MHz Dual G4 (March 03). It is tuned for a CPU
- *	temperatur around 57 C.
+ *	temperature around 57 C.
  *
  *   Copyright (C) 2003, 2004 Samuel Rydh (samuel@ibrium.se)
  *
diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c
index 7e8f56815998..48cb154c7a46 100644
--- a/drivers/media/common/saa7146_i2c.c
+++ b/drivers/media/common/saa7146_i2c.c
@@ -98,7 +98,7 @@ static int saa7146_i2c_msg_cleanup(const struct i2c_msg *m, int num, __le32 *op)
 
 		op_count++;
 
-		/* loop throgh all bytes of message i */
+		/* loop through all bytes of message i */
 		for(j = 0; j < m[i].len; j++) {
 			/* write back all bytes that could have been read */
 			m[i].buf[j] = (le32_to_cpu(op[op_count/3]) >> ((3-(op_count%3))*8));
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h
index 810f07d63246..52e4ce4304ee 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.h
@@ -160,7 +160,7 @@ struct tuner_state {
  * search callback possible return status
  *
  * DVBFE_ALGO_SEARCH_SUCCESS
- * The frontend search algorithm completed and returned succesfully
+ * The frontend search algorithm completed and returned successfully
  *
  * DVBFE_ALGO_SEARCH_ASLEEP
  * The frontend search algorithm is sleeping
diff --git a/drivers/media/dvb/dvb-usb/anysee.c b/drivers/media/dvb/dvb-usb/anysee.c
index 2ae7f648effe..bb69f3719f9a 100644
--- a/drivers/media/dvb/dvb-usb/anysee.c
+++ b/drivers/media/dvb/dvb-usb/anysee.c
@@ -344,7 +344,7 @@ static int anysee_frontend_attach(struct dvb_usb_adapter *adap)
 	if (ret)
 		return ret;
 
-	err("Unkown Anysee version: %02x %02x %02x. "\
+	err("Unknown Anysee version: %02x %02x %02x. "\
 	    "Please report the <linux-dvb@linuxtv.org>.",
 	    hw_info[0], hw_info[1], hw_info[2]);
 
diff --git a/drivers/media/dvb/dvb-usb/dibusb-mb.c b/drivers/media/dvb/dvb-usb/dibusb-mb.c
index eeef50bff4f9..5c0126dc1ff9 100644
--- a/drivers/media/dvb/dvb-usb/dibusb-mb.c
+++ b/drivers/media/dvb/dvb-usb/dibusb-mb.c
@@ -242,7 +242,7 @@ static struct dvb_usb_device_properties dibusb1_1_properties = {
 			{ &dibusb_dib3000mb_table[9],  &dibusb_dib3000mb_table[11], NULL },
 			{ &dibusb_dib3000mb_table[10], &dibusb_dib3000mb_table[12], NULL },
 		},
-		{	"Unkown USB1.1 DVB-T device ???? please report the name to the author",
+		{	"Unknown USB1.1 DVB-T device ???? please report the name to the author",
 			{ &dibusb_dib3000mb_table[13], NULL },
 			{ &dibusb_dib3000mb_table[14], NULL },
 		},
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index edde87c6aa3a..6b5ded9e7d5d 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -259,7 +259,7 @@ int dvb_usb_nec_rc_key_to_event(struct dvb_usb_device *d,
 			*state = REMOTE_KEY_REPEAT;
 			break;
 		default:
-			deb_err("unkown type of remote status: %d\n",keybuf[0]);
+			deb_err("unknown type of remote status: %d\n",keybuf[0]);
 			break;
 	}
 	return 0;
diff --git a/drivers/media/dvb/dvb-usb/usb-urb.c b/drivers/media/dvb/dvb-usb/usb-urb.c
index 9da2cc95ca13..f9702e3756b6 100644
--- a/drivers/media/dvb/dvb-usb/usb-urb.c
+++ b/drivers/media/dvb/dvb-usb/usb-urb.c
@@ -56,7 +56,7 @@ static void usb_urb_complete(struct urb *urb)
 				stream->complete(stream, b, urb->actual_length);
 			break;
 		default:
-			err("unkown endpoint type in completition handler.");
+			err("unknown endpoint type in completition handler.");
 			return;
 	}
 	usb_submit_urb(urb,GFP_ATOMIC);
@@ -228,7 +228,7 @@ int usb_urb_init(struct usb_data_stream *stream, struct usb_data_stream_properti
 		case USB_ISOC:
 			return usb_isoc_urb_init(stream);
 		default:
-			err("unkown URB-type for data transfer.");
+			err("unknown URB-type for data transfer.");
 			return -EINVAL;
 	}
 }
diff --git a/drivers/media/dvb/frontends/au8522_decoder.c b/drivers/media/dvb/frontends/au8522_decoder.c
index 74981ee923c8..7c6431fe33e0 100644
--- a/drivers/media/dvb/frontends/au8522_decoder.c
+++ b/drivers/media/dvb/frontends/au8522_decoder.c
@@ -315,7 +315,7 @@ static void setup_decoder_defaults(struct au8522_state *state, u8 input_mode)
 	if (input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH13 ||
 	    input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH24) {
 		/* Despite what the table says, for the HVR-950q we still need
-		   to be in CVBS mode for the S-Video input (reason uknown). */
+		   to be in CVBS mode for the S-Video input (reason unknown). */
 		/* filter_coef_type = 3; */
 		filter_coef_type = 5;
 	} else {
diff --git a/drivers/media/dvb/frontends/cx24110.c b/drivers/media/dvb/frontends/cx24110.c
index ffbcfabd83f0..00a4e8f03304 100644
--- a/drivers/media/dvb/frontends/cx24110.c
+++ b/drivers/media/dvb/frontends/cx24110.c
@@ -1,4 +1,4 @@
-	/*
+/*
     cx24110 - Single Chip Satellite Channel Receiver driver module
 
     Copyright (C) 2002 Peter Hettkamp <peter.hettkamp@htp-tel.de> based on
@@ -96,7 +96,7 @@ static struct {u8 reg; u8 data;} cx24110_regdata[]=
 	 {0x42,0x00}, /* @ middle bytes " */
 	 {0x43,0x00}, /* @ LSB          " */
 		      /* leave the carrier tracking loop parameters on default */
-		      /* leave the bit timing loop parameters at gefault */
+		      /* leave the bit timing loop parameters at default */
 	 {0x56,0x4d}, /* set the filtune voltage to 2.7V, as recommended by */
 		      /* the cx24108 data sheet for symbol rates above 15MS/s */
 	 {0x57,0x00}, /* @ Filter sigma delta enabled, positive */
diff --git a/drivers/media/dvb/frontends/cx24113.c b/drivers/media/dvb/frontends/cx24113.c
index 075b2b57cf09..e9ee55592fd3 100644
--- a/drivers/media/dvb/frontends/cx24113.c
+++ b/drivers/media/dvb/frontends/cx24113.c
@@ -589,7 +589,7 @@ struct dvb_frontend *cx24113_attach(struct dvb_frontend *fe,
 		info("detected CX24113 variant\n");
 		break;
 	case REV_CX24113:
-		info("sucessfully detected\n");
+		info("successfully detected\n");
 		break;
 	default:
 		err("unsupported device id: %x\n", state->rev);
diff --git a/drivers/media/dvb/frontends/dib3000mb.c b/drivers/media/dvb/frontends/dib3000mb.c
index 136b9d2164d7..ad4c8cfd8090 100644
--- a/drivers/media/dvb/frontends/dib3000mb.c
+++ b/drivers/media/dvb/frontends/dib3000mb.c
@@ -154,7 +154,7 @@ static int dib3000mb_set_frontend(struct dvb_frontend* fe,
 			case BANDWIDTH_AUTO:
 				return -EOPNOTSUPP;
 			default:
-				err("unkown bandwidth value.");
+				err("unknown bandwidth value.");
 				return -EINVAL;
 		}
 	}
diff --git a/drivers/media/dvb/frontends/lgdt330x.c b/drivers/media/dvb/frontends/lgdt330x.c
index 056387b41a8f..43971e63baa7 100644
--- a/drivers/media/dvb/frontends/lgdt330x.c
+++ b/drivers/media/dvb/frontends/lgdt330x.c
@@ -479,7 +479,7 @@ static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status)
 	switch (state->current_modulation) {
 	case QAM_256:
 	case QAM_64:
-		/* Need to undestand why there are 3 lock levels here */
+		/* Need to understand why there are 3 lock levels here */
 		if ((buf[0] & 0x07) == 0x07)
 			*status |= FE_HAS_CARRIER;
 		break;
@@ -520,7 +520,7 @@ static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status)
 	switch (state->current_modulation) {
 	case QAM_256:
 	case QAM_64:
-		/* Need to undestand why there are 3 lock levels here */
+		/* Need to understand why there are 3 lock levels here */
 		if ((buf[0] & 0x07) == 0x07)
 			*status |= FE_HAS_CARRIER;
 		else
diff --git a/drivers/media/dvb/frontends/stb0899_drv.c b/drivers/media/dvb/frontends/stb0899_drv.c
index a04c782fff8d..1570669837ea 100644
--- a/drivers/media/dvb/frontends/stb0899_drv.c
+++ b/drivers/media/dvb/frontends/stb0899_drv.c
@@ -1571,7 +1571,7 @@ static enum dvbfe_search stb0899_search(struct dvb_frontend *fe, struct dvb_fron
  * stb0899_track
  * periodically check the signal level against a specified
  * threshold level and perform derotator centering.
- * called once we have a lock from a succesful search
+ * called once we have a lock from a successful search
  * event.
  *
  * Will be called periodically called to maintain the
diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c
index 8d65c652ba50..baf3159a3aa6 100644
--- a/drivers/media/dvb/ttpci/av7110.c
+++ b/drivers/media/dvb/ttpci/av7110.c
@@ -2425,7 +2425,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev,
 		 * use 0x15 to track VPE  interrupts - increase by 1 every vpeirq() is called
 		 */
 		saa7146_write(dev, EC1SSR, (0x03<<2) | 3 );
-		/* set event counter 1 treshold to maximum allowed value        (rEC p55) */
+		/* set event counter 1 threshold to maximum allowed value        (rEC p55) */
 		saa7146_write(dev, ECT1R,  0x3fff );
 #endif
 		/* Set RPS1 Address register to point to RPS code               (r108 p42) */
@@ -2559,7 +2559,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev,
 		 * use 0x15 to track VPE  interrupts - increase by 1 every vpeirq() is called
 		 */
 		saa7146_write(dev, EC1SSR, (0x03<<2) | 3 );
-		/* set event counter 1 treshold to maximum allowed value        (rEC p55) */
+		/* set event counter 1 threshold to maximum allowed value        (rEC p55) */
 		saa7146_write(dev, ECT1R,  0x3fff );
 #endif
 		/* Setup BUDGETPATCH MAIN RPS1 "program" (p35) */
diff --git a/drivers/media/dvb/ttpci/budget-patch.c b/drivers/media/dvb/ttpci/budget-patch.c
index 60136688a9a4..9c92f9ddd223 100644
--- a/drivers/media/dvb/ttpci/budget-patch.c
+++ b/drivers/media/dvb/ttpci/budget-patch.c
@@ -456,7 +456,7 @@ static int budget_patch_attach (struct saa7146_dev* dev, struct saa7146_pci_exte
 	// use 0x03 to track RPS1 interrupts - increase by 1 every gpio3 is toggled
 	// use 0x15 to track VPE  interrupts - increase by 1 every vpeirq() is called
 	saa7146_write(dev, EC1SSR, (0x03<<2) | 3 );
-	// set event counter 1 treshold to maximum allowed value        (rEC p55)
+	// set event counter 1 threshold to maximum allowed value        (rEC p55)
 	saa7146_write(dev, ECT1R,  0x3fff );
 #endif
 	// Fix VSYNC level
diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c
index a1239083472d..5f79acb56e48 100644
--- a/drivers/media/radio/radio-mr800.c
+++ b/drivers/media/radio/radio-mr800.c
@@ -28,7 +28,7 @@
  * http://av-usbradio.sourceforge.net/index.php
  * http://sourceforge.net/projects/av-usbradio/
  * Latest release of theirs project was in 2005.
- * Probably, this driver could be improved trough using their
+ * Probably, this driver could be improved through using their
  * achievements (specifications given).
  * Also, Faidon Liambotis <paravoid@debian.org> wrote nice driver for this radio
  * in 2007. He allowed to use his driver to improve current mr800 radio driver.
diff --git a/drivers/media/video/cx231xx/cx231xx-avcore.c b/drivers/media/video/cx231xx/cx231xx-avcore.c
index 28f48f41f218..c2174413ab29 100644
--- a/drivers/media/video/cx231xx/cx231xx-avcore.c
+++ b/drivers/media/video/cx231xx/cx231xx-avcore.c
@@ -2414,9 +2414,11 @@ int cx231xx_gpio_i2c_read_ack(struct cx231xx *dev)
 		cx231xx_info("No ACK after %d msec -GPIO I2C failed!",
 			     nInit * 10);
 
-	/* readAck
-	   throuth clock stretch ,slave has given a SCL signal,
-	   so the SDA data can be directly read.  */
+	/*
+	 * readAck
+	 * through clock stretch, slave has given a SCL signal,
+	 * so the SDA data can be directly read.
+	 */
 	status = cx231xx_get_gpio_bit(dev, dev->gpio_dir, (u8 *)&dev->gpio_val);
 
 	if ((dev->gpio_val & 1 << dev->board.tuner_sda_gpio) == 0) {
diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c
index 45e13ee66dc7..16c6a921f40b 100644
--- a/drivers/media/video/cx23885/cx23885-dvb.c
+++ b/drivers/media/video/cx23885/cx23885-dvb.c
@@ -940,7 +940,7 @@ int cx23885_dvb_register(struct cx23885_tsport *port)
 	int err, i;
 
 	/* Here we need to allocate the correct number of frontends,
-	 * as reflected in the cards struct. The reality is that currrently
+	 * as reflected in the cards struct. The reality is that currently
 	 * no cx23885 boards support this - yet. But, if we don't modify this
 	 * code then the second frontend would never be allocated (later)
 	 * and fail with error before the attach in dvb_register().
diff --git a/drivers/media/video/cx88/cx88-core.c b/drivers/media/video/cx88/cx88-core.c
index cf634606ba9a..b35411160f04 100644
--- a/drivers/media/video/cx88/cx88-core.c
+++ b/drivers/media/video/cx88/cx88-core.c
@@ -624,7 +624,7 @@ int cx88_reset(struct cx88_core *core)
 	/* setup image format */
 	cx_andor(MO_COLOR_CTRL, 0x4000, 0x4000);
 
-	/* setup FIFO Threshholds */
+	/* setup FIFO Thresholds */
 	cx_write(MO_PDMA_STHRSH,   0x0807);
 	cx_write(MO_PDMA_DTHRSH,   0x0807);
 
diff --git a/drivers/media/video/davinci/dm355_ccdc.c b/drivers/media/video/davinci/dm355_ccdc.c
index 56fbefe036ae..314390016370 100644
--- a/drivers/media/video/davinci/dm355_ccdc.c
+++ b/drivers/media/video/davinci/dm355_ccdc.c
@@ -285,7 +285,7 @@ static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam)
 
 	if ((ccdcparam->med_filt_thres < 0) ||
 	   (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) {
-		dev_dbg(dev, "Invalid value of median filter thresold\n");
+		dev_dbg(dev, "Invalid value of median filter threshold\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/media/video/davinci/vpss.c b/drivers/media/video/davinci/vpss.c
index 6d709ca8cfb0..453236bd7559 100644
--- a/drivers/media/video/davinci/vpss.c
+++ b/drivers/media/video/davinci/vpss.c
@@ -53,7 +53,7 @@ struct vpss_hw_ops {
 	int (*enable_clock)(enum vpss_clock_sel clock_sel, int en);
 	/* select input to ccdc */
 	void (*select_ccdc_source)(enum vpss_ccdc_source_sel src_sel);
-	/* clear wbl overlflow bit */
+	/* clear wbl overflow bit */
 	int (*clear_wbl_overflow)(enum vpss_wbl_sel wbl_sel);
 };
 
diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c
index cf3af8de6e97..e39efb45fa1c 100644
--- a/drivers/media/video/gspca/sonixb.c
+++ b/drivers/media/video/gspca/sonixb.c
@@ -304,7 +304,7 @@ static const __u8 initOv6650[] = {
 };
 static const __u8 ov6650_sensor_init[][8] =
 {
-	/* Bright, contrast, etc are set througth SCBB interface.
+	/* Bright, contrast, etc are set through SCBB interface.
 	 * AVCAP on win2 do not send any data on this 	controls. */
 	/* Anyway, some registers appears to alter bright and constrat */
 
diff --git a/drivers/media/video/gspca/spca500.c b/drivers/media/video/gspca/spca500.c
index fab7ef85a6c1..7dbd5eea6cc0 100644
--- a/drivers/media/video/gspca/spca500.c
+++ b/drivers/media/video/gspca/spca500.c
@@ -589,7 +589,7 @@ static void spca500_reinit(struct gspca_dev *gspca_dev)
 	int err;
 	__u8 Data;
 
-	/* some unknow command from Aiptek pocket dv and family300 */
+	/* some unknown command from Aiptek pocket dv and family300 */
 
 	reg_w(gspca_dev, 0x00, 0x0d01, 0x01);
 	reg_w(gspca_dev, 0x00, 0x0d03, 0x00);
diff --git a/drivers/media/video/gspca/spca501.c b/drivers/media/video/gspca/spca501.c
index b74a34218da0..66f9f0056146 100644
--- a/drivers/media/video/gspca/spca501.c
+++ b/drivers/media/video/gspca/spca501.c
@@ -1636,7 +1636,7 @@ static const __u16 spca501c_arowana_init_data[][3] = {
 	{}
 };
 
-/* Unknow camera from Ori Usbid 0x0000:0x0000 */
+/* Unknown camera from Ori Usbid 0x0000:0x0000 */
 /* Based on snoops from Ori Cohen */
 static const __u16 spca501c_mysterious_open_data[][3] = {
 	{0x02, 0x000f, 0x0005},
@@ -1945,7 +1945,7 @@ static int sd_init(struct gspca_dev *gspca_dev)
 			goto error;
 		break;
 	case MystFromOriUnknownCamera:
-		/* UnKnow Ori CMOS Camera data */
+		/* Unknown Ori CMOS Camera data */
 		if (write_vector(gspca_dev, spca501c_mysterious_open_data))
 			goto error;
 		break;
@@ -1978,7 +1978,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
 		write_vector(gspca_dev, spca501c_arowana_open_data);
 		break;
 	case MystFromOriUnknownCamera:
-		/* UnKnow  CMOS Camera data */
+		/* Unknown CMOS Camera data */
 		write_vector(gspca_dev, spca501c_mysterious_init_data);
 		break;
 	default:
diff --git a/drivers/media/video/gspca/sunplus.c b/drivers/media/video/gspca/sunplus.c
index aa8f995ce04e..1a9af2ebdbef 100644
--- a/drivers/media/video/gspca/sunplus.c
+++ b/drivers/media/video/gspca/sunplus.c
@@ -631,7 +631,7 @@ static void spca504A_acknowledged_command(struct gspca_dev *gspca_dev,
 	count = 200;
 	while (--count > 0) {
 		msleep(10);
-		/* gsmart mini2 write a each wait setting 1 ms is enought */
+		/* gsmart mini2 write a each wait setting 1 ms is enough */
 /*		reg_w_riv(dev, req, idx, val); */
 		status = reg_r_12(gspca_dev, 0x01, 0x0001, 1);
 		if (status == endcode) {
diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c
index cdf3357b4c9f..49c3c1226e0e 100644
--- a/drivers/media/video/gspca/zc3xx.c
+++ b/drivers/media/video/gspca/zc3xx.c
@@ -7065,7 +7065,7 @@ static int sd_config(struct gspca_dev *gspca_dev,
 				break;
 			default:
 				PDEBUG(D_PROBE,
-					"Sensor UNKNOW_0 force Tas5130");
+					"Sensor UNKNOWN_0 force Tas5130");
 				sd->sensor = SENSOR_TAS5130CXX;
 			}
 			break;
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
index 5b152ff20bd0..5fcad28211d2 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
@@ -332,7 +332,7 @@ struct pvr2_hdw {
 
 	/* Bit mask of PVR2_CVAL_INPUT choices which are valid for the hardware */
 	unsigned int input_avail_mask;
-	/* Bit mask of PVR2_CVAL_INPUT choices which are currenly allowed */
+	/* Bit mask of PVR2_CVAL_INPUT choices which are currently allowed */
 	unsigned int input_allowed_mask;
 
 	/* Location of eeprom or a negative number if none */
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index 9e3262c0ba37..a4c84368eb10 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -1985,7 +1985,7 @@ static int save_frame(struct s2255_dev *dev, struct s2255_pipeinfo *pipe_info)
 					wake_up(&dev->fw_data->wait_fw);
 					break;
 				default:
-					printk(KERN_INFO "s2255 unknwn resp\n");
+					printk(KERN_INFO "s2255 unknown resp\n");
 				}
 			default:
 				pdata++;
diff --git a/drivers/media/video/zoran/zoran.h b/drivers/media/video/zoran/zoran.h
index d439c76b27e1..cb1de7ea197a 100644
--- a/drivers/media/video/zoran/zoran.h
+++ b/drivers/media/video/zoran/zoran.h
@@ -106,7 +106,7 @@ struct zoran_params {
 	unsigned long jpeg_markers;	/* Which markers should go into the JPEG output.
 					 * Unless you exactly know what you do, leave them untouched.
 					 * Inluding less markers will make the resulting code
-					 * smaller, but there will be fewer aplications
+					 * smaller, but there will be fewer applications
 					 * which can read it.
 					 * The presence of the APP and COM marker is
 					 * influenced by APP0_len and COM_len ONLY! */
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index d505b68cd372..e39986a78273 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -940,7 +940,7 @@ static const struct block_device_operations i2o_block_fops = {
  *	Allocate memory for the i2o_block_device struct, gendisk and request
  *	queue and initialize them as far as no additional information is needed.
  *
- *	Returns a pointer to the allocated I2O Block device on succes or a
+ *	Returns a pointer to the allocated I2O Block device on success or a
  *	negative error code on failure.
  */
 static struct i2o_block_device *i2o_block_device_alloc(void)
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index 27cf4af0e13d..e5ab62141503 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -132,7 +132,7 @@ u32 i2o_cntxt_list_add(struct i2o_controller * c, void *ptr)
  *	Removes a previously added pointer from the context list and returns
  *	the matching context id.
  *
- *	Returns context id on succes or 0 on failure.
+ *	Returns context id on success or 0 on failure.
  */
 u32 i2o_cntxt_list_remove(struct i2o_controller * c, void *ptr)
 {
@@ -198,7 +198,7 @@ void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context)
  *	@c: controller to which the context list belong
  *	@ptr: pointer to which the context id should be fetched
  *
- *	Returns context id which matches to the pointer on succes or 0 on
+ *	Returns context id which matches to the pointer on success or 0 on
  *	failure.
  */
 u32 i2o_cntxt_list_get_ptr(struct i2o_controller * c, void *ptr)
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 41c8fe2a928c..ce5eda985ab0 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -92,7 +92,7 @@ static void gru_vma_close(struct vm_area_struct *vma)
 /*
  * gru_file_mmap
  *
- * Called when mmaping the device.  Initializes the vma with a fault handler
+ * Called when mmapping the device.  Initializes the vma with a fault handler
  * and private data structure necessary to allocate, track, and free the
  * underlying pages.
  */
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 99b74a351020..941a4d35ef8d 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1360,7 +1360,7 @@ static struct mmc_host_ops s3cmci_ops = {
 
 static struct s3c24xx_mci_pdata s3cmci_def_pdata = {
 	/* This is currently here to avoid a number of if (host->pdata)
-	 * checks. Any zero fields to ensure reaonable defaults are picked. */
+	 * checks. Any zero fields to ensure reasonable defaults are picked. */
 };
 
 #ifdef CONFIG_CPU_FREQ
diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 3aa05cd18ea1..592016a0668f 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -18,7 +18,7 @@
                 to specify the offset instead of the absolute address
 
   NOTE:
-  With slram it's only possible to map a contigous memory region. Therfore
+  With slram it's only possible to map a contiguous memory region. Therefore
   if there's a device mapped somewhere in the region specified slram will
   fail to load (see kernel log if modprobe fails).
 
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index e51c1ed7ac18..b126cf887476 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -1056,7 +1056,7 @@ static struct nand_ecclayout doc200x_oobinfo = {
 };
 
 /* Find the (I)NFTL Media Header, and optionally also the mirror media header.
-   On sucessful return, buf will contain a copy of the media header for
+   On successful return, buf will contain a copy of the media header for
    further processing.  id is the string to scan for, and will presumably be
    either "ANAND" or "BNAND".  If findmirror=1, also look for the mirror media
    header.  The page #s of the found media headers are placed in mh0_page and
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index db7ae9d6a296..92320a643275 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -475,7 +475,7 @@ int __nand_correct_data(unsigned char *buf,
 		 *
 		 * The b2 shift is there to get rid of the lowest two bits.
 		 * We could also do addressbits[b2] >> 1 but for the
-		 * performace it does not make any difference
+		 * performance it does not make any difference
 		 */
 		if (eccsize_mult == 1)
 			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 11dc7e69c4fb..68b5b3a486a9 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -875,7 +875,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
  * @info: The controller instance.
  * @nmtd: The driver version of the MTD instance.
  *
- * This routine is called after the chip probe has succesfully completed
+ * This routine is called after the chip probe has successfully completed
  * and the relevant per-chip information updated. This call ensure that
  * we update the internal state accordingly.
  *
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index ea6b139b812c..1663bc9e45de 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -19,7 +19,7 @@
    TBD:
    * look at deferring rx frames rather than discarding (as per tulip)
    * handle tx ring full as per tulip
-   * performace test to tune rx_copybreak
+   * performance test to tune rx_copybreak
 
    Most of my modifications relate to the braindead big-endian
    implementation by Intel.  When the i596 is operating in
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 4e6359fff0e1..766aabfdfc75 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -1633,8 +1633,13 @@ static int amd8111e_enable_link_change(struct amd8111e_priv* lp)
 	readl(lp->mmio + CMD7);
 	return 0;
 }
-/* This function is called when a packet transmission fails to complete within a  resonable period, on the assumption that an interrupts have been failed or the  interface is locked up. This function will reinitialize the hardware */
 
+/*
+ * This function is called when a packet transmission fails to complete
+ * within a reasonable period, on the assumption that an interrupt have
+ * failed or the interface is locked up. This function will reinitialize
+ * the hardware.
+ */
 static void amd8111e_tx_timeout(struct net_device *dev)
 {
 	struct amd8111e_priv* lp = netdev_priv(dev);
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index b5dc7f550725..9d828aed968a 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -120,7 +120,7 @@ static int irq = 5;		/* Default IRQ */
  *      DAYNA driver mode:
  *              Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95, 
  *		Farallon PhoneNET PC III, Farallon PhoneNET PC II
- *	Other cards possibly supported mode unkown though:
+ *	Other cards possibly supported mode unknown though:
  *		Dayna DL2000 (Full length), COPS LT/M (Micro-Channel)
  *
  *	Cards NOT supported by this driver but supported by the ltpc.c
diff --git a/drivers/net/ariadne.h b/drivers/net/ariadne.h
index bb613f292e04..727be5cdd1ea 100644
--- a/drivers/net/ariadne.h
+++ b/drivers/net/ariadne.h
@@ -244,7 +244,7 @@ struct Am79C960 {
 #define DLNKTST		0x0010	/* Disable Link Status */
 #define DAPC		0x0008	/* Disable Automatic Polarity Correction */
 #define MENDECL		0x0004	/* MENDEC Loopback Mode */
-#define LRTTSEL		0x0002	/* Low Receive Treshold/Transmit Mode Select */
+#define LRTTSEL		0x0002	/* Low Receive Threshold/Transmit Mode Select */
 #define PORTSEL1	0x0001	/* Port Select Bits */
 #define PORTSEL2	0x8000	/* Port Select Bits */
 #define INTL		0x4000	/* Internal Loopback */
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 1372e9a99f5b..96506eacc131 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -1543,7 +1543,7 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 		if (status & ISR_OVER)
 			if (netif_msg_intr(adapter))
 				dev_warn(&pdev->dev,
-					"TX/RX over flow (status = 0x%x)\n",
+					"TX/RX overflow (status = 0x%x)\n",
 					status & ISR_OVER);
 
 		/* link event */
diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h
index 49953787e41c..f0bb62b5ca9e 100644
--- a/drivers/net/benet/be_cmds.h
+++ b/drivers/net/benet/be_cmds.h
@@ -435,7 +435,7 @@ enum be_if_flags {
  * filtering capabilities. */
 struct be_cmd_req_if_create {
 	struct be_cmd_req_hdr hdr;
-	u32 version;		/* ignore currntly */
+	u32 version;		/* ignore currently */
 	u32 capability_flags;
 	u32 enable_flags;
 	u8 mac_addr[ETH_ALEN];
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 1f941f027718..02a0908707ed 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1876,7 +1876,7 @@ int be_load_fw(struct be_adapter *adapter, u8 *func)
 		goto fw_exit;
 	}
 
-	dev_info(&adapter->pdev->dev, "Firmware flashed succesfully\n");
+	dev_info(&adapter->pdev->dev, "Firmware flashed successfully\n");
 
 fw_exit:
 	release_firmware(fw);
diff --git a/drivers/net/bnx2x_reg.h b/drivers/net/bnx2x_reg.h
index aa76cbada5e2..732eafdeb0f2 100644
--- a/drivers/net/bnx2x_reg.h
+++ b/drivers/net/bnx2x_reg.h
@@ -2536,7 +2536,7 @@
 /* [RC 1] A flag to indicate that overflow error occurred in one of the
    queues. */
 #define QM_REG_OVFERROR 					 0x16805c
-/* [RC 7] the Q were the qverflow occurs */
+/* [RC 7] the Q where the overflow occurs */
 #define QM_REG_OVFQNUM						 0x168058
 /* [R 16] Pause state for physical queues 15-0 */
 #define QM_REG_PAUSESTATE0					 0x168410
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index f86612857a73..56ba872be9c1 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1285,7 +1285,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/*
 	 * We do not use Tx completion interrupts to free DMAd Tx packets.
-	 * This is good for performamce but means that we rely on new Tx
+	 * This is good for performance but means that we rely on new Tx
 	 * packets arriving to run the destructors of completed packets,
 	 * which open up space in their sockets' send queues.  Sometimes
 	 * we do not get such new packets causing Tx to stall.  A single
diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c
index d76885223366..75b099ce49c9 100644
--- a/drivers/net/ehea/ehea_ethtool.c
+++ b/drivers/net/ehea/ehea_ethtool.c
@@ -118,7 +118,7 @@ doit:
 	ret = ehea_set_portspeed(port, sp);
 
 	if (!ret)
-		ehea_info("%s: Port speed succesfully set: %dMbps "
+		ehea_info("%s: Port speed successfully set: %dMbps "
 			  "%s Duplex",
 			  port->netdev->name, port->port_speed,
 			  port->full_duplex == 1 ? "Full" : "Half");
@@ -134,7 +134,7 @@ static int ehea_nway_reset(struct net_device *dev)
 	ret = ehea_set_portspeed(port, EHEA_SPEED_AUTONEG);
 
 	if (!ret)
-		ehea_info("%s: Port speed succesfully set: %dMbps "
+		ehea_info("%s: Port speed successfully set: %dMbps "
 			  "%s Duplex",
 			  port->netdev->name, port->port_speed,
 			  port->full_duplex == 1 ? "Full" : "Half");
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index ed60fd664273..0cab992b3d1a 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -35,7 +35,7 @@
  *          driver only supports standard serial hardware (8250, 16450, 16550A)
  *
  *          This modem usually draws its supply current out of the otherwise unused
- *          TXD pin of the serial port. Thus a contignuous stream of 0x00-bytes
+ *          TXD pin of the serial port. Thus a contiguous stream of 0x00-bytes
  *          is transmitted to achieve a positive supply voltage.
  *
  *  hsk:    This is a 4800 baud FSK modem, designed for TNC use. It works fine
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index aa7286bc4364..8739ba850f82 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1384,7 +1384,7 @@ static inline void veth_build_dma_list(struct dma_chunk *list,
 	unsigned long done;
 	int i = 1;
 
-	/* FIXME: skbs are continguous in real addresses.  Do we
+	/* FIXME: skbs are contiguous in real addresses.  Do we
 	 * really need to break it into PAGE_SIZE chunks, or can we do
 	 * it just at the granularity of iSeries real->absolute
 	 * mapping?  Indeed, given the way the allocator works, can we
diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c
index a0c578585a50..b77238dbafb8 100644
--- a/drivers/net/lasi_82596.c
+++ b/drivers/net/lasi_82596.c
@@ -47,7 +47,7 @@
    TBD:
    * look at deferring rx frames rather than discarding (as per tulip)
    * handle tx ring full as per tulip
-   * performace test to tune rx_copybreak
+   * performance test to tune rx_copybreak
 
    Most of my modifications relate to the braindead big-endian
    implementation by Intel.  When the i596 is operating in
diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c
index 51e11c3e53e1..c0dbfc185b53 100644
--- a/drivers/net/lib82596.c
+++ b/drivers/net/lib82596.c
@@ -47,7 +47,7 @@
    TBD:
    * look at deferring rx frames rather than discarding (as per tulip)
    * handle tx ring full as per tulip
-   * performace test to tune rx_copybreak
+   * performance test to tune rx_copybreak
 
    Most of my modifications relate to the braindead big-endian
    implementation by Intel.  When the i596 is operating in
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index 03b781a7a182..829b9ec9ff67 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -204,7 +204,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
 		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
 
-		en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
+		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
 		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
 				 PCI_DMA_FROMDEVICE);
 		put_page(skb_frags[nr].page);
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index 8c7279965b44..3d1396af9462 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c
@@ -47,7 +47,7 @@ enum {
 static int inline_thold __read_mostly = MAX_INLINE;
 
 module_param_named(inline_thold, inline_thold, int, 0444);
-MODULE_PARM_DESC(inline_thold, "treshold for using inline data");
+MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_tx_ring *ring, u32 size,
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index 4376147b0ea0..82c3ebc584e3 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -162,7 +162,7 @@ enum {
 #define MLX4_EN_DEF_RX_PAUSE	1
 #define MLX4_EN_DEF_TX_PAUSE	1
 
-/* Interval between sucessive polls in the Tx routine when polling is used
+/* Interval between successive polls in the Tx routine when polling is used
    instead of interrupts (in per-core Tx rings) - should be power of 2 */
 #define MLX4_EN_TX_POLL_MODER	16
 #define MLX4_EN_TX_POLL_TIMEOUT	(HZ / 4)
diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c
index b211613e9dbd..86fde1a90a5a 100644
--- a/drivers/net/ps3_gelic_net.c
+++ b/drivers/net/ps3_gelic_net.c
@@ -296,7 +296,7 @@ static void gelic_card_reset_chain(struct gelic_card *card,
  * @card: card structure
  * @descr: descriptor to re-init
  *
- * return 0 on succes, <0 on failure
+ * return 0 on success, <0 on failure
  *
  * allocates a new rx skb, iommu-maps it and attaches it to the descriptor.
  * Activate the descriptor state-wise
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index c072f7f36acf..9d94a141555c 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -1760,7 +1760,7 @@ static int sis900_rx(struct net_device *net_dev)
 				sis_priv->rx_ring[entry].bufptr, RX_BUF_SIZE,
 				PCI_DMA_FROMDEVICE);
 
-			/* refill the Rx buffer, what if there is not enought
+			/* refill the Rx buffer, what if there is not enough
 			 * memory for new socket buffer ?? */
 			if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) {
 				/*
@@ -1775,7 +1775,7 @@ static int sis900_rx(struct net_device *net_dev)
 			}
 
 			/* This situation should never happen, but due to
-			   some unknow bugs, it is possible that
+			   some unknown bugs, it is possible that
 			   we are working on NULL sk_buff :-( */
 			if (sis_priv->rx_skbuff[entry] == NULL) {
 				if (netif_msg_rx_err(sis_priv))
diff --git a/drivers/net/skfp/h/smc.h b/drivers/net/skfp/h/smc.h
index 1758d9548361..026a83b9f743 100644
--- a/drivers/net/skfp/h/smc.h
+++ b/drivers/net/skfp/h/smc.h
@@ -393,10 +393,10 @@ struct smt_config {
 					 */
 	u_long	mac_d_max ;		/* MAC : D_Max timer value */
 
-	u_long lct_short ;		/* LCT : error threshhold */
-	u_long lct_medium ;		/* LCT : error threshhold */
-	u_long lct_long ;		/* LCT : error threshhold */
-	u_long lct_extended ;		/* LCT : error threshhold */
+	u_long lct_short ;		/* LCT : error threshold */
+	u_long lct_medium ;		/* LCT : error threshold */
+	u_long lct_long ;		/* LCT : error threshold */
+	u_long lct_extended ;		/* LCT : error threshold */
 } ;
 
 #ifdef	DEBUG
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index b27156eaf267..db216a728503 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1002,7 +1002,7 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		}
 		break;
 	default:
-		printk("ioctl for %s: unknow cmd: %04x\n", dev->name, ioc.cmd);
+		printk("ioctl for %s: unknown cmd: %04x\n", dev->name, ioc.cmd);
 		status = -EOPNOTSUPP;
 
 	}			// switch
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index ccdd196f5297..4a00940d0a54 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -816,7 +816,7 @@ static int smsc911x_mii_probe(struct net_device *dev)
 	SMSC_TRACE(HW, "Passed Loop Back Test");
 #endif				/* USE_PHY_WORK_AROUND */
 
-	SMSC_TRACE(HW, "phy initialised succesfully");
+	SMSC_TRACE(HW, "phy initialised successfully");
 	return 0;
 }
 
diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h
index b5716bd8a597..016360c65ce2 100644
--- a/drivers/net/smsc911x.h
+++ b/drivers/net/smsc911x.h
@@ -30,7 +30,7 @@
 #define SMSC_NAPI_WEIGHT	16
 
 /* implements a PHY loopback test at initialisation time, to ensure a packet
- * can be succesfully looped back */
+ * can be successfully looped back */
 #define USE_PHY_WORK_AROUND
 
 #define DPRINTK(nlevel, klevel, fmt, args...) \
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 90e663f4515c..40b51e6bc77b 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -409,7 +409,7 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card)
  * @card: card structure
  * @descr: descriptor to re-init
  *
- * Return 0 on succes, <0 on failure.
+ * Return 0 on success, <0 on failure.
  *
  * Allocates a new rx skb, iommu-maps it and attaches it to the
  * descriptor. Mark the descriptor as activated, ready-to-use.
diff --git a/drivers/net/stmmac/gmac.c b/drivers/net/stmmac/gmac.c
index b624bb5bae0a..52586ee68953 100644
--- a/drivers/net/stmmac/gmac.c
+++ b/drivers/net/stmmac/gmac.c
@@ -112,7 +112,7 @@ static void gmac_dma_operation_mode(unsigned long ioaddr, int txmode,
 			      " (threshold = %d)\n", txmode);
 		csr6 &= ~DMA_CONTROL_TSF;
 		csr6 &= DMA_CONTROL_TC_TX_MASK;
-		/* Set the transmit threashold */
+		/* Set the transmit threshold */
 		if (txmode <= 32)
 			csr6 |= DMA_CONTROL_TTC_32;
 		else if (txmode <= 64)
diff --git a/drivers/net/stmmac/gmac.h b/drivers/net/stmmac/gmac.h
index 684a363120a9..2e82d6c9a148 100644
--- a/drivers/net/stmmac/gmac.h
+++ b/drivers/net/stmmac/gmac.h
@@ -154,14 +154,14 @@ enum rx_tx_priority_ratio {
 #define DMA_CONTROL_DT		0x04000000 /* Disable Drop TCP/IP csum error */
 #define DMA_CONTROL_RSF		0x02000000 /* Receive Store and Forward */
 #define DMA_CONTROL_DFF		0x01000000 /* Disaable flushing */
-/* Theshold for Activating the FC */
+/* Threshold for Activating the FC */
 enum rfa {
 	act_full_minus_1 = 0x00800000,
 	act_full_minus_2 = 0x00800200,
 	act_full_minus_3 = 0x00800400,
 	act_full_minus_4 = 0x00800600,
 };
-/* Theshold for Deactivating the FC */
+/* Threshold for Deactivating the FC */
 enum rfd {
 	deac_full_minus_1 = 0x00400000,
 	deac_full_minus_2 = 0x00400800,
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index ebda61bc4c2f..78e12b5e3ac7 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -426,7 +426,7 @@ static int smctr_alloc_shared_memory(struct net_device *dev)
         smctr_malloc(dev, 1L);
 
         /* Allocate Non-MAC receive data buffers.
-         * To guarantee a minimum of 256 contigous memory to
+         * To guarantee a minimum of 256 contiguous memory to
          * UM_Receive_Packet's lookahead pointer, before a page
          * change or ring end is encountered, place each rx buffer on
          * a 256 byte boundary.
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 4469f2451a6f..5e9adbaf6745 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3798,7 +3798,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
 		prop = of_get_property(np, "tx-clock", NULL);
 		if (!prop) {
 			printk(KERN_ERR
-				"ucc_geth: mising tx-clock-name property\n");
+				"ucc_geth: missing tx-clock-name property\n");
 			return -EINVAL;
 		}
 		if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) {
diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h
index 03a6ca016d5a..a007e2acf651 100644
--- a/drivers/net/ucc_geth.h
+++ b/drivers/net/ucc_geth.h
@@ -80,16 +80,16 @@ struct ucc_geth {
 				   frames) received that were between 128
 				   (Including FCS length==4) and 255 octets */
 	u32 txok;		/* Total number of octets residing in frames
-				   that where involved in succesfull
+				   that where involved in successfull
 				   transmission */
 	u16 txcf;		/* Total number of PAUSE control frames
 				   transmitted by this MAC */
 	u8 res4[0x2];
 	u32 tmca;		/* Total number of frames that were transmitted
-				   succesfully with the group address bit set
+				   successfully with the group address bit set
 				   that are not broadcast frames */
 	u32 tbca;		/* Total number of frames transmitted
-				   succesfully that had destination address
+				   successfully that had destination address
 				   field equal to the broadcast address */
 	u32 rxfok;		/* Total number of frames received OK */
 	u32 rxbok;		/* Total number of octets received OK */
@@ -98,9 +98,9 @@ struct ucc_geth {
 				   HW because it includes octets in frames that
 				   never even reach the UCC */
 	u32 rmca;		/* Total number of frames that were received
-				   succesfully with the group address bit set
+				   successfully with the group address bit set
 				   that are not broadcast frames */
-	u32 rbca;		/* Total number of frames received succesfully
+	u32 rbca;		/* Total number of frames received successfully
 				   that had destination address equal to the
 				   broadcast address */
 	u32 scar;		/* Statistics carry register */
@@ -759,15 +759,15 @@ struct ucc_geth_hardware_statistics {
 				   frames) received that were between 128
 				   (Including FCS length==4) and 255 octets */
 	u32 txok;		/* Total number of octets residing in frames
-				   that where involved in succesfull
+				   that where involved in successfull
 				   transmission */
 	u16 txcf;		/* Total number of PAUSE control frames
 				   transmitted by this MAC */
 	u32 tmca;		/* Total number of frames that were transmitted
-				   succesfully with the group address bit set
+				   successfully with the group address bit set
 				   that are not broadcast frames */
 	u32 tbca;		/* Total number of frames transmitted
-				   succesfully that had destination address
+				   successfully that had destination address
 				   field equal to the broadcast address */
 	u32 rxfok;		/* Total number of frames received OK */
 	u32 rxbok;		/* Total number of octets received OK */
@@ -776,9 +776,9 @@ struct ucc_geth_hardware_statistics {
 				   HW because it includes octets in frames that
 				   never even reach the UCC */
 	u32 rmca;		/* Total number of frames that were received
-				   succesfully with the group address bit set
+				   successfully with the group address bit set
 				   that are not broadcast frames */
-	u32 rbca;		/* Total number of frames received succesfully
+	u32 rbca;		/* Total number of frames received successfully
 				   that had destination address equal to the
 				   broadcast address */
 } __attribute__ ((packed));
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index c6c922247d05..0c3c738d7419 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -748,7 +748,7 @@ static int smsc95xx_phy_initialize(struct usbnet *dev)
 	mii_nway_restart(&dev->mii);
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "phy initialised succesfully");
+		devdbg(dev, "phy initialised successfully");
 	return 0;
 }
 
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 7ea71b33d2e9..ee784e091f67 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -927,7 +927,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev,
         sc->lmc_media = &lmc_t1_media;
         break;
     default:
-	printk(KERN_WARNING "%s: LMC UNKOWN CARD!\n", dev->name);
+	printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name);
         break;
     }
 
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c
index 07c32e68909f..99d27473ba3f 100644
--- a/drivers/net/wimax/i2400m/rx.c
+++ b/drivers/net/wimax/i2400m/rx.c
@@ -1114,7 +1114,7 @@ error:
  * device. See the file header for the format. Run all checks on the
  * buffer header, then run over each payload's descriptors, verify
  * their consistency and act on each payload's contents.  If
- * everything is succesful, update the device's statistics.
+ * everything is successful, update the device's statistics.
  *
  * Note: You need to set the skb to contain only the length of the
  * received buffer; for that, use skb_trim(skb, RECEIVED_SIZE).
diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 1a039f2bd732..6f04cc758dcc 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -117,7 +117,7 @@ static unsigned int ath5k_hw_rfb_op(struct ath5k_hw *ah,
 
 /*
  * This code is used to optimize rf gain on different environments
- * (temprature mostly) based on feedback from a power detector.
+ * (temperature mostly) based on feedback from a power detector.
  *
  * It's only used on RF5111 and RF5112, later RF chips seem to have
  * auto adjustment on hw -notice they have a much smaller BANK 7 and
@@ -2675,7 +2675,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah,
 		/* Fill curves in reverse order
 		 * from lower power (max gain)
 		 * to higher power. Use curve -> idx
-		 * backmaping we did on eeprom init */
+		 * backmapping we did on eeprom init */
 		u8 idx = pdg_curve_to_idx[pdg];
 
 		/* Grab the needed curves by index */
@@ -2777,7 +2777,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah,
 	/* Now we have a set of curves for this
 	 * channel on tmpL (x range is table_max - table_min
 	 * and y values are tmpL[pdg][]) sorted in the same
-	 * order as EEPROM (because we've used the backmaping).
+	 * order as EEPROM (because we've used the backmapping).
 	 * So for RF5112 it's from higher power to lower power
 	 * and for RF2413 it's from lower power to higher power.
 	 * For RF5111 we only have one curve. */
diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c
index 1895d63aad0a..0a35ee62a02a 100644
--- a/drivers/net/wireless/ath/ath9k/rc.c
+++ b/drivers/net/wireless/ath/ath9k/rc.c
@@ -969,7 +969,7 @@ static bool ath_rc_update_per(struct ath_softc *sc,
 				 * Since this probe succeeded, we allow the next
 				 * probe twice as soon.  This allows the maxRate
 				 * to move up faster if the probes are
-				 * succesful.
+				 * successful.
 				 */
 				ath_rc_priv->probe_time =
 					now_msec - rate_table->probe_interval / 2;
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index a741d37fd96f..e1b330023200 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -551,7 +551,7 @@ static int ipw2100_get_ordinal(struct ipw2100_priv *priv, u32 ord,
 		/* get number of entries */
 		field_count = *(((u16 *) & field_info) + 1);
 
-		/* abort if no enought memory */
+		/* abort if no enough memory */
 		total_length = field_len * field_count;
 		if (total_length > *len) {
 			*len = total_length;
@@ -3044,7 +3044,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv)
 			     IPW_MAX_BDS)) {
 			/* TODO: Support merging buffers if more than
 			 * IPW_MAX_BDS are used */
-			IPW_DEBUG_INFO("%s: Maximum BD theshold exceeded.  "
+			IPW_DEBUG_INFO("%s: Maximum BD threshold exceeded.  "
 				       "Increase fragmentation level.\n",
 				       priv->net_dev->name);
 		}
@@ -6823,7 +6823,7 @@ static int ipw2100_wx_get_range(struct net_device *dev,
 	range->max_qual.updated = 7;	/* Updated all three */
 
 	range->avg_qual.qual = 70;	/* > 8% missed beacons is 'bad' */
-	/* TODO: Find real 'good' to 'bad' threshol value for RSSI */
+	/* TODO: Find real 'good' to 'bad' threshold value for RSSI */
 	range->avg_qual.level = 20 + IPW2100_RSSI_TO_DBM;
 	range->avg_qual.noise = 0;
 	range->avg_qual.updated = 7;	/* Updated all three */
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index 9b0f2c0646e0..b2aa960b8346 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -787,7 +787,7 @@ static int ipw_get_ordinal(struct ipw_priv *priv, u32 ord, void *val, u32 * len)
 		/* get number of entries */
 		field_count = *(((u16 *) & field_info) + 1);
 
-		/* abort if not enought memory */
+		/* abort if not enough memory */
 		total_len = field_len * field_count;
 		if (total_len > *len) {
 			*len = total_len;
@@ -7751,7 +7751,7 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv,
 	case SEC_LEVEL_0:
 		break;
 	default:
-		printk(KERN_ERR "Unknow security level %d\n",
+		printk(KERN_ERR "Unknown security level %d\n",
 		       priv->ieee->sec.level);
 		break;
 	}
@@ -8917,7 +8917,7 @@ static int ipw_wx_get_range(struct net_device *dev,
 	range->max_qual.updated = 7;	/* Updated all three */
 
 	range->avg_qual.qual = 70;
-	/* TODO: Find real 'good' to 'bad' threshol value for RSSI */
+	/* TODO: Find real 'good' to 'bad' threshold value for RSSI */
 	range->avg_qual.level = 0;	/* FIXME to real average level */
 	range->avg_qual.noise = 0;
 	range->avg_qual.updated = 7;	/* Updated all three */
@@ -10290,7 +10290,7 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct libipw_txb *txb,
 		case SEC_LEVEL_0:
 			break;
 		default:
-			printk(KERN_ERR "Unknow security level %d\n",
+			printk(KERN_ERR "Unknown security level %d\n",
 			       priv->ieee->sec.level);
 			break;
 		}
diff --git a/drivers/net/wireless/ipw2x00/libipw_module.c b/drivers/net/wireless/ipw2x00/libipw_module.c
index be5b809ec97a..20b8a8a20644 100644
--- a/drivers/net/wireless/ipw2x00/libipw_module.c
+++ b/drivers/net/wireless/ipw2x00/libipw_module.c
@@ -199,7 +199,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv, int monitor)
 	ieee->host_decrypt = 1;
 	ieee->host_mc_decrypt = 1;
 
-	/* Host fragementation in Open mode. Default is enabled.
+	/* Host fragmentation in Open mode. Default is enabled.
 	 * Note: host fragmentation is always enabled if host encryption
 	 * is enabled. For cards can do hardware encryption, they must do
 	 * hardware fragmentation as well. So we don't need a variable
diff --git a/drivers/net/wireless/iwmc3200wifi/hal.c b/drivers/net/wireless/iwmc3200wifi/hal.c
index c430418248b4..d13c8853ee82 100644
--- a/drivers/net/wireless/iwmc3200wifi/hal.c
+++ b/drivers/net/wireless/iwmc3200wifi/hal.c
@@ -411,7 +411,7 @@ static void iwm_build_lmac_hdr(struct iwm_priv *iwm, struct iwm_lmac_hdr *hdr,
 /*
  * iwm_hal_send_host_cmd(): sends commands to the UMAC or the LMAC.
  * Sending command to the LMAC is equivalent to sending a
- * regular UMAC command with the LMAC passtrough or the LMAC
+ * regular UMAC command with the LMAC passthrough or the LMAC
  * wrapper UMAC command IDs.
  */
 int iwm_hal_send_host_cmd(struct iwm_priv *iwm,
diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c
index 771a301003c9..8ddb51a2a977 100644
--- a/drivers/net/wireless/iwmc3200wifi/rx.c
+++ b/drivers/net/wireless/iwmc3200wifi/rx.c
@@ -1448,7 +1448,7 @@ static void iwm_rx_process_packet(struct iwm_priv *iwm,
 		kfree_skb(packet->skb);
 		break;
 	default:
-		IWM_ERR(iwm, "Unknow ticket action: %d\n",
+		IWM_ERR(iwm, "Unknown ticket action: %d\n",
 			le16_to_cpu(ticket_node->ticket->action));
 		kfree_skb(packet->skb);
 	}
diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c
index 485a8d406525..afe6abecc044 100644
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -934,7 +934,7 @@ static int if_sdio_probe(struct sdio_func *func,
 	}
 
 	if (i == ARRAY_SIZE(if_sdio_models)) {
-		lbs_pr_err("unkown card model 0x%x\n", card->model);
+		lbs_pr_err("unknown card model 0x%x\n", card->model);
 		ret = -ENODEV;
 		goto free;
 	}
diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c
index bc08464d8323..f7f5c793514b 100644
--- a/drivers/net/wireless/prism54/isl_ioctl.c
+++ b/drivers/net/wireless/prism54/isl_ioctl.c
@@ -1897,7 +1897,7 @@ prism54_get_mac(struct net_device *ndev, struct iw_request_info *info,
 	return 0;
 }
 
-/* Setting policy also clears the MAC acl, even if we don't change the defaut
+/* Setting policy also clears the MAC acl, even if we don't change the default
  * policy
  */
 
@@ -2323,7 +2323,7 @@ prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid,
 
 	case DOT11_OID_BEACON:
 		send_formatted_event(priv,
-				     "Received a beacon from an unkown AP",
+				     "Received a beacon from an unknown AP",
 				     mlme, 0);
 		break;
 
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.h b/drivers/net/wireless/rt2x00/rt2400pci.h
index ccd644104ad1..aced05775693 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.h
+++ b/drivers/net/wireless/rt2x00/rt2400pci.h
@@ -35,7 +35,7 @@
 
 /*
  * Signal information.
- * Defaul offset is required for RSSI <-> dBm conversion.
+ * Default offset is required for RSSI <-> dBm conversion.
  */
 #define DEFAULT_RSSI_OFFSET		100
 
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.h b/drivers/net/wireless/rt2x00/rt2500pci.h
index 54d37957883c..3db9041838a4 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.h
+++ b/drivers/net/wireless/rt2x00/rt2500pci.h
@@ -46,7 +46,7 @@
 
 /*
  * Signal information.
- * Defaul offset is required for RSSI <-> dBm conversion.
+ * Default offset is required for RSSI <-> dBm conversion.
  */
 #define DEFAULT_RSSI_OFFSET		121
 
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.h b/drivers/net/wireless/rt2x00/rt2500usb.h
index b01edca42583..d3000827883a 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.h
+++ b/drivers/net/wireless/rt2x00/rt2500usb.h
@@ -46,7 +46,7 @@
 
 /*
  * Signal information.
- * Defaul offset is required for RSSI <-> dBm conversion.
+ * Default offset is required for RSSI <-> dBm conversion.
  */
 #define DEFAULT_RSSI_OFFSET		120
 
diff --git a/drivers/net/wireless/rt2x00/rt61pci.h b/drivers/net/wireless/rt2x00/rt61pci.h
index 93eb699165cc..77b5116f549b 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.h
+++ b/drivers/net/wireless/rt2x00/rt61pci.h
@@ -37,7 +37,7 @@
 
 /*
  * Signal information.
- * Defaul offset is required for RSSI <-> dBm conversion.
+ * Default offset is required for RSSI <-> dBm conversion.
  */
 #define DEFAULT_RSSI_OFFSET		120
 
diff --git a/drivers/net/wireless/rt2x00/rt73usb.h b/drivers/net/wireless/rt2x00/rt73usb.h
index 81fe0be51c42..e194332dac5f 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.h
+++ b/drivers/net/wireless/rt2x00/rt73usb.h
@@ -37,7 +37,7 @@
 
 /*
  * Signal information.
- * Defaul offset is required for RSSI <-> dBm conversion.
+ * Default offset is required for RSSI <-> dBm conversion.
  */
 #define DEFAULT_RSSI_OFFSET		120
 
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index 431a20ec6db6..b3b0b5b685c6 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -4011,7 +4011,7 @@ wavelan_interrupt(int		irq,
 #endif
 
   /* Prevent reentrancy. We need to do that because we may have
-   * multiple interrupt handler running concurently.
+   * multiple interrupt handler running concurrently.
    * It is safe because interrupts are disabled before aquiring
    * the spinlock. */
   spin_lock(&lp->spinlock);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 6d666359a42f..2b7f96594373 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -312,7 +312,7 @@ static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
  * zd_mac_tx_failed - callback for failed frames
  * @dev: the mac80211 wireless device
  *
- * This function is called if a frame couldn't be succesfully be
+ * This function is called if a frame couldn't be successfully be
  * transferred. The first frame from the tx queue, will be selected and
  * reported as error to the upper layers.
  */
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index a6b4a5a53d40..f511e70d454c 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -650,7 +650,7 @@ ccio_clear_io_tlb(struct ioc *ioc, dma_addr_t iovp, size_t byte_cnt)
  * Mark the I/O Pdir entries invalid and blow away the corresponding I/O
  * TLB entries.
  *
- * FIXME: at some threshhold it might be "cheaper" to just blow
+ * FIXME: at some threshold it might be "cheaper" to just blow
  *        away the entire I/O TLB instead of individual entries.
  *
  * FIXME: Uturn has 256 TLB entries. We don't need to purge every
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index d93108d148fc..36db20a8f892 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -6533,7 +6533,7 @@ static struct ibm_struct volume_driver_data = {
  * 	The speeds are stored on handles
  * 	(FANA:FAN9), (FANC:FANB), (FANE:FAND).
  *
- * 	There are three default speed sets, acessible as handles:
+ * 	There are three default speed sets, accessible as handles:
  * 	FS1L,FS1M,FS1H; FS2L,FS2M,FS2H; FS3L,FS3M,FS3H
  *
  * 	ACPI DSDT switches which set is in use depending on various
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c
index 87b4f49a5251..a5135ebe5f07 100644
--- a/drivers/pnp/pnpbios/rsparser.c
+++ b/drivers/pnp/pnpbios/rsparser.c
@@ -191,7 +191,7 @@ static unsigned char *pnpbios_parse_allocated_resource_data(struct pnp_dev *dev,
 			return (unsigned char *)p;
 			break;
 
-		default:	/* an unkown tag */
+		default:	/* an unknown tag */
 len_err:
 			dev_err(&dev->dev, "unknown tag %#x length %d\n",
 				tag, len);
@@ -405,7 +405,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end,
 		case SMALL_TAG_END:
 			return p + 2;
 
-		default:	/* an unkown tag */
+		default:	/* an unknown tag */
 len_err:
 			dev_err(&dev->dev, "unknown tag %#x length %d\n",
 				tag, len);
@@ -475,7 +475,7 @@ static unsigned char *pnpbios_parse_compatible_ids(unsigned char *p,
 			return (unsigned char *)p;
 			break;
 
-		default:	/* an unkown tag */
+		default:	/* an unknown tag */
 len_err:
 			dev_err(&dev->dev, "unknown tag %#x length %d\n",
 				tag, len);
@@ -744,7 +744,7 @@ static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev
 			return (unsigned char *)p;
 			break;
 
-		default:	/* an unkown tag */
+		default:	/* an unknown tag */
 len_err:
 			dev_err(&dev->dev, "unknown tag %#x length %d\n",
 				tag, len);
diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c
index 88cb74088611..3cbaf1811bd0 100644
--- a/drivers/ps3/ps3-sys-manager.c
+++ b/drivers/ps3/ps3-sys-manager.c
@@ -46,7 +46,7 @@
 /**
  * struct ps3_sys_manager_header - System manager message header.
  * @version: Header version, currently 1.
- * @size: Header size in bytes, curently 16.
+ * @size: Header size in bytes, currently 16.
  * @payload_size: Message payload size in bytes.
  * @service_id: Message type, one of enum ps3_sys_manager_service_id.
  * @request_tag: Unique number to identify reply.
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index ad164056feb6..434e92fdb966 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -335,7 +335,7 @@ static int rtc_probe(struct platform_device *pdev)
 		goto err_io;
 	}
 
-	/* Make sure frequency measurment mode, test modes, and lock
+	/* Make sure frequency measurement mode, test modes, and lock
 	 * are all disabled */
 	v3020_set_reg(chip, V3020_STATUS_0, 0x0);
 
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 097d3846a828..f54b1eec6ddf 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -74,7 +74,7 @@ fs3270_do_io(struct raw3270_view *view, struct raw3270_request *rq)
 		}
 		rc = raw3270_start(view, rq);
 		if (rc == 0) {
-			/* Started sucessfully. Now wait for completion. */
+			/* Started successfully. Now wait for completion. */
 			wait_event(fp->wait, raw3270_request_final(rq));
 		}
 	} while (rc == -EACCES);
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 8ab51608da55..c268a2e5b7c3 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -65,7 +65,7 @@ static void set_chp_logically_online(struct chp_id chpid, int onoff)
 	chpid_to_chp(chpid)->state = onoff;
 }
 
-/* On succes return 0 if channel-path is varied offline, 1 if it is varied
+/* On success return 0 if channel-path is varied offline, 1 if it is varied
  * online. Return -ENODEV if channel-path is not registered. */
 int chp_get_status(struct chp_id chpid)
 {
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 30f516111307..2985eb439485 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -462,7 +462,7 @@ static struct cmb_area cmb_area = {
  * block of memory, which can not be moved as long as any channel
  * is active. Therefore, a maximum number of subchannels needs to
  * be defined somewhere. This is a module parameter, defaulting to
- * a resonable value of 1024, or 32 kb of memory.
+ * a reasonable value of 1024, or 32 kb of memory.
  * Current kernels don't allow kmalloc with more than 128kb, so the
  * maximum is 4096.
  */
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 58e583b61e60..aa2b60a868ba 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -92,11 +92,11 @@
 #define ENVCTRL_CPUTEMP_MON			1    /* cpu temperature monitor */
 #define ENVCTRL_CPUVOLTAGE_MON	  	2    /* voltage monitor         */
 #define ENVCTRL_FANSTAT_MON  		3    /* fan status monitor      */
-#define ENVCTRL_ETHERTEMP_MON		4    /* ethernet temperarture */
+#define ENVCTRL_ETHERTEMP_MON		4    /* ethernet temperature */
 					     /* monitor                     */
 #define ENVCTRL_VOLTAGESTAT_MON	  	5    /* voltage status monitor  */
 #define ENVCTRL_MTHRBDTEMP_MON		6    /* motherboard temperature */
-#define ENVCTRL_SCSITEMP_MON		7    /* scsi temperarture */
+#define ENVCTRL_SCSITEMP_MON		7    /* scsi temperature */
 #define ENVCTRL_GLOBALADDR_MON		8    /* global address */
 
 /* Child device type.
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index f5a9addb7050..07ce9bfcdf06 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1491,7 +1491,7 @@ NCR_700_intr(int irq, void *dev_id)
 	unsigned long flags;
 	int handled = 0;
 
-	/* Use the host lock to serialise acess to the 53c700
+	/* Use the host lock to serialise access to the 53c700
 	 * hardware.  Note: In future, we may need to take the queue
 	 * lock to enter the done routines.  When that happens, we
 	 * need to ensure that for this driver, the host lock and the
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index cdbdec9f4fb2..83986ed86556 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -526,10 +526,10 @@ struct aac_driver_ident
 
 /*
  *	The adapter interface specs all queues to be located in the same
- *	physically contigous block. The host structure that defines the
+ *	physically contiguous block. The host structure that defines the
  *	commuication queues will assume they are each a separate physically
- *	contigous memory region that will support them all being one big
- *	contigous block.
+ *	contiguous memory region that will support them all being one big
+ *	contiguous block.
  *	There is a command and response queue for each level and direction of
  *	commuication. These regions are accessed by both the host and adapter.
  */
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index d598eba630d0..666d5151d628 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -226,7 +226,7 @@ static int aac_comm_init(struct aac_dev * dev)
 	spin_lock_init(&dev->fib_lock);
 
 	/*
-	 *	Allocate the physically contigous space for the commuication
+	 *	Allocate the physically contiguous space for the commuication
 	 *	queue headers. 
 	 */
 
diff --git a/drivers/scsi/aic7xxx/aic79xx.seq b/drivers/scsi/aic7xxx/aic79xx.seq
index 3b66b5ae3d9f..2fb78e35a9e5 100644
--- a/drivers/scsi/aic7xxx/aic79xx.seq
+++ b/drivers/scsi/aic7xxx/aic79xx.seq
@@ -217,7 +217,7 @@ BEGIN_CRITICAL;
 scbdma_tohost_done:
 	test	CCSCBCTL, CCARREN jz fill_qoutfifo_dmadone;
 	/*
-	 * An SCB has been succesfully uploaded to the host.
+	 * An SCB has been successfully uploaded to the host.
 	 * If the SCB was uploaded for some reason other than
 	 * bad SCSI status (currently only for underruns), we
 	 * queue the SCB for normal completion.  Otherwise, we
diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c
index 63b521d615f2..4d419c155ce9 100644
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -2487,7 +2487,7 @@ ahd_handle_scsiint(struct ahd_softc *ahd, u_int intstat)
 		/*
 		 * Although the driver does not care about the
 		 * 'Selection in Progress' status bit, the busy
-		 * LED does.  SELINGO is only cleared by a sucessfull
+		 * LED does.  SELINGO is only cleared by a successfull
 		 * selection, so we must manually clear it to insure
 		 * the LED turns off just incase no future successful
 		 * selections occur (e.g. no devices on the bus).
diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
index 8dfb59d58992..45aa728a76b2 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_core.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
@@ -1733,7 +1733,7 @@ ahc_handle_scsiint(struct ahc_softc *ahc, u_int intstat)
 		/*
 		 * Although the driver does not care about the
 		 * 'Selection in Progress' status bit, the busy
-		 * LED does.  SELINGO is only cleared by a sucessfull
+		 * LED does.  SELINGO is only cleared by a successfull
 		 * selection, so we must manually clear it to insure
 		 * the LED turns off just incase no future successful
 		 * selections occur (e.g. no devices on the bus).
diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h
index a000bc4e2d4a..bf320412ee24 100644
--- a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h
+++ b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h
@@ -61,7 +61,7 @@ enum bfa_pport_speed {
  * 		Port operational type (in sync with SNIA port type).
  */
 enum bfa_pport_type {
-	BFA_PPORT_TYPE_UNKNOWN = 1,	/*  port type is unkown */
+	BFA_PPORT_TYPE_UNKNOWN = 1,	/*  port type is unknown */
 	BFA_PPORT_TYPE_TRUNKED = 2,	/*  Trunked mode */
 	BFA_PPORT_TYPE_NPORT   = 5,	/*  P2P with switched fabric */
 	BFA_PPORT_TYPE_NLPORT  = 6,	/*  public loop */
diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h
index 31881d218515..ade763dbc8ce 100644
--- a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h
+++ b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h
@@ -25,7 +25,7 @@
  * Temperature sensor status values
  */
 enum bfa_tsensor_status {
-	BFA_TSENSOR_STATUS_UNKNOWN   = 1,   /*  unkown status */
+	BFA_TSENSOR_STATUS_UNKNOWN   = 1,   /*  unknown status */
 	BFA_TSENSOR_STATUS_FAULTY    = 2,   /*  sensor is faulty */
 	BFA_TSENSOR_STATUS_BELOW_MIN = 3,   /*  temperature below mininum */
 	BFA_TSENSOR_STATUS_NOMINAL   = 4,   /*  normal temperature */
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index a0e7e711ff9d..5be67a6fca93 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -834,7 +834,7 @@ static int hptiop_reset_hba(struct hptiop_hba *hba)
 			atomic_read(&hba->resetting) == 0, 60 * HZ);
 
 	if (atomic_read(&hba->resetting)) {
-		/* IOP is in unkown state, abort reset */
+		/* IOP is in unknown state, abort reset */
 		printk(KERN_ERR "scsi%d: reset failed\n", hba->host->host_no);
 		return -1;
 	}
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index bd2f77197447..6486ae4591b8 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -56,7 +56,7 @@
  * at the same time.
  *
  * When discovery succeeds or fails a callback is made to the lport as
- * notification. Currently, succesful discovery causes the lport to take no
+ * notification. Currently, successful discovery causes the lport to take no
  * action. A failure will cause the lport to reset. There is likely a circular
  * locking problem with this implementation.
  */
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 2e0746d70303..ca25ee5190b0 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -1004,7 +1004,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
  * iscsi_tcp_task_xmit - xmit normal PDU task
  * @task: iscsi command task
  *
- * We're expected to return 0 when everything was transmitted succesfully,
+ * We're expected to return 0 when everything was transmitted successfully,
  * -EAGAIN if there's still data in the queue, or != 0 for any other kind
  * of error.
  */
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index e1a30a16a9fa..9bd19aa14249 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -654,7 +654,7 @@ lpfc_selective_reset(struct lpfc_hba *phba)
  * Notes:
  * Assumes any error from lpfc_selective_reset() will be negative.
  * If lpfc_selective_reset() returns zero then the length of the buffer
- * is returned which indicates succcess
+ * is returned which indicates success
  *
  * Returns:
  * -EINVAL if the buffer does not contain the string "selective"
@@ -3147,7 +3147,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
  * sysfs_ctlreg_read - Read method for reading from ctlreg
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
- * @buf: if succesful contains the data from the adapter IOREG space.
+ * @buf: if successful contains the data from the adapter IOREG space.
  * @off: offset into buffer to beginning of data.
  * @count: bytes to transfer.
  *
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 45337cd23feb..a14ab4580d4e 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -802,7 +802,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 	/* FLOGI completes successfully */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-			 "0101 FLOGI completes sucessfully "
+			 "0101 FLOGI completes successfully "
 			 "Data: x%x x%x x%x x%x\n",
 			 irsp->un.ulpWord[4], sp->cmn.e_d_tov,
 			 sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution);
@@ -4133,7 +4133,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 	/* Indicate we are walking fc_rscn_id_list on this vport */
 	vport->fc_rscn_flush = 1;
 	spin_unlock_irq(shost->host_lock);
-	/* Get the array count after sucessfully have the token */
+	/* Get the array count after successfully have the token */
 	rscn_cnt = vport->fc_rscn_id_cnt;
 	/* If we are already processing an RSCN, save the received
 	 * RSCN payload buffer, cmdiocb->context2 to process later.
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 562d8cee874b..82f8ab5c72cd 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -645,7 +645,7 @@ lpfc_hba_down_prep(struct lpfc_hba *phba)
  * down the SLI Layer.
  *
  * Return codes
- *   0 - sucess.
+ *   0 - success.
  *   Any other value - error.
  **/
 static int
@@ -700,7 +700,7 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba)
  * down the SLI Layer.
  *
  * Return codes
- *   0 - sucess.
+ *   0 - success.
  *   Any other value - error.
  **/
 static int
@@ -755,7 +755,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
  * uninitialization after the HBA is reset when bring down the SLI Layer.
  *
  * Return codes
- *   0 - sucess.
+ *   0 - success.
  *   Any other value - error.
  **/
 int
@@ -1254,7 +1254,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
  * routine from the API jump table function pointer from the lpfc_hba struct.
  *
  * Return codes
- *   0 - sucess.
+ *   0 - success.
  *   Any other value - error.
  **/
 void
@@ -3124,7 +3124,7 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
  * PCI devices.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -3220,7 +3220,7 @@ lpfc_reset_hba(struct lpfc_hba *phba)
  * support the SLI-3 HBA device it attached to.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -3321,7 +3321,7 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba)
  * support the SLI-4 HBA device it attached to.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -3642,7 +3642,7 @@ lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
  * device specific resource setup to support the HBA device it attached to.
  *
  * Return codes
- *	0 - sucessful
+ *	0 - successful
  *	other values - error
  **/
 static int
@@ -3688,7 +3688,7 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
  * device specific resource setup to support the HBA device it attached to.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -3753,7 +3753,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba)
  * list and set up the IOCB tag array accordingly.
  *
  * Return codes
- *	0 - sucessful
+ *	0 - successful
  *	other values - error
  **/
 static int
@@ -3872,7 +3872,7 @@ lpfc_free_active_sgl(struct lpfc_hba *phba)
  * list and set up the sgl xritag tag array accordingly.
  *
  * Return codes
- *	0 - sucessful
+ *	0 - successful
  *	other values - error
  **/
 static int
@@ -3986,7 +3986,7 @@ out_free_mem:
  * enabled and the driver is reinitializing the device.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -4146,7 +4146,7 @@ lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *phba)
  * PCI device data structure is set.
  *
  * Return codes
- *      pointer to @phba - sucessful
+ *      pointer to @phba - successful
  *      NULL - error
  **/
 static struct lpfc_hba *
@@ -4202,7 +4202,7 @@ lpfc_hba_free(struct lpfc_hba *phba)
  * host with it.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      other values - error
  **/
 static int
@@ -4365,7 +4365,7 @@ lpfc_post_init_setup(struct lpfc_hba *phba)
  * with SLI-3 interface spec.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -4662,7 +4662,7 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf)
  * this routine.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - could not allocated memory.
  **/
 static int
@@ -4761,7 +4761,7 @@ lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba)
  * allocation for the port.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -4861,7 +4861,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
  * HBA consistent with the SLI-4 interface spec.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -4910,7 +4910,7 @@ lpfc_setup_endian_order(struct lpfc_hba *phba)
  * we just use some constant number as place holder.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -5218,7 +5218,7 @@ out_error:
  * operation.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -5286,7 +5286,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
  * operation.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -5552,7 +5552,7 @@ out_error:
  * operation.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -5599,7 +5599,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
  * Later, this can be used for all the slow-path events.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      -ENOMEM - No availble memory
  **/
 static int
@@ -5760,7 +5760,7 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
  * all resources assigned to the PCI function which originates this request.
  *
  * Return codes
- *      0 - sucessful
+ *      0 - successful
  *      ENOMEM - No availble memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -5923,7 +5923,7 @@ lpfc_sli4_fcfi_unreg(struct lpfc_hba *phba, uint16_t fcfi)
  * with SLI-4 interface spec.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -6052,7 +6052,7 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba)
  * will be left with MSI-X enabled and leaks its vectors.
  *
  * Return codes
- *   0 - sucessful
+ *   0 - successful
  *   other values - error
  **/
 static int
@@ -6184,7 +6184,7 @@ lpfc_sli_disable_msix(struct lpfc_hba *phba)
  * is done in this function.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  */
 static int
@@ -6243,7 +6243,7 @@ lpfc_sli_disable_msi(struct lpfc_hba *phba)
  * MSI-X -> MSI -> IRQ.
  *
  * Return codes
- *   0 - sucessful
+ *   0 - successful
  *   other values - error
  **/
 static uint32_t
@@ -6333,7 +6333,7 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
  * enabled and leaks its vectors.
  *
  * Return codes
- * 0 - sucessful
+ * 0 - successful
  * other values - error
  **/
 static int
@@ -6443,7 +6443,7 @@ lpfc_sli4_disable_msix(struct lpfc_hba *phba)
  * which is done in this function.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static int
@@ -6508,7 +6508,7 @@ lpfc_sli4_disable_msi(struct lpfc_hba *phba)
  * MSI-X -> MSI -> IRQ.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	other values - error
  **/
 static uint32_t
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 43cbe336f1f8..42d4f3dae1d6 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1794,7 +1794,7 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
 		 */
 		if (lpfc_sli_chk_mbx_command(pmbox->mbxCommand) ==
 		    MBX_SHUTDOWN) {
-			/* Unknow mailbox command compl */
+			/* Unknown mailbox command compl */
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 					"(%d):0323 Unknown Mailbox command "
 					"x%x (x%x) Cmpl\n",
@@ -4163,7 +4163,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba,
  * addition, this routine gets the port vpd data.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - could not allocated memory.
  **/
 static int
@@ -11091,7 +11091,7 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba)
  * sequential.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  *      EIO - The mailbox failed to complete successfully.
  * 	When this error occurs, the driver is not guaranteed
  *	to have any rpi regions posted to the device and
@@ -11129,7 +11129,7 @@ lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *phba)
  * maps up to 64 rpi context regions.
  *
  * Return codes
- * 	0 - sucessful
+ * 	0 - successful
  * 	ENOMEM - No available memory
  *      EIO - The mailbox failed to complete successfully.
  **/
@@ -11191,7 +11191,7 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
  * PAGE_SIZE modulo 64 rpi context headers.
  *
  * Returns
- * 	A nonzero rpi defined as rpi_base <= rpi < max_rpi if sucessful
+ * 	A nonzero rpi defined as rpi_base <= rpi < max_rpi if successful
  * 	LPFC_RPI_ALLOC_ERROR if no rpis are available.
  **/
 int
diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h
index 512c2cc1a33f..d310f49d077e 100644
--- a/drivers/scsi/megaraid.h
+++ b/drivers/scsi/megaraid.h
@@ -381,7 +381,7 @@ typedef struct {
 	u8	battery_status;	/*
 				 * BIT 0: battery module missing
 				 * BIT 1: VBAD
-				 * BIT 2: temprature high
+				 * BIT 2: temperature high
 				 * BIT 3: battery pack missing
 				 * BIT 4,5:
 				 *   00 - charge complete
diff --git a/drivers/scsi/megaraid/mbox_defs.h b/drivers/scsi/megaraid/mbox_defs.h
index b25b74764ec3..ce2487a888ed 100644
--- a/drivers/scsi/megaraid/mbox_defs.h
+++ b/drivers/scsi/megaraid/mbox_defs.h
@@ -497,7 +497,7 @@ typedef struct {
  * @inserted_drive		: channel:Id of inserted drive
  * @battery_status		: bit 0: battery module missing
  *				bit 1: VBAD
- *				bit 2: temprature high
+ *				bit 2: temperature high
  *				bit 3: battery pack missing
  *				bit 4,5:
  *					00 - charge complete
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 234f0b7eb21c..f9ae8037a710 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -2704,7 +2704,7 @@ megaraid_reset_handler(struct scsi_cmnd *scp)
 	}
 	else {
 		con_log(CL_ANN, (KERN_NOTICE
-		"megaraid mbox: reset sequence completed sucessfully\n"));
+		"megaraid mbox: reset sequence completed successfully\n"));
 	}
 
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 86ab32d7ab15..756e509d495c 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -2894,7 +2894,7 @@ _scsih_normalize_sense(char *sense_buffer, struct sense_info *data)
 
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
 /**
- * _scsih_scsi_ioc_info - translated non-succesfull SCSI_IO request
+ * _scsih_scsi_ioc_info - translated non-successfull SCSI_IO request
  * @ioc: per adapter object
  * @scmd: pointer to scsi command object
  * @mpi_reply: reply mf payload returned from firmware
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index e3c482aa87b5..a2d569828308 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -6495,7 +6495,7 @@ static void ncr_int_ma (struct ncb *np)
 	**	we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids 
 	**	bloat for such a should_not_happen situation).
 	**	In all other situation, we reset the BUS.
-	**	Are these assumptions reasonnable ? (Wait and see ...)
+	**	Are these assumptions reasonable ? (Wait and see ...)
 	*/
 unexpected_phase:
 	dsp -= 8;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index f7c70e2a8224..d3d39f86fcf7 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3342,7 +3342,7 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode)
  * @direction : data transfer direction
  *
  * Return value
- *  0 on sucess, non-zero error code on failure
+ *  0 on success, non-zero error code on failure
  */
 static int pmcraid_build_passthrough_ioadls(
 	struct pmcraid_cmd *cmd,
@@ -3401,7 +3401,7 @@ static int pmcraid_build_passthrough_ioadls(
  * @direction: data transfer direction
  *
  * Return value
- *  0 on sucess, non-zero error code on failure
+ *  0 on success, non-zero error code on failure
  */
 static void pmcraid_release_passthrough_ioadls(
 	struct pmcraid_cmd *cmd,
@@ -3429,7 +3429,7 @@ static void pmcraid_release_passthrough_ioadls(
  * @arg: pointer to pmcraid_passthrough_buffer user buffer
  *
  * Return value
- *  0 on sucess, non-zero error code on failure
+ *  0 on success, non-zero error code on failure
  */
 static long pmcraid_ioctl_passthrough(
 	struct pmcraid_instance *pinstance,
diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h
index 3441b3f90827..2752b56cad56 100644
--- a/drivers/scsi/pmcraid.h
+++ b/drivers/scsi/pmcraid.h
@@ -771,11 +771,11 @@ static struct pmcraid_ioasc_error pmcraid_ioasc_error_table[] = {
 	{0x01180600, IOASC_LOG_LEVEL_MUST,
 	 "Recovered Error, soft media error, sector reassignment suggested"},
 	{0x015D0000, IOASC_LOG_LEVEL_MUST,
-	 "Recovered Error, failure prediction thresold exceeded"},
+	 "Recovered Error, failure prediction threshold exceeded"},
 	{0x015D9200, IOASC_LOG_LEVEL_MUST,
-	 "Recovered Error, soft Cache Card Battery error thresold"},
+	 "Recovered Error, soft Cache Card Battery error threshold"},
 	{0x015D9200, IOASC_LOG_LEVEL_MUST,
-	 "Recovered Error, soft Cache Card Battery error thresold"},
+	 "Recovered Error, soft Cache Card Battery error threshold"},
 	{0x02048000, IOASC_LOG_LEVEL_MUST,
 	 "Not Ready, IOA Reset Required"},
 	{0x02408500, IOASC_LOG_LEVEL_MUST,
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 723fdecd91bd..0fd6ae6911ad 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(scsi_nl_send_transport_msg);
  * @data_buf:		pointer to vendor unique data buffer
  *
  * Returns:
- *   0 on succesful return
+ *   0 on successful return
  *   otherwise, failing error code
  *
  * Notes:
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index fd47cb1bee1b..f27e52d963d3 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -666,7 +666,7 @@ EXPORT_SYMBOL(sas_phy_add);
  *
  * Note:
  *   This function must only be called on a PHY that has not
- *   sucessfully been added using sas_phy_add().
+ *   successfully been added using sas_phy_add().
  */
 void sas_phy_free(struct sas_phy *phy)
 {
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(sas_port_add);
  *
  * Note:
  *   This function must only be called on a PORT that has not
- *   sucessfully been added using sas_port_add().
+ *   successfully been added using sas_port_add().
  */
 void sas_port_free(struct sas_port *port)
 {
@@ -1476,7 +1476,7 @@ EXPORT_SYMBOL(sas_rphy_add);
  *
  * Note:
  *   This function must only be called on a remote
- *   PHY that has not sucessfully been added using
+ *   PHY that has not successfully been added using
  *   sas_rphy_add() (or has been sas_rphy_remove()'d)
  */
 void sas_rphy_free(struct sas_rphy *rphy)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 45374d66d26a..2b38f6ad6e11 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -1864,7 +1864,7 @@ static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev)
  *
  * This routine is similar to sym_set_workarounds(), except
  * that, at this point, we already know that the device was
- * succesfully intialized at least once before, and so most
+ * successfully intialized at least once before, and so most
  * of the steps taken there are un-needed here.
  */
 static void sym2_reset_workarounds(struct pci_dev *pdev)
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 297deb817a5d..a7bc8b7b09ac 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -2692,7 +2692,7 @@ static void sym_int_ma (struct sym_hcb *np)
 	 *  we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids 
 	 *  bloat for such a should_not_happen situation).
 	 *  In all other situation, we reset the BUS.
-	 *  Are these assumptions reasonnable ? (Wait and see ...)
+	 *  Are these assumptions reasonable ? (Wait and see ...)
 	 */
 unexpected_phase:
 	dsp -= 8;
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h
index 053e63c86822..5a80cbac3f92 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.h
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h
@@ -54,7 +54,7 @@
  *
  *    SYM_OPT_LIMIT_COMMAND_REORDERING
  *        When this option is set, the driver tries to limit tagged 
- *        command reordering to some reasonnable value.
+ *        command reordering to some reasonable value.
  *        (set for Linux)
  */
 #if 0
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index d71dfe398940..36ede02ceacf 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -361,9 +361,9 @@ static const struct pnp_device_id pnp_dev_table[] = {
 	{	"LTS0001",		0       },
 	/* Rockwell's (PORALiNK) 33600 INT PNP */
 	{	"WCI0003",		0	},
-	/* Unkown PnP modems */
+	/* Unknown PnP modems */
 	{	"PNPCXXX",		UNKNOWN_DEV	},
-	/* More unkown PnP modems */
+	/* More unknown PnP modems */
 	{	"PNPDXXX",		UNKNOWN_DEV	},
 	{	"",			0	}
 };
diff --git a/drivers/serial/pmac_zilog.h b/drivers/serial/pmac_zilog.h
index 570b0d925e83..f6e77f12acd5 100644
--- a/drivers/serial/pmac_zilog.h
+++ b/drivers/serial/pmac_zilog.h
@@ -73,7 +73,7 @@ static inline struct uart_pmac_port *pmz_get_port_A(struct uart_pmac_port *uap)
 }
 
 /*
- * Register acessors. Note that we don't need to enforce a recovery
+ * Register accessors. Note that we don't need to enforce a recovery
  * delay on PCI PowerMac hardware, it's dealt in HW by the MacIO chip,
  * though if we try to use this driver on older machines, we might have
  * to add it back
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 0c08f286a2ef..46de564aaea0 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -313,7 +313,7 @@ static void qe_uart_stop_tx(struct uart_port *port)
  * This function will attempt to stuff of all the characters from the
  * kernel's transmit buffer into TX BDs.
  *
- * A return value of non-zero indicates that it sucessfully stuffed all
+ * A return value of non-zero indicates that it successfully stuffed all
  * characters from the kernel buffer.
  *
  * A return value of zero indicates that there are still characters in the
diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c
index 40de151f2789..e89304c72568 100644
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -4190,7 +4190,7 @@ static void ixj_aec_start(IXJ *j, int level)
 				ixj_WriteDSPCommand(0x1224, j);
 
 			ixj_WriteDSPCommand(0xE014, j);
-			ixj_WriteDSPCommand(0x0003, j);	/* Lock threashold at 3dB */
+			ixj_WriteDSPCommand(0x0003, j);	/* Lock threshold at 3dB */
 
 			ixj_WriteDSPCommand(0xE338, j);	/* Set Echo Suppresser Attenuation to 0dB */
 
@@ -4235,7 +4235,7 @@ static void ixj_aec_start(IXJ *j, int level)
 				ixj_WriteDSPCommand(0x1224, j);
 
 			ixj_WriteDSPCommand(0xE014, j);
-			ixj_WriteDSPCommand(0x0003, j);	/* Lock threashold at 3dB */
+			ixj_WriteDSPCommand(0x0003, j);	/* Lock threshold at 3dB */
 
 			ixj_WriteDSPCommand(0xE338, j);	/* Set Echo Suppresser Attenuation to 0dB */
 
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index d171b563e94c..bba4d3eabe0f 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -1958,7 +1958,7 @@ static void uea_dispatch_cmv_e1(struct uea_softc *sc, struct intr_pkt *intr)
 		goto bad1;
 
 	/* FIXME : ADI930 reply wrong preambule (func = 2, sub = 2) to
-	 * the first MEMACESS cmv. Ignore it...
+	 * the first MEMACCESS cmv. Ignore it...
 	 */
 	if (cmv->bFunction != dsc->function) {
 		if (UEA_CHIP_VERSION(sc) == ADI930
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 2473cf0c6b1d..b4bd2411c666 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1,5 +1,5 @@
 /**
- * drivers/usb/class/usbtmc.c - USB Test & Measurment class driver
+ * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver
  *
  * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany
  * Copyright (C) 2008 Novell, Inc.
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index da718e84d58d..e80f1af438c8 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1890,7 +1890,7 @@ static void cancel_async_set_config(struct usb_device *udev)
  * routine gets around the normal restrictions by using a work thread to
  * submit the change-config request.
  *
- * Returns 0 if the request was succesfully queued, error code otherwise.
+ * Returns 0 if the request was successfully queued, error code otherwise.
  * The caller has no way to know whether the queued request will eventually
  * succeed.
  */
diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c
index 7953948bfe4a..4e3657808b0f 100644
--- a/drivers/usb/gadget/f_acm.c
+++ b/drivers/usb/gadget/f_acm.c
@@ -432,7 +432,7 @@ static void acm_disable(struct usb_function *f)
  * @length: size of data
  * Context: irqs blocked, acm->lock held, acm_notify_req non-null
  *
- * Returns zero on sucess or a negative errno.
+ * Returns zero on success or a negative errno.
  *
  * See section 6.3.5 of the CDC 1.1 specification for information
  * about the only notification we issue:  SerialState change.
diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c
index 1937d8c7b433..adda1208a1ec 100644
--- a/drivers/usb/gadget/pxa27x_udc.c
+++ b/drivers/usb/gadget/pxa27x_udc.c
@@ -1524,7 +1524,7 @@ static int pxa_udc_get_frame(struct usb_gadget *_gadget)
  * pxa_udc_wakeup - Force udc device out of suspend
  * @_gadget: usb gadget
  *
- * Returns 0 if succesfull, error code otherwise
+ * Returns 0 if successfull, error code otherwise
  */
 static int pxa_udc_wakeup(struct usb_gadget *_gadget)
 {
diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c
index 62a226b61670..00a29855d0c4 100644
--- a/drivers/usb/host/fhci-sched.c
+++ b/drivers/usb/host/fhci-sched.c
@@ -627,7 +627,7 @@ irqreturn_t fhci_irq(struct usb_hcd *hcd)
 
 
 /*
- * Process normal completions(error or sucess) and clean the schedule.
+ * Process normal completions(error or success) and clean the schedule.
  *
  * This is the main path for handing urbs back to drivers. The only other patth
  * is process_del_list(),which unlinks URBs by scanning EDs,instead of scanning
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 9ec7fd5da489..9579cf4c38bf 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -111,7 +111,7 @@ struct aes_ccm_b1 {
  *
  * CCM uses Ax blocks to generate a keystream with which the MIC and
  * the message's payload are encoded. A0 always encrypts/decrypts the
- * MIC. Ax (x>0) are used for the sucesive payload blocks.
+ * MIC. Ax (x>0) are used for the successive payload blocks.
  *
  * The x is the counter, and is increased for each block.
  */
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
index 613a5fc490d3..489b47833e2c 100644
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ b/drivers/usb/wusbcore/wa-xfer.c
@@ -558,7 +558,7 @@ static void wa_seg_dto_cb(struct urb *urb)
 /*
  * Callback for the segment request
  *
- * If succesful transition state (unless already transitioned or
+ * If successful transition state (unless already transitioned or
  * outbound transfer); otherwise, take a note of the error, mark this
  * segment done and try completion.
  *
@@ -1364,7 +1364,7 @@ segment_aborted:
 /*
  * Callback for the IN data phase
  *
- * If succesful transition state; otherwise, take a note of the
+ * If successful transition state; otherwise, take a note of the
  * error, mark this segment done and try completion.
  *
  * Note we don't access until we are sure that the transfer hasn't
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index c7080d497311..0bb665a0c024 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -229,7 +229,7 @@ void i1480_usb_neep_cb(struct urb *urb)
  * will verify it.
  *
  * Set i1480->evt_result with the result of getting the event or its
- * size (if succesful).
+ * size (if successful).
  *
  * Delivers the data directly to i1480->evt_buf
  */
diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c
index 86a853b84119..7350ed6909f8 100644
--- a/drivers/uwb/wlp/txrx.c
+++ b/drivers/uwb/wlp/txrx.c
@@ -282,7 +282,7 @@ EXPORT_SYMBOL_GPL(wlp_receive_frame);
  *         and transmission will be done by the calling function.
  * @dst:   On return this will contain the device address to which the
  *         frame is destined.
- * @returns: 0 on success no tx : WLP header sucessfully applied to skb buffer,
+ * @returns: 0 on success no tx : WLP header successfully applied to skb buffer,
  *                                calling function can proceed with tx
  *           1 on success with tx : WLP will take over transmission of this
  *                                  frame
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 913b4a47ae52..1ddeb4c34763 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -3276,7 +3276,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 				txtformat = "24 bit interface";
 				break;
 			default:
-				txtformat = "unkown format";
+				txtformat = "unknown format";
 			}
 		} else {
 			switch (format & 7) {
@@ -3299,7 +3299,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 				txtformat = "262144 colours (FDPI-2 mode)";
 				break;
 			default:
-				txtformat = "unkown format";
+				txtformat = "unknown format";
 			}
 		}
 		PRINTKI("%s%s %s monitor detected: %s\n",
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index 505c0823a105..2cf7ba52f67c 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -158,7 +158,7 @@ static int atmel_pwm_bl_probe(struct platform_device *pdev)
 			goto err_free_pwm;
 		}
 
-		/* Turn display off by defatult. */
+		/* Turn display off by default. */
 		retval = gpio_direction_output(pwmbl->gpio_on,
 				0 ^ pdata->on_active_low);
 		if (retval)
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 50ec17dfc517..fa32b94a4546 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -177,7 +177,7 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi)
 	if (!data)
 		return -ENOMEM;
 
-	data->is_vga = true; /* defaut to VGA mode */
+	data->is_vga = true; /* default to VGA mode */
 
 	/*
 	 * bits_per_word cannot be configured in platform data
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index 857b3668b3ba..6468a297e341 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -436,7 +436,7 @@ sti_init_glob_cfg(struct sti_struct *sti,
 			    (offs < PCI_BASE_ADDRESS_0 ||
 			     offs > PCI_BASE_ADDRESS_5)) {
 				printk (KERN_WARNING
-					"STI pci region maping for region %d (%02x) can't be mapped\n",
+					"STI pci region mapping for region %d (%02x) can't be mapped\n",
 					i,sti->rm_entry[i]);
 				continue;
 			}
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index 1a83709f9611..492e6e64b653 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -701,7 +701,7 @@ static int gbefb_set_par(struct fb_info *info)
 	   blocks of 512x128, 256x128 or 128x128 pixels, respectively for 8bit,
 	   16bit and 32 bit modes (64 kB). They cover the screen with partial
 	   tiles on the right and/or bottom of the screen if needed.
-	   For exemple in 640x480 8 bit mode the mapping is:
+	   For example in 640x480 8 bit mode the mapping is:
 
 	   <-------- 640 ----->
 	   <---- 512 ----><128|384 offscreen>
diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c
index 6120f0c526fe..876648e15e9d 100644
--- a/drivers/video/stifb.c
+++ b/drivers/video/stifb.c
@@ -756,9 +756,9 @@ hyperResetPlanes(struct stifb_info *fb, int enable)
 		if (fb->info.var.bits_per_pixel == 32)
 			controlPlaneReg = 0x04000F00;
 		else
-			controlPlaneReg = 0x00000F00;   /* 0x00000800 should be enought, but lets clear all 4 bits */
+			controlPlaneReg = 0x00000F00;   /* 0x00000800 should be enough, but lets clear all 4 bits */
 	else
-		controlPlaneReg = 0x00000F00; /* 0x00000100 should be enought, but lets clear all 4 bits */
+		controlPlaneReg = 0x00000F00; /* 0x00000100 should be enough, but lets clear all 4 bits */
 
 	switch (enable) {
 	case ENABLE:
diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index ff43c8885028..980548390048 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -52,7 +52,7 @@
  *
  * 0.1.3 (released 1999-11-02)	added Attila's panning support, code
  *				reorg, hwcursor address page size alignment
- *				(for mmaping both frame buffer and regs),
+ *				(for mmapping both frame buffer and regs),
  *				and my changes to get rid of hardcoded
  *				VGA i/o register locations (uses PCI
  *				configuration info now)
diff --git a/drivers/video/via/dvi.c b/drivers/video/via/dvi.c
index c5c32b6b6e6c..67b36932212b 100644
--- a/drivers/video/via/dvi.c
+++ b/drivers/video/via/dvi.c
@@ -467,7 +467,7 @@ static int dvi_get_panel_size_from_DDCv1(void)
 	default:
 		viaparinfo->tmds_setting_info->dvi_panel_size =
 			VIA_RES_1024X768;
-		DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d !\
+		DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d !\
 					 set default panel size.\n", max_h);
 		break;
 	}
@@ -534,7 +534,7 @@ static int dvi_get_panel_size_from_DDCv2(void)
 	default:
 		viaparinfo->tmds_setting_info->dvi_panel_size =
 			VIA_RES_1024X768;
-		DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d!\
+		DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d!\
 					set default panel size.\n", HSize);
 		break;
 	}
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 3df17dc8c3d7..65ccd215d496 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -446,7 +446,7 @@ static int vt8623fb_set_par(struct fb_info *info)
 
 	svga_wseq_mask(0x1E, 0xF0, 0xF0); // DI/DVP bus
 	svga_wseq_mask(0x2A, 0x0F, 0x0F); // DI/DVP bus
-	svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read treshold
+	svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read threshold
 	vga_wseq(NULL, 0x17, 0x1F);       // FIFO depth
 	vga_wseq(NULL, 0x18, 0x4E);
 	svga_wseq_mask(0x1A, 0x08, 0x08); // enable MMIO ?
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index 381026c0bd7b..923cc68dba26 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -508,7 +508,7 @@ void coh901327_watchdog_reset(void)
 	 * deactivating the watchdog before it is shut down by it.
 	 *
 	 * NOTE: on future versions of the watchdog, this restriction is
-	 * gone: the watchdog will be reloaded with a defaul value (1 min)
+	 * gone: the watchdog will be reloaded with a default value (1 min)
 	 * instead of last value, and you can conveniently set the watchdog
 	 * timeout to 10ms (value = 1) without any problems.
 	 */
diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c
index b6b3f59ab446..47d719717a3b 100644
--- a/drivers/watchdog/machzwd.c
+++ b/drivers/watchdog/machzwd.c
@@ -21,7 +21,7 @@
  *      wd#1 - 2 seconds;
  *      wd#2 - 7.2 ms;
  *  After the expiration of wd#1, it can generate a NMI, SCI, SMI, or
- *  a system RESET and it starts wd#2 that unconditionaly will RESET
+ *  a system RESET and it starts wd#2 that unconditionally will RESET
  *  the system when the counter reaches zero.
  *
  *  14-Dec-2001 Matt Domsch <Matt_Domsch@dell.com>
diff --git a/drivers/watchdog/wdrtas.c b/drivers/watchdog/wdrtas.c
index 3bde56bce63a..5bfb1f2c5319 100644
--- a/drivers/watchdog/wdrtas.c
+++ b/drivers/watchdog/wdrtas.c
@@ -542,7 +542,7 @@ static struct notifier_block wdrtas_notifier = {
 /**
  * wdrtas_get_tokens - reads in RTAS tokens
  *
- * returns 0 on succes, <0 on failure
+ * returns 0 on success, <0 on failure
  *
  * wdrtas_get_tokens reads in the tokens for the RTAS calls used in
  * this watchdog driver. It tolerates, if "get-sensor-state" and
@@ -598,7 +598,7 @@ static void wdrtas_unregister_devs(void)
 /**
  * wdrtas_register_devs - registers the misc dev handlers
  *
- * returns 0 on succes, <0 on failure
+ * returns 0 on success, <0 on failure
  *
  * wdrtas_register_devs registers the watchdog and temperature watchdog
  * misc devs
@@ -630,7 +630,7 @@ static int wdrtas_register_devs(void)
 /**
  * wdrtas_init - init function of the watchdog driver
  *
- * returns 0 on succes, <0 on failure
+ * returns 0 on success, <0 on failure
  *
  * registers the file handlers and the reboot notifier
  */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb51b1e4..d15ea1790bfb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -767,7 +767,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	
 	current->mm->start_stack = bprm->p;
 
-	/* Now we do a little grungy work by mmaping the ELF image into
+	/* Now we do a little grungy work by mmapping the ELF image into
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
diff --git a/fs/bio.c b/fs/bio.c
index 12da5db8682c..2bd671a08e3b 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(bio_init);
  *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
  *   fall back to just using @kmalloc to allocate the required memory.
  *
- *   Note that the caller must set ->bi_destructor on succesful return
+ *   Note that the caller must set ->bi_destructor on successful return
  *   of a bio, to do the appropriate freeing of the bio once the reference
  *   count drops to zero.
  **/
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2c726b7b9faa..6815d2a84b94 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -256,7 +256,7 @@ out:
  * Insert @em into @tree or perform a simple forward/backward merge with
  * existing mappings.  The extent_map struct passed in will be inserted
  * into the tree directly, with an additional reference taken, or a
- * reference dropped if the merge attempt was sucessfull.
+ * reference dropped if the merge attempt was successfull.
  */
 int add_extent_mapping(struct extent_map_tree *tree,
 		       struct extent_map *em)
diff --git a/fs/cifs/README b/fs/cifs/README
index 79c1a93400be..a727b7cb075f 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -423,7 +423,7 @@ A partial list of the supported mount options follows:
 		source name to use to represent the client netbios machine 
 		name when doing the RFC1001 netbios session initialize.
   direct        Do not do inode data caching on files opened on this mount.
-		This precludes mmaping files on this mount. In some cases
+		This precludes mmapping files on this mount. In some cases
 		with fast networks and little or no caching benefits on the
 		client (e.g. when the application is doing large sequential
 		reads bigger than page size without rereading the same data) 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5d0fde18039c..4b35f7ec0583 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -39,7 +39,7 @@
 
 /*
  * MAX_REQ is the maximum number of requests that WE will send
- * on one socket concurently. It also matches the most common
+ * on one socket concurrently. It also matches the most common
  * value of max multiplex returned by servers.  We may
  * eventually want to use the negotiated value (in case
  * future servers can handle more) when we are more confident that
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 5e2492535daa..83580213fcac 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -917,8 +917,8 @@ undo_setattr:
 /*
  * If dentry->d_inode is null (usually meaning the cached dentry
  * is a negative dentry) then we would attempt a standard SMB delete, but
- * if that fails we can not attempt the fall back mechanisms on EACESS
- * but will return the EACESS to the caller.  Note that the VFS does not call
+ * if that fails we can not attempt the fall back mechanisms on EACCESS
+ * but will return the EACCESS to the caller. Note that the VFS does not call
  * unlink on negative dentries currently.
  */
 int cifs_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 224a1f478966..b6b6dcb500bf 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -371,7 +371,7 @@ E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
 	smbhash(p24 + 16, c8, p21 + 14, 1);
 }
 
-#if 0 /* currently unsued */
+#if 0 /* currently unused */
 static void
 D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
 {
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 16f682e26c07..b540aa5d1f61 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -143,7 +143,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(dlm_posix_lock);
 
-/* Returns failure iff a succesful lock operation should be canceled */
+/* Returns failure iff a successful lock operation should be canceled */
 static int dlm_plock_callback(struct plock_op *op)
 {
 	struct file *file;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 618ca95cbb59..0282ec78cf8f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2932,7 +2932,7 @@ retry:
 		ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
 					&mpd);
 		/*
-		 * If we have a contigous extent of pages and we
+		 * If we have a contiguous extent of pages and we
 		 * haven't done the I/O yet, map the blocks and submit
 		 * them for I/O.
 		 */
@@ -5370,7 +5370,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * worse case, the indexs blocks spread over different block groups
  *
  * If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiugous, with flexbg,
+ * different block groups too. If they are contiuguous, with flexbg,
  * they could still across block group boundary.
  *
  * Also account for superblock, inode, quota and xattr blocks
@@ -5446,7 +5446,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824defa..74e495dabe09 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -142,7 +142,7 @@
  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
  * value of s_mb_order2_reqs can be tuned via
  * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
- * stripe size (sbi->s_stripe), we try to search for contigous block in
+ * stripe size (sbi->s_stripe), we try to search for contiguous block in
  * stripe size. This should result in better allocation on RAID setups. If
  * not, we search in the specific group using bitmap for best extents. The
  * tunable min_to_scan and max_to_scan control the behaviour here.
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f25e70c1b51c..f0294410868d 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -177,7 +177,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 		spin_unlock(&jffs2_compressor_list_lock);
 		break;
 	default:
-		printk(KERN_ERR "JFFS2: unknow compression mode.\n");
+		printk(KERN_ERR "JFFS2: unknown compression mode.\n");
 	}
  out:
 	if (ret == JFFS2_COMPR_NONE) {
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1a80301004b8..378991cfe40f 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -931,7 +931,7 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
  * Helper function for jffs2_get_inode_nodes().
  * The function detects whether more data should be read and reads it if yes.
  *
- * Returns: 0 on succes;
+ * Returns: 0 on success;
  * 	    negative error code on failure.
  */
 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 082e844ab2db..4b107881acd5 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
  *   is used to release xattr name/value pair and detach from c->xattrindex.
  * reclaim_xattr_datum(c)
  *   is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
- *   memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold 
+ *   memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold 
  *   is hard coded as 32KiB.
  * do_verify_xattr_datum(c, xd)
  *   is used to load the xdatum informations without name/value pair from the medium.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2bc7d8aa5740..d9b031cf69f5 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -755,7 +755,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 	 * allocation group.
 	 */
 	if ((blkno & (bmp->db_agsize - 1)) == 0)
-		/* check if the AG is currenly being written to.
+		/* check if the AG is currently being written to.
 		 * if so, call dbNextAG() to find a non-busy
 		 * AG with sufficient free space.
 		 */
@@ -3337,7 +3337,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
 	for (i = 0, n = 0; i < agno; n++) {
 		bmp->db_agfree[n] = 0;	/* init collection point */
 
-		/* coalesce cotiguous k AGs; */
+		/* coalesce contiguous k AGs; */
 		for (j = 0; j < k && i < agno; j++, i++) {
 			/* merge AGi to AGn */
 			bmp->db_agfree[n] += bmp->db_agfree[i];
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 0d58caf4a6e1..ec8f45f12e05 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -835,7 +835,7 @@ static int ncp_ioctl_need_write(unsigned int cmd)
 	case NCP_IOC_SETROOT:
 		return 0;
 	default:
-		/* unkown IOCTL command, assume write */
+		/* unknown IOCTL command, assume write */
 		return 1;
 	}
 }
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 9669541d0119..08f7530e9341 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -927,7 +927,7 @@ lock_retry_remap:
 		return 0;
 
 	ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
-			"EOVERFLOW" : (!err ? "EIO" : "unkown error"));
+			"EOVERFLOW" : (!err ? "EIO" : "unknown error"));
 	return err < 0 ? err : -EIO;
 
 read_err:
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 663c0e341f8b..43179ddd336f 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -399,7 +399,7 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
  * @cached_page: allocated but as yet unused page
  * @lru_pvec:	lru-buffering pagevec of caller
  *
- * Obtain @nr_pages locked page cache pages from the mapping @maping and
+ * Obtain @nr_pages locked page cache pages from the mapping @mapping and
  * starting at index @index.
  *
  * If a page is newly created, increment its refcount and add it to the
@@ -1281,7 +1281,7 @@ rl_not_mapped_enoent:
 
 /*
  * Copy as much as we can into the pages and return the number of bytes which
- * were sucessfully copied.  If a fault is encountered then clear the pages
+ * were successfully copied.  If a fault is encountered then clear the pages
  * out to (ofs + bytes) and return the number of bytes which were copied.
  */
 static inline size_t ntfs_copy_from_user(struct page **pages,
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 89b02985c054..4dadcdf3d451 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -338,7 +338,7 @@ err_out:
  * copy of the complete multi sector transfer deprotected page.  On failure,
  * *@wrp is undefined.
  *
- * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current
+ * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current
  * logfile lsn according to this restart page.  On failure, *@lsn is undefined.
  *
  * The following error codes are defined:
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38a42f5d59ff..7c7198a5bc90 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2398,7 +2398,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  *
  * The array is assumed to be large enough to hold an entire path (tree depth).
  *
- * Upon succesful return from this function:
+ * Upon successful return from this function:
  *
  * - The 'right_path' array will contain a path to the leaf block
  *   whose range contains e_cpos.
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 83bcaf266b35..03ccf9a7b1f4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2586,7 +2586,7 @@ fail:
 	 * is complete everywhere.  if the target dies while this is
 	 * going on, some nodes could potentially see the target as the
 	 * master, so it is important that my recovery finds the migration
-	 * mle and sets the master to UNKNONWN. */
+	 * mle and sets the master to UNKNOWN. */
 
 
 	/* wait for new node to assert master */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 0d38d67194cb..c5e4a49e3a12 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1855,7 +1855,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
 		 * outstanding lock request, so a cancel convert is
 		 * required. We intentionally overwrite 'ret' - if the
 		 * cancel fails and the lock was granted, it's easier
-		 * to just bubble sucess back up to the user.
+		 * to just bubble success back up to the user.
 		 */
 		ret = ocfs2_flock_handle_signal(lockres, level);
 	} else if (!ret && (level > lockres->l_level)) {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 54c16b66327e..bf34c491ae96 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle,
 
 	default:
 		status = -EINVAL;
-		mlog(ML_ERROR, "Uknown access type!\n");
+		mlog(ML_ERROR, "Unknown access type!\n");
 	}
 	if (!status && ocfs2_meta_ecc(osb) && triggers)
 		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 60287fc56bcb..d00c658ca150 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2431,7 +2431,7 @@ out:
  * we gonna touch and whether we need to create new blocks.
  *
  * Normally the refcount blocks store these refcount should be
- * continguous also, so that we can get the number easily.
+ * contiguous also, so that we can get the number easily.
  * As for meta_ac, we will at most add split 2 refcount record and
  * 2 more refcount block, so just check it in a rough way.
  *
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index e1c0ec0ae989..082234581d05 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -85,7 +85,7 @@ out:
 }
 
 /*
- * Tries to allocate exactly one block.  Returns true if sucessful.
+ * Tries to allocate exactly one block.  Returns true if successful.
  */
 int omfs_allocate_block(struct super_block *sb, u64 block)
 {
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index f94ddf7efba0..868a55ee080f 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -23,7 +23,7 @@
 /*
  * This file implements functions needed to recover from unclean un-mounts.
  * When UBIFS is mounted, it checks a flag on the master node to determine if
- * an un-mount was completed sucessfully. If not, the process of mounting
+ * an un-mount was completed successfully. If not, the process of mounting
  * incorparates additional checking and fixing of on-flash data structures.
  * UBIFS always cleans away all remnants of an unclean un-mount, so that
  * errors do not accumulate. However UBIFS defers recovery if it is mounted
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 6533ead9b889..a2c16bcee90b 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -98,7 +98,7 @@ typedef struct xfs_dquot {
 #define dq_flags	q_lists.dqm_flags
 
 /*
- * Lock hierachy for q_qlock:
+ * Lock hierarchy for q_qlock:
  *	XFS_QLOCK_NORMAL is the implicit default,
  * 	XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
  */
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 4c8d0afae711..fb2d63f13f4c 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -47,7 +47,7 @@
 
 #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
 
-/* memmap is virtually contigious.  */
+/* memmap is virtually contiguous.  */
 #define __pfn_to_page(pfn)	(vmemmap + (pfn))
 #define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66acea95..7c38c147e5e6 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -631,7 +631,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
  * these are provided for both review and as a porting
  * help for the C library version.
 *
- * Last chance: are any of these important enought to
+ * Last chance: are any of these important enough to
  * enable by default?
  */
 #ifdef __ARCH_WANT_SYSCALL_NO_AT
diff --git a/include/linux/chio.h b/include/linux/chio.h
index 519248d8b2b6..d9bac7f97282 100644
--- a/include/linux/chio.h
+++ b/include/linux/chio.h
@@ -21,7 +21,7 @@
  *    query vendor-specific element types
  *
  *    accessing elements works by specifing type and unit of the element.
- *    for eample, storage elements are addressed with type = CHET_ST and
+ *    for example, storage elements are addressed with type = CHET_ST and
  *    unit = 0 .. cp_nslots-1
  *
  */
diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h
index e5124ceea769..3402042ddc31 100644
--- a/include/linux/mfd/ezx-pcap.h
+++ b/include/linux/mfd/ezx-pcap.h
@@ -45,7 +45,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32);
 #define PCAP_CLEAR_INTERRUPT_REGISTER	0x01ffffff
 #define PCAP_MASK_ALL_INTERRUPT		0x01ffffff
 
-/* registers acessible by both pcap ports */
+/* registers accessible by both pcap ports */
 #define PCAP_REG_ISR		0x0	/* Interrupt Status */
 #define PCAP_REG_MSR		0x1	/* Interrupt Mask */
 #define PCAP_REG_PSTAT		0x2	/* Processor Status */
@@ -67,7 +67,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32);
 #define PCAP_REG_VENDOR_TEST1	0x1e
 #define PCAP_REG_VENDOR_TEST2	0x1f
 
-/* registers acessible by pcap port 1 only (a1200, e2 & e6) */
+/* registers accessible by pcap port 1 only (a1200, e2 & e6) */
 #define PCAP_REG_INT_SEL	0x3	/* Interrupt Select */
 #define PCAP_REG_SWCTRL		0x4	/* Switching Regulator Control */
 #define PCAP_REG_VREG1		0x5	/* Regulator Bank 1 Control */
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index d745f5b6c7b0..76e5053e1fac 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -30,7 +30,7 @@
 
 /*
  * use drive write caching -- we need deferred error handling to be
- * able to sucessfully recover with this option (drive will return good
+ * able to successfully recover with this option (drive will return good
  * status as soon as the cdb is validated).
  */
 #if defined(CONFIG_CDROM_PKTCDVD_WCACHE)
diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index 850db2e80510..cf9327c051ad 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -216,10 +216,10 @@
 
 #define UART_IIR_TOD	0x08	/* Character Timeout Indication Detected */
 
-#define UART_FCR_PXAR1	0x00	/* receive FIFO treshold = 1 */
-#define UART_FCR_PXAR8	0x40	/* receive FIFO treshold = 8 */
-#define UART_FCR_PXAR16	0x80	/* receive FIFO treshold = 16 */
-#define UART_FCR_PXAR32	0xc0	/* receive FIFO treshold = 32 */
+#define UART_FCR_PXAR1	0x00	/* receive FIFO threshold = 1 */
+#define UART_FCR_PXAR8	0x40	/* receive FIFO threshold = 8 */
+#define UART_FCR_PXAR16	0x80	/* receive FIFO threshold = 16 */
+#define UART_FCR_PXAR32	0xc0	/* receive FIFO threshold = 32 */
 
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b59e78c57161..dfd4745a955f 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -490,7 +490,7 @@ struct v4l2_jpegcompression {
 				 * you do, leave them untouched.
 				 * Inluding less markers will make the
 				 * resulting code smaller, but there will
-				 * be fewer aplications which can read it.
+				 * be fewer applications which can read it.
 				 * The presence of the APP and COM marker
 				 * is influenced by APP_len and COM_len
 				 * ONLY, not by this property! */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6e5f0e0c7967..ca4789e4f1e1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -988,7 +988,7 @@ struct sctp_transport {
 	int init_sent_count;
 
 	/* state       : The current state of this destination,
-	 *             : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN.
+	 *             : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN.
 	 */
 	int state;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c703377..1827e7f217d1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1260,7 +1260,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu
 	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
 /* This function calculates a "timeout" which is equivalent to the timeout of a
- * TCP connection after "boundary" unsucessful, exponentially backed-off
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
  * retransmissions with an initial RTO of TCP_RTO_MIN.
  */
 static inline bool retransmits_timed_out(const struct sock *sk,
diff --git a/include/net/wimax.h b/include/net/wimax.h
index 2af7bf839f23..3b07f6aad102 100644
--- a/include/net/wimax.h
+++ b/include/net/wimax.h
@@ -79,7 +79,7 @@
  * drivers have to only report state changes due to external
  * conditions.
  *
- * All API operations are 'atomic', serialized thorough a mutex in the
+ * All API operations are 'atomic', serialized through a mutex in the
  * `struct wimax_dev`.
  *
  * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK
diff --git a/include/sound/wm8993.h b/include/sound/wm8993.h
index 9c661f2f8cda..eee19f63c0d8 100644
--- a/include/sound/wm8993.h
+++ b/include/sound/wm8993.h
@@ -36,7 +36,7 @@ struct wm8993_platform_data {
 	unsigned int micbias1_lvl:1;
 	unsigned int micbias2_lvl:1;
 
-	/* Jack detect threashold levels, see datasheet for values */
+	/* Jack detect threshold levels, see datasheet for values */
 	unsigned int jd_scthr:2;
 	unsigned int jd_thr:2;
 };
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..759a629cc4bc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -419,7 +419,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
 	if (!task) {
 		/*
 		 * Per cpu events are removed via an smp call and
-		 * the removal is always sucessful.
+		 * the removal is always successful.
 		 */
 		smp_call_function_single(event->cpu,
 					 __perf_event_remove_from_context,
@@ -827,7 +827,7 @@ perf_install_in_context(struct perf_event_context *ctx,
 	if (!task) {
 		/*
 		 * Per cpu events are installed via an smp call and
-		 * the install is always sucessful.
+		 * the install is always successful.
 		 */
 		smp_call_function_single(cpu, __perf_install_in_context,
 					 event, 1);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 234ceb10861f..456b2bc6b1ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -105,7 +105,7 @@ config DEBUG_SECTION_MISMATCH
 	bool "Enable full Section mismatch analysis"
 	depends on UNDEFINED
 	# This option is on purpose disabled for now.
-	# It will be enabled when we are down to a resonable number
+	# It will be enabled when we are down to a reasonable number
 	# of section mismatch warnings (< 10 for an allyesconfig build)
 	help
 	  The section mismatch analysis checks if there are illegal
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 600f473a5610..76074209f9a2 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -299,7 +299,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
 		   again when using them (during symbol decoding).*/
 		base = hufGroup->base-1;
 		limit = hufGroup->limit-1;
-		/* Calculate permute[].  Concurently, initialize
+		/* Calculate permute[].  Concurrently, initialize
 		 * temp[] and limit[]. */
 		pp = 0;
 		for (i = minLen; i <= maxLen; i++) {
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ce6b7eabf674..d9b08e0f7f55 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -259,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
 		 * times. Without a hardware IOMMU this results in the
 		 * same device addresses being put into the dma-debug
 		 * hash multiple times too. This can result in false
-		 * positives being reported. Therfore we implement a
+		 * positives being reported. Therefore we implement a
 		 * best-fit algorithm here which returns the entry from
 		 * the hash which fits best to the reference value
 		 * instead of the first-fit.
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ac25cd28e807..853907e45868 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -453,7 +453,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
+	 * entries to indicate the number of contiguous entries available.
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */
diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f37156d..c3d3506ecaba 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1844,7 +1844,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 
 /*
  * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied.  If a fault is encountered then return the number of
+ * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
 size_t iov_iter_copy_from_user_atomic(struct page *page,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7226e60e52af..c31a310aa146 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -209,7 +209,7 @@ struct mem_cgroup {
 	int	prev_priority;	/* for recording reclaim priority */
 
 	/*
-	 * While reclaiming in a hiearchy, we cache the last child we
+	 * While reclaiming in a hierarchy, we cache the last child we
 	 * reclaimed from.
 	 */
 	int last_scanned_child;
@@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 
 	cgroup_lock();
 	/*
-	 * If parent's use_hiearchy is set, we can't make any modifications
+	 * If parent's use_hierarchy is set, we can't make any modifications
 	 * in the child subtrees. If it is unset, then the change can
 	 * occur, provided the current cgroup has no children.
 	 *
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dacc64183874..1ac49fef95ab 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
 	list_for_each_entry_safe (tk, next, to_kill, nd) {
 		if (doit) {
 			/*
-			 * In case something went wrong with munmaping
+			 * In case something went wrong with munmapping
 			 * make sure the process doesn't catch the
 			 * signal and then access the memory. Just kill it.
 			 * the signal handlers
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f7e2fa0974dc..16ad251c9725 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 	struct tcphdr _tcph, *tcph;
 	__be16 oldval;
 
-	/* Not enought header? */
+	/* Not enough header? */
 	tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (!tcph)
 		return false;
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 356e65b1dc42..783c5f367d29 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -450,10 +450,10 @@ void irlap_disconnect_request(struct irlap_cb *self)
 
 	/* Check if we are in the right state for disconnecting */
 	switch (self->state) {
-	case LAP_XMIT_P:        /* FALLTROUGH */
-	case LAP_XMIT_S:        /* FALLTROUGH */
-	case LAP_CONN:          /* FALLTROUGH */
-	case LAP_RESET_WAIT:    /* FALLTROUGH */
+	case LAP_XMIT_P:        /* FALLTHROUGH */
+	case LAP_XMIT_S:        /* FALLTHROUGH */
+	case LAP_CONN:          /* FALLTHROUGH */
+	case LAP_RESET_WAIT:    /* FALLTHROUGH */
 	case LAP_RESET_CHECK:
 		irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL);
 		break;
@@ -485,9 +485,9 @@ void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason)
 		IRDA_DEBUG(1, "%s(), Sending reset request!\n", __func__);
 		irlap_do_event(self, RESET_REQUEST, NULL, NULL);
 		break;
-	case LAP_NO_RESPONSE:	   /* FALLTROUGH */
-	case LAP_DISC_INDICATION:  /* FALLTROUGH */
-	case LAP_FOUND_NONE:       /* FALLTROUGH */
+	case LAP_NO_RESPONSE:	   /* FALLTHROUGH */
+	case LAP_DISC_INDICATION:  /* FALLTHROUGH */
+	case LAP_FOUND_NONE:       /* FALLTHROUGH */
 	case LAP_MEDIA_BUSY:
 		irlmp_link_disconnect_indication(self->notify.instance, self,
 						 reason, NULL);
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index c5c51959e3ce..94a9884d7146 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -1741,7 +1741,7 @@ static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event,
  * Function irlap_state_xmit_s (event, skb, info)
  *
  *   XMIT_S, The secondary station has been given the right to transmit,
- *   and we therefor do not expect to receive any transmissions from other
+ *   and we therefore do not expect to receive any transmissions from other
  *   stations.
  */
 static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 7bf5b913828b..0e7d8bde145d 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -105,7 +105,7 @@ int __init irlmp_init(void)
 
 	init_timer(&irlmp->discovery_timer);
 
-	/* Do discovery every 3 seconds, conditionaly */
+	/* Do discovery every 3 seconds, conditionally */
 	if (sysctl_discovery)
 		irlmp_start_discovery_timer(irlmp,
 					    sysctl_discovery_timeout*HZ);
@@ -1842,7 +1842,7 @@ LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason)
 		reason = LM_CONNECT_FAILURE;
 		break;
 	default:
-		IRDA_DEBUG(1, "%s(), Unknow IrLAP disconnect reason %d!\n",
+		IRDA_DEBUG(1, "%s(), Unknown IrLAP disconnect reason %d!\n",
 			   __func__, lap_reason);
 		reason = LM_LAP_DISCONNECT;
 		break;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 751c4d0e2b36..719ddbc9e48c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -244,7 +244,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data
  * @addr: destination address of the path (ETH_ALEN length)
  * @sdata: local subif
  *
- * Returns: 0 on sucess
+ * Returns: 0 on success
  *
  * State: the initial state of the new path is set to 0
  */
@@ -530,7 +530,7 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
  * @addr: dst address (ETH_ALEN length)
  * @sdata: local subif
  *
- * Returns: 0 if succesful
+ * Returns: 0 if successful
  */
 int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 7a10bbe02c13..c5d9f97ef217 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -682,7 +682,7 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
  * buckets and @skip_chain entries.  For each entry in the table call
  * @callback, if @callback returns a negative value stop 'walking' through the
  * table and return.  Updates the values in @skip_bkt and @skip_chain on
- * return.  Returns zero on succcess, negative values on failure.
+ * return.  Returns zero on success, negative values on failure.
  *
  */
 int netlbl_domhsh_walk(u32 *skip_bkt,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 8674d4919556..29d8501bf156 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -719,7 +719,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 
 	if (sctp_style(sk, TCP)) {
 		/* Change the sk->sk_state of a TCP-style socket that has
-		 * sucessfully completed a connect() call.
+		 * successfully completed a connect() call.
 		 */
 		if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
 			sk->sk_state = SCTP_SS_ESTABLISHED;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index f11be72a1a80..b15e1ebb2bfa 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -54,7 +54,7 @@
  * Assumptions:
  * - head[0] is physically contiguous.
  * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index ca269178c4d4..35f370091f4f 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -62,7 +62,7 @@
  * Called when wanting to reset the device for any reason. Device is
  * taken back to power on status.
  *
- * This call blocks; on succesful return, the device has completed the
+ * This call blocks; on successful return, the device has completed the
  * reset process and is ready to operate.
  */
 int wimax_reset(struct wimax_dev *wimax_dev)
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index d82953573588..8413cf38ed27 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -213,7 +213,7 @@ load_config_help[] = N_(
 	"to modify that configuration.\n"
 	"\n"
 	"If you are uncertain, then you have probably never used alternate\n"
-	"configuration files.  You should therefor leave this blank to abort.\n"),
+	"configuration files. You should therefore leave this blank to abort.\n"),
 save_config_text[] = N_(
 	"Enter a filename to which this configuration should be saved "
 	"as an alternate.  Leave blank to abort."),
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index e68823741ad5..2534400317c5 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -204,7 +204,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
  *
  * Description
  * Call the NetLabel mechanism to set the label of a packet using @sid.
- * Returns zero on auccess, negative values on failure.
+ * Returns zero on success, negative values on failure.
  *
  */
 int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index ff17820d35ec..5914eeb0b339 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -741,7 +741,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid)
 		goto out;
 	}
 
-	/* type/domain unchaned */
+	/* type/domain unchanged */
 	if (old_context->type == new_context->type) {
 		rc = 0;
 		goto out;
diff --git a/sound/Kconfig b/sound/Kconfig
index 4b5365ad6b46..fcad760f5691 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -55,7 +55,7 @@ config SOUND_OSS_CORE_PRECLAIM
 	  Please read Documentation/feature-removal-schedule.txt for
 	  details.
 
-	  If unusre, say Y.
+	  If unsure, say Y.
 
 source "sound/oss/dmasound/Kconfig"
 
diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c
index a076a6ce8071..a828baaab636 100644
--- a/sound/isa/cs423x/cs4236.c
+++ b/sound/isa/cs423x/cs4236.c
@@ -177,7 +177,7 @@ static struct pnp_card_device_id snd_cs423x_pnpids[] = {
 	{ .id = "CSC0437", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } },
 	/* Digital PC 5000 Onboard - CS4236B */
 	{ .id = "CSC0735", .devs = { { "CSC0000" }, { "CSC0010" } } },
-	/* some uknown CS4236B */
+	/* some unknown CS4236B */
 	{ .id = "CSC0b35", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } },
 	/* Intel PR440FX Onboard sound */
 	{ .id = "CSC0b36", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } },
diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c
index 02e30d7c6a93..ddad60ef3f37 100644
--- a/sound/isa/opti9xx/miro.c
+++ b/sound/isa/opti9xx/miro.c
@@ -137,7 +137,7 @@ struct snd_miro {
 static void snd_miro_proc_init(struct snd_miro * miro);
 
 static char * snd_opti9xx_names[] = {
-	"unkown",
+	"unknown",
 	"82C928", "82C929",
 	"82C924", "82C925",
 	"82C930", "82C931", "82C933"
diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c
index 5cd555325b9d..848007508ffd 100644
--- a/sound/isa/opti9xx/opti92x-ad1848.c
+++ b/sound/isa/opti9xx/opti92x-ad1848.c
@@ -185,7 +185,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids);
 #endif
 
 static char * snd_opti9xx_names[] = {
-	"unkown",
+	"unknown",
 	"82C928",	"82C929",
 	"82C924",	"82C925",
 	"82C930",	"82C931",	"82C933"
diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c
index 06e9e88e4c05..bb14e4c67e89 100644
--- a/sound/oss/dmasound/dmasound_paula.c
+++ b/sound/oss/dmasound/dmasound_paula.c
@@ -657,7 +657,7 @@ static int AmiStateInfo(char *buffer, size_t space)
 	len += sprintf(buffer+len, "\tsound.volume_right = %d [0...64]\n",
 		       dmasound.volume_right);
 	if (len >= space) {
-		printk(KERN_ERR "dmasound_paula: overlowed state buffer alloc.\n") ;
+		printk(KERN_ERR "dmasound_paula: overflowed state buffer alloc.\n") ;
 		len = space ;
 	}
 	return len;
diff --git a/sound/pci/ca0106/ca0106_proc.c b/sound/pci/ca0106/ca0106_proc.c
index c62b7d10ec61..8d13092300da 100644
--- a/sound/pci/ca0106/ca0106_proc.c
+++ b/sound/pci/ca0106/ca0106_proc.c
@@ -233,7 +233,7 @@ static void snd_ca0106_proc_dump_iec958( struct snd_info_buffer *buffer, u32 val
 			snd_iprintf(buffer, "user-defined\n");
 			break;
 		default:
-			snd_iprintf(buffer, "unkown\n");
+			snd_iprintf(buffer, "unknown\n");
 			break;
 		}
 		snd_iprintf(buffer, "Sample Bits: ");
diff --git a/sound/pci/cs46xx/imgs/cwcdma.asp b/sound/pci/cs46xx/imgs/cwcdma.asp
index 09d24c76f034..a65e1193c89a 100644
--- a/sound/pci/cs46xx/imgs/cwcdma.asp
+++ b/sound/pci/cs46xx/imgs/cwcdma.asp
@@ -26,10 +26,11 @@
 //
 //
 // The purpose of this code is very simple: make it possible to tranfser
-// the samples 'as they are' with no alteration from a PCMreader SCB (DMA from host)
-// to any other SCB. This is useful for AC3 throug SPDIF. SRC (source rate converters) 
-// task always alters the samples in some how, however it's from 48khz -> 48khz. The
-// alterations are not audible, but AC3 wont work. 
+// the samples 'as they are' with no alteration from a PCMreader
+// SCB (DMA from host) to any other SCB. This is useful for AC3 through SPDIF.
+// SRC (source rate converters) task always alters the samples in somehow,
+// however it's from 48khz -> 48khz.
+// The alterations are not audible, but AC3 wont work. 
 //
 //        ...
 //         |
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 36e08bd2b3cc..360e3809a60b 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -184,7 +184,7 @@ MODULE_PARM_DESC(enable, "Enable the EMU10K1X soundcard.");
  * The hardware has 3 channels for playback and 1 for capture.
  *  - channel 0 is the front channel
  *  - channel 1 is the rear channel
- *  - channel 2 is the center/lfe chanel
+ *  - channel 2 is the center/lfe channel
  * Volume is controlled by the AC97 for the front and rear channels by
  * the PCM Playback Volume, Sigmatel Surround Playback Volume and 
  * Surround Playback Volume. The Sigmatel 4-Speaker Stereo switch affects
diff --git a/sound/pci/hda/patch_cmedia.c b/sound/pci/hda/patch_cmedia.c
index 780e1a72114a..8917071d5b6a 100644
--- a/sound/pci/hda/patch_cmedia.c
+++ b/sound/pci/hda/patch_cmedia.c
@@ -66,7 +66,7 @@ struct cmi_spec {
 
 	struct hda_pcm pcm_rec[2];	/* PCM information */
 
-	/* pin deafault configuration */
+	/* pin default configuration */
 	hda_nid_t pin_nid[NUM_PINS];
 	unsigned int def_conf[NUM_PINS];
 	unsigned int pin_def_confs;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ff20048504b6..872731eb49e8 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6619,7 +6619,7 @@ static struct hda_input_mux alc889A_mb31_capture_source = {
 		/* Front Mic (0x01) unused */
 		{ "Line", 0x2 },
 		/* Line 2 (0x03) unused */
-		/* CD (0x04) unsused? */
+		/* CD (0x04) unused? */
 	},
 };
 
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 0dce331a2a3b..a1b10d1a384d 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -3017,7 +3017,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry,
 		insel = "Coaxial";
 		break;
 	default:
-		insel = "Unkown";
+		insel = "Unknown";
 	}
 
 	switch (hdspm->control_register & HDSPM_SyncRefMask) {
@@ -3028,7 +3028,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry,
 		syncref = "MADI";
 		break;
 	default:
-		syncref = "Unkown";
+		syncref = "Unknown";
 	}
 	snd_iprintf(buffer, "Inputsel = %s, SyncRef = %s\n", insel,
 		    syncref);
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index c33b92edbded..8ce1c9b2e5b8 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -101,7 +101,7 @@ static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg,
 	pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value);
 
 	if (reg >= UDA134X_REGS_NUM) {
-		printk(KERN_ERR "%s unkown register: reg: %u",
+		printk(KERN_ERR "%s unknown register: reg: %u",
 		       __func__, reg);
 		return -EINVAL;
 	}
@@ -552,7 +552,7 @@ static int uda134x_soc_probe(struct platform_device *pdev)
 					ARRAY_SIZE(uda1341_snd_controls));
 	break;
 	default:
-		printk(KERN_ERR "%s unkown codec type: %d",
+		printk(KERN_ERR "%s unknown codec type: %d",
 			__func__, pd->model);
 	return -EINVAL;
 	}
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index fe1307b500cf..d72347d90b70 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -607,7 +607,7 @@ SOC_SINGLE("Right Input PGA Common Mode Switch", WM8903_ANALOGUE_RIGHT_INPUT_1,
 SOC_SINGLE("DRC Switch", WM8903_DRC_0, 15, 1, 0),
 SOC_ENUM("DRC Compressor Slope R0", drc_slope_r0),
 SOC_ENUM("DRC Compressor Slope R1", drc_slope_r1),
-SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8903_DRC_3, 5, 124, 1,
+SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8903_DRC_3, 5, 124, 1,
 	       drc_tlv_thresh),
 SOC_SINGLE_TLV("DRC Volume", WM8903_DRC_3, 0, 30, 1, drc_tlv_amp),
 SOC_SINGLE_TLV("DRC Minimum Gain Volume", WM8903_DRC_1, 2, 3, 1, drc_tlv_min),
@@ -617,11 +617,11 @@ SOC_ENUM("DRC Decay Rate", drc_decay),
 SOC_ENUM("DRC FF Delay", drc_ff_delay),
 SOC_SINGLE("DRC Anticlip Switch", WM8903_DRC_0, 1, 1, 0),
 SOC_SINGLE("DRC QR Switch", WM8903_DRC_0, 2, 1, 0),
-SOC_SINGLE_TLV("DRC QR Threashold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max),
+SOC_SINGLE_TLV("DRC QR Threshold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max),
 SOC_ENUM("DRC QR Decay Rate", drc_qr_decay),
 SOC_SINGLE("DRC Smoothing Switch", WM8903_DRC_0, 3, 1, 0),
 SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8903_DRC_0, 0, 1, 0),
-SOC_ENUM("DRC Smoothing Threashold", drc_smoothing),
+SOC_ENUM("DRC Smoothing Threshold", drc_smoothing),
 SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup),
 
 SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT,
diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c
index d9987999e92c..bc033687b220 100644
--- a/sound/soc/codecs/wm8993.c
+++ b/sound/soc/codecs/wm8993.c
@@ -689,7 +689,7 @@ SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8993_DIGITAL_SIDE_TONE,
 
 SOC_SINGLE("DRC Switch", WM8993_DRC_CONTROL_1, 15, 1, 0),
 SOC_ENUM("DRC Path", drc_path),
-SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8993_DRC_CONTROL_2,
+SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8993_DRC_CONTROL_2,
 	       2, 60, 1, drc_comp_threash),
 SOC_SINGLE_TLV("DRC Compressor Amplitude Volume", WM8993_DRC_CONTROL_3,
 	       11, 30, 1, drc_comp_amp),
@@ -709,7 +709,7 @@ SOC_SINGLE_TLV("DRC Quick Release Volume", WM8993_DRC_CONTROL_3, 2, 3, 0,
 SOC_ENUM("DRC Quick Release Rate", drc_qr_rate),
 SOC_SINGLE("DRC Smoothing Switch", WM8993_DRC_CONTROL_1, 11, 1, 0),
 SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8993_DRC_CONTROL_1, 8, 1, 0),
-SOC_ENUM("DRC Smoothing Hysteresis Threashold", drc_smooth),
+SOC_ENUM("DRC Smoothing Hysteresis Threshold", drc_smooth),
 SOC_SINGLE_TLV("DRC Startup Volume", WM8993_DRC_CONTROL_4, 8, 18, 0,
 	       drc_startup_tlv),
 
diff --git a/sound/soc/s3c24xx/s3c24xx_simtec.c b/sound/soc/s3c24xx/s3c24xx_simtec.c
index 1966e0d5652d..3c7ccb78b6ab 100644
--- a/sound/soc/s3c24xx/s3c24xx_simtec.c
+++ b/sound/soc/s3c24xx/s3c24xx_simtec.c
@@ -270,7 +270,7 @@ static int attach_gpio_amp(struct device *dev,
 		gpio_direction_output(pd->amp_gain[1], 0);
 	}
 
-	/* note, curently we assume GPA0 isn't valid amp */
+	/* note, currently we assume GPA0 isn't valid amp */
 	if (pdata->amp_gpio > 0) {
 		ret = gpio_request(pd->amp_gpio, "gpio-amp");
 		if (ret) {
diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c
index 83b8028e209d..81d6f983f51e 100644
--- a/sound/soc/s6000/s6000-pcm.c
+++ b/sound/soc/s6000/s6000-pcm.c
@@ -196,7 +196,7 @@ static int s6000_pcm_start(struct snd_pcm_substream *substream)
 			   0 /* destination skip after chunk (impossible) */,
 			   4 /* 16 byte burst size */,
 			   -1 /* don't conserve bandwidth */,
-			   0 /* low watermark irq descriptor theshold */,
+			   0 /* low watermark irq descriptor threshold */,
 			   0 /* disable hardware timestamps */,
 			   1 /* enable channel */);
 
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 49c998186592..dbca7c909a31 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -353,7 +353,7 @@ static struct sound_unit *chains[SOUND_STEP];
  *      @dev: device pointer
  *
  *	Allocate a special sound device by minor number from the sound
- *	subsystem. The allocated number is returned on succes. On failure
+ *	subsystem. The allocated number is returned on success. On failure
  *	a negative error code is returned.
  */
  
-- 
cgit v1.2.3


From bebd04cc4569844effbdae49c01a48e57fa77864 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Sun, 15 Nov 2009 18:57:24 +0100
Subject: doc: Fix IRQ chip docs

This patch updates the IRQ docs to match reality.

Signed-off-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/DocBook/genericirq.tmpl | 4 ++--
 include/linux/irq.h                   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
index c671a0168096..1448b33fd222 100644
--- a/Documentation/DocBook/genericirq.tmpl
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -417,8 +417,8 @@ desc->chip->end();
       </para>
       <para>
 	To make use of the split implementation, replace the call to
-	__do_IRQ by a call to desc->chip->handle_irq() and associate
-        the appropriate handler function to desc->chip->handle_irq().
+	__do_IRQ by a call to desc->handle_irq() and associate
+        the appropriate handler function to desc->handle_irq().
 	In most cases the generic handler implementations should
 	be sufficient.
      </para>
diff --git a/include/linux/irq.h b/include/linux/irq.h
index ae9653dbcd78..a287cfc0b1a6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -282,7 +282,7 @@ extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action);
 
 /*
  * Built-in IRQ handlers for various IRQ types,
- * callable via desc->chip->handle_irq()
+ * callable via desc->handle_irq()
  */
 extern void handle_level_irq(unsigned int irq, struct irq_desc *desc);
 extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
-- 
cgit v1.2.3


From b69f2292063d2caf37ca9aec7d63ded203701bf3 Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 4 Dec 2009 14:52:42 +0100
Subject: block: Fix io_context leak after failure of clone with CLONE_IO

With CLONE_IO, parent's io_context->nr_tasks is incremented, but never
decremented whenever copy_process() fails afterwards, which prevents
exit_io_context() from calling IO schedulers exit functions.

Give a task_struct to exit_io_context(), and call exit_io_context() instead of
put_io_context() in copy_process() cleanup path.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-ioc.c           | 10 +++++-----
 include/linux/iocontext.h |  5 +++--
 kernel/exit.c             |  2 +-
 kernel/fork.c             |  3 ++-
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index dcd041290b28..cbdabb0dd6d7 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -66,14 +66,14 @@ static void cfq_exit(struct io_context *ioc)
 }
 
 /* Called by the exitting task */
-void exit_io_context(void)
+void exit_io_context(struct task_struct *task)
 {
 	struct io_context *ioc;
 
-	task_lock(current);
-	ioc = current->io_context;
-	current->io_context = NULL;
-	task_unlock(current);
+	task_lock(task);
+	ioc = task->io_context;
+	task->io_context = NULL;
+	task_unlock(task);
 
 	if (atomic_dec_and_test(&ioc->nr_tasks)) {
 		if (ioc->aic && ioc->aic->exit)
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index d61b0b8b5cd1..a63235996309 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -98,14 +98,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 	return NULL;
 }
 
+struct task_struct;
 #ifdef CONFIG_BLOCK
 int put_io_context(struct io_context *ioc);
-void exit_io_context(void);
+void exit_io_context(struct task_struct *task);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
 struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 #else
-static inline void exit_io_context(void)
+static inline void exit_io_context(struct task_struct *task)
 {
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..2544000125d9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1004,7 +1004,7 @@ NORET_TYPE void do_exit(long code)
 	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
-		exit_io_context();
+		exit_io_context(tsk);
 
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..607353425bb0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1310,7 +1310,8 @@ bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
-- 
cgit v1.2.3


From dbf9bfe615717d1145f263c0049fe2328e6ed395 Mon Sep 17 00:00:00 2001
From: jack wang <jack_wang@usish.com>
Date: Wed, 14 Oct 2009 16:19:21 +0800
Subject: [SCSI] pm8001: add SAS/SATA HBA driver

This driver supports PMC-Sierra PCIe SAS/SATA 8x6G SPC 8001 chip based
host adapters.

Signed-off-by: Jack Wang <jack_wang@usish.com>
Signed-off-by: Lindar Liu <lindar_liu@usish.com>
Signed-off-by: Tom Peng <tom_peng@usish.com>
Signed-off-by: Kevin Ao <aoqingyun@usish.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 MAINTAINERS                        |    7 +
 drivers/scsi/Kconfig               |    8 +
 drivers/scsi/Makefile              |    1 +
 drivers/scsi/pm8001/Makefile       |   12 +
 drivers/scsi/pm8001/pm8001_chips.h |   89 +
 drivers/scsi/pm8001/pm8001_ctl.c   |  573 +++++
 drivers/scsi/pm8001/pm8001_ctl.h   |   67 +
 drivers/scsi/pm8001/pm8001_defs.h  |  112 +
 drivers/scsi/pm8001/pm8001_hwi.c   | 4371 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/pm8001/pm8001_hwi.h   | 1011 +++++++++
 drivers/scsi/pm8001/pm8001_init.c  |  888 ++++++++
 drivers/scsi/pm8001/pm8001_sas.c   | 1104 +++++++++
 drivers/scsi/pm8001/pm8001_sas.h   |  480 ++++
 include/linux/pci_ids.h            |    2 +
 14 files changed, 8725 insertions(+)
 create mode 100644 drivers/scsi/pm8001/Makefile
 create mode 100644 drivers/scsi/pm8001/pm8001_chips.h
 create mode 100644 drivers/scsi/pm8001/pm8001_ctl.c
 create mode 100644 drivers/scsi/pm8001/pm8001_ctl.h
 create mode 100644 drivers/scsi/pm8001/pm8001_defs.h
 create mode 100644 drivers/scsi/pm8001/pm8001_hwi.c
 create mode 100644 drivers/scsi/pm8001/pm8001_hwi.h
 create mode 100644 drivers/scsi/pm8001/pm8001_init.c
 create mode 100644 drivers/scsi/pm8001/pm8001_sas.c
 create mode 100644 drivers/scsi/pm8001/pm8001_sas.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index a1a2aceca5bd..016411cadc9a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4116,6 +4116,13 @@ W:	http://www.pmc-sierra.com/
 S:	Supported
 F:	drivers/scsi/pmcraid.*
 
+PMC SIERRA PM8001 DRIVER
+M:	jack_wang@usish.com
+M:	lindar_liu@usish.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/pm8001/
+
 POSIX CLOCKS and TIMERS
 M:	Thomas Gleixner <tglx@linutronix.de>
 S:	Supported
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index e11cca4c784c..2e4f7d0ee639 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1818,6 +1818,14 @@ config SCSI_PMCRAID
 	---help---
 	  This driver supports the PMC SIERRA MaxRAID adapters.
 
+config SCSI_PM8001
+	tristate "PMC-Sierra SPC 8001 SAS/SATA Based Host Adapter driver"
+	depends on PCI && SCSI
+	select SCSI_SAS_LIBSAS
+	help
+	  This driver supports PMC-Sierra PCIE SAS/SATA 8x6G SPC 8001 chip
+	  based host adapters.
+
 config SCSI_SRP
 	tristate "SCSI RDMA Protocol helper library"
 	depends on SCSI && PCI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 3ad61db5e3fa..53b1dac7e7d9 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_SCSI_AIC79XX)	+= aic7xxx/
 obj-$(CONFIG_SCSI_AACRAID)	+= aacraid/
 obj-$(CONFIG_SCSI_AIC7XXX_OLD)	+= aic7xxx_old.o
 obj-$(CONFIG_SCSI_AIC94XX)	+= aic94xx/
+obj-$(CONFIG_SCSI_PM8001)	+= pm8001/
 obj-$(CONFIG_SCSI_IPS)		+= ips.o
 obj-$(CONFIG_SCSI_FD_MCS)	+= fd_mcs.o
 obj-$(CONFIG_SCSI_FUTURE_DOMAIN)+= fdomain.o
diff --git a/drivers/scsi/pm8001/Makefile b/drivers/scsi/pm8001/Makefile
new file mode 100644
index 000000000000..52f04296171c
--- /dev/null
+++ b/drivers/scsi/pm8001/Makefile
@@ -0,0 +1,12 @@
+#
+# Kernel configuration file for the PM8001 SAS/SATA 8x6G based HBA driver
+#
+# Copyright (C) 2008-2009  USI Co., Ltd.
+
+
+obj-$(CONFIG_SCSI_PM8001) += pm8001.o
+pm8001-y += pm8001_init.o \
+		pm8001_sas.o  \
+		pm8001_ctl.o  \
+		pm8001_hwi.o
+
diff --git a/drivers/scsi/pm8001/pm8001_chips.h b/drivers/scsi/pm8001/pm8001_chips.h
new file mode 100644
index 000000000000..4efa4d0950e5
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_chips.h
@@ -0,0 +1,89 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef _PM8001_CHIPS_H_
+#define _PM8001_CHIPS_H_
+
+static inline u32 pm8001_read_32(void *virt_addr)
+{
+	return *((u32 *)virt_addr);
+}
+
+static inline void pm8001_write_32(void *addr, u32 offset, u32 val)
+{
+	*((u32 *)(addr + offset)) = val;
+}
+
+static inline u32 pm8001_cr32(struct pm8001_hba_info *pm8001_ha, u32 bar,
+		u32 offset)
+{
+	return readl(pm8001_ha->io_mem[bar].memvirtaddr + offset);
+}
+
+static inline void pm8001_cw32(struct pm8001_hba_info *pm8001_ha, u32 bar,
+		u32 addr, u32 val)
+{
+	writel(val, pm8001_ha->io_mem[bar].memvirtaddr + addr);
+}
+static inline u32 pm8001_mr32(void __iomem *addr, u32 offset)
+{
+	return readl(addr + offset);
+}
+static inline void pm8001_mw32(void __iomem *addr, u32 offset, u32 val)
+{
+	writel(val, addr + offset);
+}
+static inline u32 get_pci_bar_index(u32 pcibar)
+{
+		switch (pcibar) {
+		case 0x18:
+		case 0x1C:
+			return 1;
+		case 0x20:
+			return 2;
+		case 0x24:
+			return 3;
+		default:
+			return 0;
+	}
+}
+
+#endif  /* _PM8001_CHIPS_H_ */
+
diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c
new file mode 100644
index 000000000000..14b13acae6dd
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_ctl.c
@@ -0,0 +1,573 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+#include <linux/firmware.h>
+#include "pm8001_sas.h"
+#include "pm8001_ctl.h"
+
+/* scsi host attributes */
+
+/**
+ * pm8001_ctl_mpi_interface_rev_show - MPI interface revision number
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_mpi_interface_rev_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		pm8001_ha->main_cfg_tbl.interface_rev);
+}
+static
+DEVICE_ATTR(interface_rev, S_IRUGO, pm8001_ctl_mpi_interface_rev_show, NULL);
+
+/**
+ * pm8001_ctl_fw_version_show - firmware version
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_fw_version_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x.%02x\n",
+		       (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 24),
+		       (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 16),
+		       (u8)(pm8001_ha->main_cfg_tbl.firmware_rev >> 8),
+		       (u8)(pm8001_ha->main_cfg_tbl.firmware_rev));
+}
+static DEVICE_ATTR(fw_version, S_IRUGO, pm8001_ctl_fw_version_show, NULL);
+/**
+ * pm8001_ctl_max_out_io_show - max outstanding io supported
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_max_out_io_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			pm8001_ha->main_cfg_tbl.max_out_io);
+}
+static DEVICE_ATTR(max_out_io, S_IRUGO, pm8001_ctl_max_out_io_show, NULL);
+/**
+ * pm8001_ctl_max_devices_show - max devices support
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_max_devices_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%04d\n",
+			(u16)(pm8001_ha->main_cfg_tbl.max_sgl >> 16));
+}
+static DEVICE_ATTR(max_devices, S_IRUGO, pm8001_ctl_max_devices_show, NULL);
+/**
+ * pm8001_ctl_max_sg_list_show - max sg list supported iff not 0.0 for no
+ * hardware limitation
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_max_sg_list_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%04d\n",
+			pm8001_ha->main_cfg_tbl.max_sgl & 0x0000FFFF);
+}
+static DEVICE_ATTR(max_sg_list, S_IRUGO, pm8001_ctl_max_sg_list_show, NULL);
+
+#define SAS_1_0 0x1
+#define SAS_1_1 0x2
+#define SAS_2_0 0x4
+
+static ssize_t
+show_sas_spec_support_status(unsigned int mode, char *buf)
+{
+	ssize_t len = 0;
+
+	if (mode & SAS_1_1)
+		len = sprintf(buf, "%s", "SAS1.1");
+	if (mode & SAS_2_0)
+		len += sprintf(buf + len, "%s%s", len ? ", " : "", "SAS2.0");
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/**
+ * pm8001_ctl_sas_spec_support_show - sas spec supported
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_sas_spec_support_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	unsigned int mode;
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	mode = (pm8001_ha->main_cfg_tbl.ctrl_cap_flag & 0xfe000000)>>25;
+	return show_sas_spec_support_status(mode, buf);
+}
+static DEVICE_ATTR(sas_spec_support, S_IRUGO,
+		   pm8001_ctl_sas_spec_support_show, NULL);
+
+/**
+ * pm8001_ctl_sas_address_show - sas address
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * This is the controller sas address
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_host_sas_address_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	return snprintf(buf, PAGE_SIZE, "0x%016llx\n",
+			be64_to_cpu(*(__be64 *)pm8001_ha->sas_addr));
+}
+static DEVICE_ATTR(host_sas_address, S_IRUGO,
+		   pm8001_ctl_host_sas_address_show, NULL);
+
+/**
+ * pm8001_ctl_logging_level_show - logging level
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read/write' shost attribute.
+ */
+static ssize_t pm8001_ctl_logging_level_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	return snprintf(buf, PAGE_SIZE, "%08xh\n", pm8001_ha->logging_level);
+}
+static ssize_t pm8001_ctl_logging_level_store(struct device *cdev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	int val = 0;
+
+	if (sscanf(buf, "%x", &val) != 1)
+		return -EINVAL;
+
+	pm8001_ha->logging_level = val;
+	return strlen(buf);
+}
+
+static DEVICE_ATTR(logging_level, S_IRUGO | S_IWUSR,
+	pm8001_ctl_logging_level_show, pm8001_ctl_logging_level_store);
+/**
+ * pm8001_ctl_aap_log_show - aap1 event log
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_aap_log_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	int i;
+#define AAP1_MEMMAP(r, c) \
+	(*(u32 *)((u8*)pm8001_ha->memoryMap.region[AAP1].virt_ptr + (r) * 32 \
+	+ (c)))
+
+	char *str = buf;
+	int max = 2;
+	for (i = 0; i < max; i++) {
+		str += sprintf(str, "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x"
+			       "0x%08x 0x%08x\n",
+			       AAP1_MEMMAP(i, 0),
+			       AAP1_MEMMAP(i, 4),
+			       AAP1_MEMMAP(i, 8),
+			       AAP1_MEMMAP(i, 12),
+			       AAP1_MEMMAP(i, 16),
+			       AAP1_MEMMAP(i, 20),
+			       AAP1_MEMMAP(i, 24),
+			       AAP1_MEMMAP(i, 28));
+	}
+
+	return str - buf;
+}
+static DEVICE_ATTR(aap_log, S_IRUGO, pm8001_ctl_aap_log_show, NULL);
+/**
+ * pm8001_ctl_aap_log_show - IOP event log
+ * @cdev: pointer to embedded class device
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' shost attribute.
+ */
+static ssize_t pm8001_ctl_iop_log_show(struct device *cdev,
+	struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+#define IOP_MEMMAP(r, c) \
+	(*(u32 *)((u8*)pm8001_ha->memoryMap.region[IOP].virt_ptr + (r) * 32 \
+	+ (c)))
+	int i;
+	char *str = buf;
+	int max = 2;
+	for (i = 0; i < max; i++) {
+		str += sprintf(str, "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x"
+			       "0x%08x 0x%08x\n",
+			       IOP_MEMMAP(i, 0),
+			       IOP_MEMMAP(i, 4),
+			       IOP_MEMMAP(i, 8),
+			       IOP_MEMMAP(i, 12),
+			       IOP_MEMMAP(i, 16),
+			       IOP_MEMMAP(i, 20),
+			       IOP_MEMMAP(i, 24),
+			       IOP_MEMMAP(i, 28));
+	}
+
+	return str - buf;
+}
+static DEVICE_ATTR(iop_log, S_IRUGO, pm8001_ctl_iop_log_show, NULL);
+
+#define FLASH_CMD_NONE      0x00
+#define FLASH_CMD_UPDATE    0x01
+#define FLASH_CMD_SET_NVMD    0x02
+
+struct flash_command {
+     u8      command[8];
+     int     code;
+};
+
+static struct flash_command flash_command_table[] =
+{
+     {"set_nvmd",    FLASH_CMD_SET_NVMD},
+     {"update",      FLASH_CMD_UPDATE},
+     {"",            FLASH_CMD_NONE} /* Last entry should be NULL. */
+};
+
+struct error_fw {
+     char    *reason;
+     int     err_code;
+};
+
+static struct error_fw flash_error_table[] =
+{
+     {"Failed to open fw image file",	FAIL_OPEN_BIOS_FILE},
+     {"image header mismatch",		FLASH_UPDATE_HDR_ERR},
+     {"image offset mismatch",		FLASH_UPDATE_OFFSET_ERR},
+     {"image CRC Error",		FLASH_UPDATE_CRC_ERR},
+     {"image length Error.",		FLASH_UPDATE_LENGTH_ERR},
+     {"Failed to program flash chip",	FLASH_UPDATE_HW_ERR},
+     {"Flash chip not supported.",	FLASH_UPDATE_DNLD_NOT_SUPPORTED},
+     {"Flash update disabled.",		FLASH_UPDATE_DISABLED},
+     {"Flash in progress",		FLASH_IN_PROGRESS},
+     {"Image file size Error",		FAIL_FILE_SIZE},
+     {"Input parameter error",		FAIL_PARAMETERS},
+     {"Out of memory",			FAIL_OUT_MEMORY},
+     {"OK", 0}	/* Last entry err_code = 0. */
+};
+
+static int pm8001_set_nvmd(struct pm8001_hba_info *pm8001_ha)
+{
+	struct pm8001_ioctl_payload	*payload;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	u8		*ioctlbuffer = NULL;
+	u32		length = 0;
+	u32		ret = 0;
+
+	length = 1024 * 5 + sizeof(*payload) - 1;
+	ioctlbuffer = kzalloc(length, GFP_KERNEL);
+	if (!ioctlbuffer)
+		return -ENOMEM;
+	if ((pm8001_ha->fw_image->size <= 0) ||
+	    (pm8001_ha->fw_image->size > 4096)) {
+		ret = FAIL_FILE_SIZE;
+		goto out;
+	}
+	payload = (struct pm8001_ioctl_payload *)ioctlbuffer;
+	memcpy((u8 *)payload->func_specific, (u8 *)pm8001_ha->fw_image->data,
+				pm8001_ha->fw_image->size);
+	payload->length = pm8001_ha->fw_image->size;
+	payload->id = 0;
+	pm8001_ha->nvmd_completion = &completion;
+	ret = PM8001_CHIP_DISP->set_nvmd_req(pm8001_ha, payload);
+	wait_for_completion(&completion);
+out:
+	kfree(ioctlbuffer);
+	return ret;
+}
+
+static int pm8001_update_flash(struct pm8001_hba_info *pm8001_ha)
+{
+	struct pm8001_ioctl_payload	*payload;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	u8		*ioctlbuffer = NULL;
+	u32		length = 0;
+	struct fw_control_info	*fwControl;
+	u32		loopNumber, loopcount = 0;
+	u32		sizeRead = 0;
+	u32		partitionSize, partitionSizeTmp;
+	u32		ret = 0;
+	u32		partitionNumber = 0;
+	struct pm8001_fw_image_header *image_hdr;
+
+	length = 1024 * 16 + sizeof(*payload) - 1;
+	ioctlbuffer = kzalloc(length, GFP_KERNEL);
+	image_hdr = (struct pm8001_fw_image_header *)pm8001_ha->fw_image->data;
+	if (!ioctlbuffer)
+		return -ENOMEM;
+	if (pm8001_ha->fw_image->size < 28) {
+		ret = FAIL_FILE_SIZE;
+		goto out;
+	}
+
+	while (sizeRead < pm8001_ha->fw_image->size) {
+		partitionSizeTmp =
+			*(u32 *)((u8 *)&image_hdr->image_length + sizeRead);
+		partitionSize = be32_to_cpu(partitionSizeTmp);
+		loopcount = (partitionSize + HEADER_LEN)/IOCTL_BUF_SIZE;
+		if (loopcount % IOCTL_BUF_SIZE)
+			loopcount++;
+		if (loopcount == 0)
+			loopcount++;
+		for (loopNumber = 0; loopNumber < loopcount; loopNumber++) {
+			payload = (struct pm8001_ioctl_payload *)ioctlbuffer;
+			payload->length = 1024*16;
+			payload->id = 0;
+			fwControl =
+			      (struct fw_control_info *)payload->func_specific;
+			fwControl->len = IOCTL_BUF_SIZE;   /* IN */
+			fwControl->size = partitionSize + HEADER_LEN;/* IN */
+			fwControl->retcode = 0;/* OUT */
+			fwControl->offset = loopNumber * IOCTL_BUF_SIZE;/*OUT */
+
+		/* for the last chunk of data in case file size is not even with
+		4k, load only the rest*/
+		if (((loopcount-loopNumber) == 1) &&
+			((partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE)) {
+			fwControl->len =
+				(partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE;
+			memcpy((u8 *)fwControl->buffer,
+				(u8 *)pm8001_ha->fw_image->data + sizeRead,
+				(partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE);
+			sizeRead +=
+				(partitionSize + HEADER_LEN) % IOCTL_BUF_SIZE;
+		} else {
+			memcpy((u8 *)fwControl->buffer,
+				(u8 *)pm8001_ha->fw_image->data + sizeRead,
+				IOCTL_BUF_SIZE);
+			sizeRead += IOCTL_BUF_SIZE;
+		}
+
+		pm8001_ha->nvmd_completion = &completion;
+		ret = PM8001_CHIP_DISP->fw_flash_update_req(pm8001_ha, payload);
+		wait_for_completion(&completion);
+		if (ret || (fwControl->retcode > FLASH_UPDATE_IN_PROGRESS)) {
+			ret = fwControl->retcode;
+			kfree(ioctlbuffer);
+			ioctlbuffer = NULL;
+			break;
+		}
+	}
+	if (ret)
+		break;
+	partitionNumber++;
+}
+out:
+	kfree(ioctlbuffer);
+	return ret;
+}
+static ssize_t pm8001_store_update_fw(struct device *cdev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	char *cmd_ptr, *filename_ptr;
+	int res, i;
+	int flash_command = FLASH_CMD_NONE;
+	int err = 0;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	cmd_ptr = kzalloc(count*2, GFP_KERNEL);
+
+	if (!cmd_ptr) {
+		err = FAIL_OUT_MEMORY;
+		goto out;
+	}
+
+	filename_ptr = cmd_ptr + count;
+	res = sscanf(buf, "%s %s", cmd_ptr, filename_ptr);
+	if (res != 2) {
+		err = FAIL_PARAMETERS;
+		goto out1;
+	}
+
+	for (i = 0; flash_command_table[i].code != FLASH_CMD_NONE; i++) {
+		if (!memcmp(flash_command_table[i].command,
+				 cmd_ptr, strlen(cmd_ptr))) {
+			flash_command = flash_command_table[i].code;
+			break;
+		}
+	}
+	if (flash_command == FLASH_CMD_NONE) {
+		err = FAIL_PARAMETERS;
+		goto out1;
+	}
+
+	if (pm8001_ha->fw_status == FLASH_IN_PROGRESS) {
+		err = FLASH_IN_PROGRESS;
+		goto out1;
+	}
+	err = request_firmware(&pm8001_ha->fw_image,
+			       filename_ptr,
+			       pm8001_ha->dev);
+
+	if (err) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Failed to load firmware image file %s,"
+			" error %d\n", filename_ptr, err));
+		err = FAIL_OPEN_BIOS_FILE;
+		goto out1;
+	}
+
+	switch (flash_command) {
+	case FLASH_CMD_UPDATE:
+		pm8001_ha->fw_status = FLASH_IN_PROGRESS;
+		err = pm8001_update_flash(pm8001_ha);
+		break;
+	case FLASH_CMD_SET_NVMD:
+		pm8001_ha->fw_status = FLASH_IN_PROGRESS;
+		err = pm8001_set_nvmd(pm8001_ha);
+		break;
+	default:
+		pm8001_ha->fw_status = FAIL_PARAMETERS;
+		err = FAIL_PARAMETERS;
+		break;
+	}
+	release_firmware(pm8001_ha->fw_image);
+out1:
+	kfree(cmd_ptr);
+out:
+	pm8001_ha->fw_status = err;
+
+	if (!err)
+		return count;
+	else
+		return -err;
+}
+
+static ssize_t pm8001_show_update_fw(struct device *cdev,
+				     struct device_attribute *attr, char *buf)
+{
+	int i;
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+
+	for (i = 0; flash_error_table[i].err_code != 0; i++) {
+		if (flash_error_table[i].err_code == pm8001_ha->fw_status)
+			break;
+	}
+	if (pm8001_ha->fw_status != FLASH_IN_PROGRESS)
+		pm8001_ha->fw_status = FLASH_OK;
+
+	return snprintf(buf, PAGE_SIZE, "status=%x %s\n",
+			flash_error_table[i].err_code,
+			flash_error_table[i].reason);
+}
+
+static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUGO,
+	pm8001_show_update_fw, pm8001_store_update_fw);
+struct device_attribute *pm8001_host_attrs[] = {
+	&dev_attr_interface_rev,
+	&dev_attr_fw_version,
+	&dev_attr_update_fw,
+	&dev_attr_aap_log,
+	&dev_attr_iop_log,
+	&dev_attr_max_out_io,
+	&dev_attr_max_devices,
+	&dev_attr_max_sg_list,
+	&dev_attr_sas_spec_support,
+	&dev_attr_logging_level,
+	&dev_attr_host_sas_address,
+	NULL,
+};
+
diff --git a/drivers/scsi/pm8001/pm8001_ctl.h b/drivers/scsi/pm8001/pm8001_ctl.h
new file mode 100644
index 000000000000..22644de26399
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_ctl.h
@@ -0,0 +1,67 @@
+ /*
+  * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+  *
+  * Copyright (c) 2008-2009 USI Co., Ltd.
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions, and the following disclaimer,
+  *    without modification.
+  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+  *    substantially similar to the "NO WARRANTY" disclaimer below
+  *    ("Disclaimer") and any redistribution must be conditioned upon
+  *    including a substantially similar Disclaimer requirement for further
+  *    binary redistribution.
+  * 3. Neither the names of the above-listed copyright holders nor the names
+  *    of any contributors may be used to endorse or promote products derived
+  *    from this software without specific prior written permission.
+  *
+  * Alternatively, this software may be distributed under the terms of the
+  * GNU General Public License ("GPL") version 2 as published by the Free
+  * Software Foundation.
+  *
+  * NO WARRANTY
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGES.
+  *
+  */
+
+#ifndef PM8001_CTL_H_INCLUDED
+#define PM8001_CTL_H_INCLUDED
+
+#define IOCTL_BUF_SIZE		4096
+#define HEADER_LEN			28
+#define SIZE_OFFSET			16
+
+struct pm8001_ioctl_payload {
+	u32	signature;
+	u16	major_function;
+	u16	minor_function;
+	u16	length;
+	u16	status;
+	u16	offset;
+	u16	id;
+	u8	func_specific[1];
+};
+
+#define FLASH_OK                        0x000000
+#define FAIL_OPEN_BIOS_FILE             0x000100
+#define FAIL_FILE_SIZE                  0x000a00
+#define FAIL_PARAMETERS                 0x000b00
+#define FAIL_OUT_MEMORY                 0x000c00
+#define FLASH_IN_PROGRESS               0x001000
+
+#endif /* PM8001_CTL_H_INCLUDED */
+
diff --git a/drivers/scsi/pm8001/pm8001_defs.h b/drivers/scsi/pm8001/pm8001_defs.h
new file mode 100644
index 000000000000..944afada61ee
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_defs.h
@@ -0,0 +1,112 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef _PM8001_DEFS_H_
+#define _PM8001_DEFS_H_
+
+enum chip_flavors {
+	chip_8001,
+};
+#define USI_MAX_MEMCNT			9
+#define PM8001_MAX_DMA_SG		SG_ALL
+enum phy_speed {
+	PHY_SPEED_15 = 0x01,
+	PHY_SPEED_30 = 0x02,
+	PHY_SPEED_60 = 0x04,
+};
+
+enum data_direction {
+	DATA_DIR_NONE = 0x0,	/* NO TRANSFER */
+	DATA_DIR_IN = 0x01,	/* INBOUND */
+	DATA_DIR_OUT = 0x02,	/* OUTBOUND */
+	DATA_DIR_BYRECIPIENT = 0x04, /* UNSPECIFIED */
+};
+
+enum port_type {
+	PORT_TYPE_SAS = (1L << 1),
+	PORT_TYPE_SATA = (1L << 0),
+};
+
+/* driver compile-time configuration */
+#define	PM8001_MAX_CCB		 512	/* max ccbs supported */
+#define	PM8001_MAX_INB_NUM	 1
+#define	PM8001_MAX_OUTB_NUM	 1
+#define	PM8001_CAN_QUEUE	 128	/* SCSI Queue depth */
+
+/* unchangeable hardware details */
+#define	PM8001_MAX_PHYS		 8	/* max. possible phys */
+#define	PM8001_MAX_PORTS	 8	/* max. possible ports */
+#define	PM8001_MAX_DEVICES	 1024	/* max supported device */
+
+enum memory_region_num {
+	AAP1 = 0x0, /* application acceleration processor */
+	IOP,	    /* IO processor */
+	CI,	    /* consumer index */
+	PI,	    /* producer index */
+	IB,	    /* inbound queue */
+	OB,	    /* outbound queue */
+	NVMD,	    /* NVM device */
+	DEV_MEM,    /* memory for devices */
+	CCB_MEM,    /* memory for command control block */
+};
+#define	PM8001_EVENT_LOG_SIZE	 (128 * 1024)
+
+/*error code*/
+enum mpi_err {
+	MPI_IO_STATUS_SUCCESS = 0x0,
+	MPI_IO_STATUS_BUSY = 0x01,
+	MPI_IO_STATUS_FAIL = 0x02,
+};
+
+/**
+ * Phy Control constants
+ */
+enum phy_control_type {
+	PHY_LINK_RESET = 0x01,
+	PHY_HARD_RESET = 0x02,
+	PHY_NOTIFY_ENABLE_SPINUP = 0x10,
+};
+
+enum pm8001_hba_info_flags {
+	PM8001F_INIT_TIME	= (1U << 0),
+	PM8001F_RUN_TIME	= (1U << 1),
+};
+
+#endif
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
new file mode 100644
index 000000000000..aa5756fe0574
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -0,0 +1,4371 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+ #include "pm8001_sas.h"
+ #include "pm8001_hwi.h"
+ #include "pm8001_chips.h"
+ #include "pm8001_ctl.h"
+
+/**
+ * read_main_config_table - read the configure table and save it.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit read_main_config_table(struct pm8001_hba_info *pm8001_ha)
+{
+	void __iomem *address = pm8001_ha->main_cfg_tbl_addr;
+	pm8001_ha->main_cfg_tbl.signature	= pm8001_mr32(address, 0x00);
+	pm8001_ha->main_cfg_tbl.interface_rev	= pm8001_mr32(address, 0x04);
+	pm8001_ha->main_cfg_tbl.firmware_rev	= pm8001_mr32(address, 0x08);
+	pm8001_ha->main_cfg_tbl.max_out_io	= pm8001_mr32(address, 0x0C);
+	pm8001_ha->main_cfg_tbl.max_sgl		= pm8001_mr32(address, 0x10);
+	pm8001_ha->main_cfg_tbl.ctrl_cap_flag	= pm8001_mr32(address, 0x14);
+	pm8001_ha->main_cfg_tbl.gst_offset	= pm8001_mr32(address, 0x18);
+	pm8001_ha->main_cfg_tbl.inbound_queue_offset =
+		pm8001_mr32(address, 0x1C);
+	pm8001_ha->main_cfg_tbl.outbound_queue_offset =
+		pm8001_mr32(address, 0x20);
+	pm8001_ha->main_cfg_tbl.hda_mode_flag	=
+		pm8001_mr32(address, MAIN_HDA_FLAGS_OFFSET);
+
+	/* read analog Setting offset from the configuration table */
+	pm8001_ha->main_cfg_tbl.anolog_setup_table_offset =
+		pm8001_mr32(address, MAIN_ANALOG_SETUP_OFFSET);
+
+	/* read Error Dump Offset and Length */
+	pm8001_ha->main_cfg_tbl.fatal_err_dump_offset0 =
+		pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP0_OFFSET);
+	pm8001_ha->main_cfg_tbl.fatal_err_dump_length0 =
+		pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP0_LENGTH);
+	pm8001_ha->main_cfg_tbl.fatal_err_dump_offset1 =
+		pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP1_OFFSET);
+	pm8001_ha->main_cfg_tbl.fatal_err_dump_length1 =
+		pm8001_mr32(address, MAIN_FATAL_ERROR_RDUMP1_LENGTH);
+}
+
+/**
+ * read_general_status_table - read the general status table and save it.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+read_general_status_table(struct pm8001_hba_info *pm8001_ha)
+{
+	void __iomem *address = pm8001_ha->general_stat_tbl_addr;
+	pm8001_ha->gs_tbl.gst_len_mpistate	= pm8001_mr32(address, 0x00);
+	pm8001_ha->gs_tbl.iq_freeze_state0	= pm8001_mr32(address, 0x04);
+	pm8001_ha->gs_tbl.iq_freeze_state1	= pm8001_mr32(address, 0x08);
+	pm8001_ha->gs_tbl.msgu_tcnt		= pm8001_mr32(address, 0x0C);
+	pm8001_ha->gs_tbl.iop_tcnt		= pm8001_mr32(address, 0x10);
+	pm8001_ha->gs_tbl.reserved		= pm8001_mr32(address, 0x14);
+	pm8001_ha->gs_tbl.phy_state[0]	= pm8001_mr32(address, 0x18);
+	pm8001_ha->gs_tbl.phy_state[1]	= pm8001_mr32(address, 0x1C);
+	pm8001_ha->gs_tbl.phy_state[2]	= pm8001_mr32(address, 0x20);
+	pm8001_ha->gs_tbl.phy_state[3]	= pm8001_mr32(address, 0x24);
+	pm8001_ha->gs_tbl.phy_state[4]	= pm8001_mr32(address, 0x28);
+	pm8001_ha->gs_tbl.phy_state[5]	= pm8001_mr32(address, 0x2C);
+	pm8001_ha->gs_tbl.phy_state[6]	= pm8001_mr32(address, 0x30);
+	pm8001_ha->gs_tbl.phy_state[7]	= pm8001_mr32(address, 0x34);
+	pm8001_ha->gs_tbl.reserved1		= pm8001_mr32(address, 0x38);
+	pm8001_ha->gs_tbl.reserved2		= pm8001_mr32(address, 0x3C);
+	pm8001_ha->gs_tbl.reserved3		= pm8001_mr32(address, 0x40);
+	pm8001_ha->gs_tbl.recover_err_info[0]	= pm8001_mr32(address, 0x44);
+	pm8001_ha->gs_tbl.recover_err_info[1]	= pm8001_mr32(address, 0x48);
+	pm8001_ha->gs_tbl.recover_err_info[2]	= pm8001_mr32(address, 0x4C);
+	pm8001_ha->gs_tbl.recover_err_info[3]	= pm8001_mr32(address, 0x50);
+	pm8001_ha->gs_tbl.recover_err_info[4]	= pm8001_mr32(address, 0x54);
+	pm8001_ha->gs_tbl.recover_err_info[5]	= pm8001_mr32(address, 0x58);
+	pm8001_ha->gs_tbl.recover_err_info[6]	= pm8001_mr32(address, 0x5C);
+	pm8001_ha->gs_tbl.recover_err_info[7]	= pm8001_mr32(address, 0x60);
+}
+
+/**
+ * read_inbnd_queue_table - read the inbound queue table and save it.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+read_inbnd_queue_table(struct pm8001_hba_info *pm8001_ha)
+{
+	int inbQ_num = 1;
+	int i;
+	void __iomem *address = pm8001_ha->inbnd_q_tbl_addr;
+	for (i = 0; i < inbQ_num; i++) {
+		u32 offset = i * 0x24;
+		pm8001_ha->inbnd_q_tbl[i].pi_pci_bar =
+		      get_pci_bar_index(pm8001_mr32(address, (offset + 0x14)));
+		pm8001_ha->inbnd_q_tbl[i].pi_offset =
+			pm8001_mr32(address, (offset + 0x18));
+	}
+}
+
+/**
+ * read_outbnd_queue_table - read the outbound queue table and save it.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+read_outbnd_queue_table(struct pm8001_hba_info *pm8001_ha)
+{
+	int outbQ_num = 1;
+	int i;
+	void __iomem *address = pm8001_ha->outbnd_q_tbl_addr;
+	for (i = 0; i < outbQ_num; i++) {
+		u32 offset = i * 0x24;
+		pm8001_ha->outbnd_q_tbl[i].ci_pci_bar =
+		      get_pci_bar_index(pm8001_mr32(address, (offset + 0x14)));
+		pm8001_ha->outbnd_q_tbl[i].ci_offset =
+			pm8001_mr32(address, (offset + 0x18));
+	}
+}
+
+/**
+ * init_default_table_values - init the default table.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+init_default_table_values(struct pm8001_hba_info *pm8001_ha)
+{
+	int qn = 1;
+	int i;
+	u32 offsetib, offsetob;
+	void __iomem *addressib = pm8001_ha->inbnd_q_tbl_addr;
+	void __iomem *addressob = pm8001_ha->outbnd_q_tbl_addr;
+
+	pm8001_ha->main_cfg_tbl.inbound_q_nppd_hppd			= 0;
+	pm8001_ha->main_cfg_tbl.outbound_hw_event_pid0_3 		= 0;
+	pm8001_ha->main_cfg_tbl.outbound_hw_event_pid4_7		= 0;
+	pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid0_3		= 0;
+	pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid4_7		= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid0_3	= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid4_7	= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid0_3	= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid4_7	= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid0_3	= 0;
+	pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid4_7	= 0;
+
+	pm8001_ha->main_cfg_tbl.upper_event_log_addr		=
+		pm8001_ha->memoryMap.region[AAP1].phys_addr_hi;
+	pm8001_ha->main_cfg_tbl.lower_event_log_addr		=
+		pm8001_ha->memoryMap.region[AAP1].phys_addr_lo;
+	pm8001_ha->main_cfg_tbl.event_log_size	= PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->main_cfg_tbl.event_log_option		= 0x01;
+	pm8001_ha->main_cfg_tbl.upper_iop_event_log_addr	=
+		pm8001_ha->memoryMap.region[IOP].phys_addr_hi;
+	pm8001_ha->main_cfg_tbl.lower_iop_event_log_addr	=
+		pm8001_ha->memoryMap.region[IOP].phys_addr_lo;
+	pm8001_ha->main_cfg_tbl.iop_event_log_size	= PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->main_cfg_tbl.iop_event_log_option		= 0x01;
+	pm8001_ha->main_cfg_tbl.fatal_err_interrupt		= 0x01;
+	for (i = 0; i < qn; i++) {
+		pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt	=
+			0x00000100 | (0x00000040 << 16) | (0x00<<30);
+		pm8001_ha->inbnd_q_tbl[i].upper_base_addr	=
+			pm8001_ha->memoryMap.region[IB].phys_addr_hi;
+		pm8001_ha->inbnd_q_tbl[i].lower_base_addr	=
+		pm8001_ha->memoryMap.region[IB].phys_addr_lo;
+		pm8001_ha->inbnd_q_tbl[i].base_virt		=
+			(u8 *)pm8001_ha->memoryMap.region[IB].virt_ptr;
+		pm8001_ha->inbnd_q_tbl[i].total_length		=
+			pm8001_ha->memoryMap.region[IB].total_len;
+		pm8001_ha->inbnd_q_tbl[i].ci_upper_base_addr	=
+			pm8001_ha->memoryMap.region[CI].phys_addr_hi;
+		pm8001_ha->inbnd_q_tbl[i].ci_lower_base_addr	=
+			pm8001_ha->memoryMap.region[CI].phys_addr_lo;
+		pm8001_ha->inbnd_q_tbl[i].ci_virt		=
+			pm8001_ha->memoryMap.region[CI].virt_ptr;
+		offsetib = i * 0x20;
+		pm8001_ha->inbnd_q_tbl[i].pi_pci_bar		=
+			get_pci_bar_index(pm8001_mr32(addressib,
+				(offsetib + 0x14)));
+		pm8001_ha->inbnd_q_tbl[i].pi_offset		=
+			pm8001_mr32(addressib, (offsetib + 0x18));
+		pm8001_ha->inbnd_q_tbl[i].producer_idx		= 0;
+		pm8001_ha->inbnd_q_tbl[i].consumer_index	= 0;
+	}
+	for (i = 0; i < qn; i++) {
+		pm8001_ha->outbnd_q_tbl[i].element_size_cnt	=
+			256 | (64 << 16) | (1<<30);
+		pm8001_ha->outbnd_q_tbl[i].upper_base_addr	=
+			pm8001_ha->memoryMap.region[OB].phys_addr_hi;
+		pm8001_ha->outbnd_q_tbl[i].lower_base_addr	=
+			pm8001_ha->memoryMap.region[OB].phys_addr_lo;
+		pm8001_ha->outbnd_q_tbl[i].base_virt		=
+			(u8 *)pm8001_ha->memoryMap.region[OB].virt_ptr;
+		pm8001_ha->outbnd_q_tbl[i].total_length		=
+			pm8001_ha->memoryMap.region[OB].total_len;
+		pm8001_ha->outbnd_q_tbl[i].pi_upper_base_addr	=
+			pm8001_ha->memoryMap.region[PI].phys_addr_hi;
+		pm8001_ha->outbnd_q_tbl[i].pi_lower_base_addr	=
+			pm8001_ha->memoryMap.region[PI].phys_addr_lo;
+		pm8001_ha->outbnd_q_tbl[i].interrup_vec_cnt_delay	=
+			0 | (0 << 16) | (0 << 24);
+		pm8001_ha->outbnd_q_tbl[i].pi_virt		=
+			pm8001_ha->memoryMap.region[PI].virt_ptr;
+		offsetob = i * 0x24;
+		pm8001_ha->outbnd_q_tbl[i].ci_pci_bar		=
+			get_pci_bar_index(pm8001_mr32(addressob,
+			offsetob + 0x14));
+		pm8001_ha->outbnd_q_tbl[i].ci_offset		=
+			pm8001_mr32(addressob, (offsetob + 0x18));
+		pm8001_ha->outbnd_q_tbl[i].consumer_idx		= 0;
+		pm8001_ha->outbnd_q_tbl[i].producer_index	= 0;
+	}
+}
+
+/**
+ * update_main_config_table - update the main default table to the HBA.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+update_main_config_table(struct pm8001_hba_info *pm8001_ha)
+{
+	void __iomem *address = pm8001_ha->main_cfg_tbl_addr;
+	pm8001_mw32(address, 0x24,
+		pm8001_ha->main_cfg_tbl.inbound_q_nppd_hppd);
+	pm8001_mw32(address, 0x28,
+		pm8001_ha->main_cfg_tbl.outbound_hw_event_pid0_3);
+	pm8001_mw32(address, 0x2C,
+		pm8001_ha->main_cfg_tbl.outbound_hw_event_pid4_7);
+	pm8001_mw32(address, 0x30,
+		pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid0_3);
+	pm8001_mw32(address, 0x34,
+		pm8001_ha->main_cfg_tbl.outbound_ncq_event_pid4_7);
+	pm8001_mw32(address, 0x38,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid0_3);
+	pm8001_mw32(address, 0x3C,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_ITNexus_event_pid4_7);
+	pm8001_mw32(address, 0x40,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid0_3);
+	pm8001_mw32(address, 0x44,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_ssp_event_pid4_7);
+	pm8001_mw32(address, 0x48,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid0_3);
+	pm8001_mw32(address, 0x4C,
+		pm8001_ha->main_cfg_tbl.outbound_tgt_smp_event_pid4_7);
+	pm8001_mw32(address, 0x50,
+		pm8001_ha->main_cfg_tbl.upper_event_log_addr);
+	pm8001_mw32(address, 0x54,
+		pm8001_ha->main_cfg_tbl.lower_event_log_addr);
+	pm8001_mw32(address, 0x58, pm8001_ha->main_cfg_tbl.event_log_size);
+	pm8001_mw32(address, 0x5C, pm8001_ha->main_cfg_tbl.event_log_option);
+	pm8001_mw32(address, 0x60,
+		pm8001_ha->main_cfg_tbl.upper_iop_event_log_addr);
+	pm8001_mw32(address, 0x64,
+		pm8001_ha->main_cfg_tbl.lower_iop_event_log_addr);
+	pm8001_mw32(address, 0x68, pm8001_ha->main_cfg_tbl.iop_event_log_size);
+	pm8001_mw32(address, 0x6C,
+		pm8001_ha->main_cfg_tbl.iop_event_log_option);
+	pm8001_mw32(address, 0x70,
+		pm8001_ha->main_cfg_tbl.fatal_err_interrupt);
+}
+
+/**
+ * update_inbnd_queue_table - update the inbound queue table to the HBA.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+update_inbnd_queue_table(struct pm8001_hba_info *pm8001_ha, int number)
+{
+	void __iomem *address = pm8001_ha->inbnd_q_tbl_addr;
+	u16 offset = number * 0x20;
+	pm8001_mw32(address, offset + 0x00,
+		pm8001_ha->inbnd_q_tbl[number].element_pri_size_cnt);
+	pm8001_mw32(address, offset + 0x04,
+		pm8001_ha->inbnd_q_tbl[number].upper_base_addr);
+	pm8001_mw32(address, offset + 0x08,
+		pm8001_ha->inbnd_q_tbl[number].lower_base_addr);
+	pm8001_mw32(address, offset + 0x0C,
+		pm8001_ha->inbnd_q_tbl[number].ci_upper_base_addr);
+	pm8001_mw32(address, offset + 0x10,
+		pm8001_ha->inbnd_q_tbl[number].ci_lower_base_addr);
+}
+
+/**
+ * update_outbnd_queue_table - update the outbound queue table to the HBA.
+ * @pm8001_ha: our hba card information
+ */
+static void __devinit
+update_outbnd_queue_table(struct pm8001_hba_info *pm8001_ha, int number)
+{
+	void __iomem *address = pm8001_ha->outbnd_q_tbl_addr;
+	u16 offset = number * 0x24;
+	pm8001_mw32(address, offset + 0x00,
+		pm8001_ha->outbnd_q_tbl[number].element_size_cnt);
+	pm8001_mw32(address, offset + 0x04,
+		pm8001_ha->outbnd_q_tbl[number].upper_base_addr);
+	pm8001_mw32(address, offset + 0x08,
+		pm8001_ha->outbnd_q_tbl[number].lower_base_addr);
+	pm8001_mw32(address, offset + 0x0C,
+		pm8001_ha->outbnd_q_tbl[number].pi_upper_base_addr);
+	pm8001_mw32(address, offset + 0x10,
+		pm8001_ha->outbnd_q_tbl[number].pi_lower_base_addr);
+	pm8001_mw32(address, offset + 0x1C,
+		pm8001_ha->outbnd_q_tbl[number].interrup_vec_cnt_delay);
+}
+
+/**
+ * bar4_shift - function is called to shift BAR base address
+ * @pm8001_ha : our hba card infomation
+ * @shiftValue : shifting value in memory bar.
+ */
+static u32 bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shiftValue)
+{
+	u32 regVal;
+	u32 max_wait_count;
+
+	/* program the inbound AXI translation Lower Address */
+	pm8001_cw32(pm8001_ha, 1, SPC_IBW_AXI_TRANSLATION_LOW, shiftValue);
+
+	/* confirm the setting is written */
+	max_wait_count = 1 * 1000 * 1000;  /* 1 sec */
+	do {
+		udelay(1);
+		regVal = pm8001_cr32(pm8001_ha, 1, SPC_IBW_AXI_TRANSLATION_LOW);
+	} while ((regVal != shiftValue) && (--max_wait_count));
+
+	if (!max_wait_count) {
+		PM8001_INIT_DBG(pm8001_ha,
+			pm8001_printk("TIMEOUT:SPC_IBW_AXI_TRANSLATION_LOW"
+			" = 0x%x\n", regVal));
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * mpi_set_phys_g3_with_ssc
+ * @pm8001_ha: our hba card information
+ * @SSCbit: set SSCbit to 0 to disable all phys ssc; 1 to enable all phys ssc.
+ */
+static void __devinit
+mpi_set_phys_g3_with_ssc(struct pm8001_hba_info *pm8001_ha, u32 SSCbit)
+{
+	u32 offset;
+	u32 value;
+	u32 i;
+
+#define SAS2_SETTINGS_LOCAL_PHY_0_3_SHIFT_ADDR 0x00030000
+#define SAS2_SETTINGS_LOCAL_PHY_4_7_SHIFT_ADDR 0x00040000
+#define SAS2_SETTINGS_LOCAL_PHY_0_3_OFFSET 0x1074
+#define SAS2_SETTINGS_LOCAL_PHY_4_7_OFFSET 0x1074
+#define PHY_SSC_BIT_SHIFT 13
+
+   /*
+    * Using shifted destination address 0x3_0000:0x1074 + 0x4000*N (N=0:3)
+    * Using shifted destination address 0x4_0000:0x1074 + 0x4000*(N-4) (N=4:7)
+    */
+	if (-1 == bar4_shift(pm8001_ha, SAS2_SETTINGS_LOCAL_PHY_0_3_SHIFT_ADDR))
+		return;
+	/* set SSC bit of PHY 0 - 3 */
+	for (i = 0; i < 4; i++) {
+		offset = SAS2_SETTINGS_LOCAL_PHY_0_3_OFFSET + 0x4000 * i;
+		value = pm8001_cr32(pm8001_ha, 2, offset);
+		if (SSCbit)
+			value = value | (0x00000001 << PHY_SSC_BIT_SHIFT);
+		else
+			value = value & (~(0x00000001<<PHY_SSC_BIT_SHIFT));
+		pm8001_cw32(pm8001_ha, 2, offset, value);
+	}
+
+	/* shift membase 3 for SAS2_SETTINGS_LOCAL_PHY 4 - 7 */
+	if (-1 == bar4_shift(pm8001_ha, SAS2_SETTINGS_LOCAL_PHY_4_7_SHIFT_ADDR))
+		return;
+
+	/* set SSC bit of PHY 4 - 7 */
+	for (i = 4; i < 8; i++) {
+		offset = SAS2_SETTINGS_LOCAL_PHY_4_7_OFFSET + 0x4000 * (i-4);
+		value = pm8001_cr32(pm8001_ha, 2, offset);
+		if (SSCbit)
+			value = value | (0x00000001 << PHY_SSC_BIT_SHIFT);
+		else
+			value = value & (~(0x00000001<<PHY_SSC_BIT_SHIFT));
+		pm8001_cw32(pm8001_ha, 2, offset, value);
+	}
+
+	/*set the shifted destination address to 0x0 to avoid error operation */
+	bar4_shift(pm8001_ha, 0x0);
+	return;
+}
+
+/**
+ * mpi_set_open_retry_interval_reg
+ * @pm8001_ha: our hba card information
+ * @interval - interval time for each OPEN_REJECT (RETRY). The units are in 1us.
+ */
+static void __devinit
+mpi_set_open_retry_interval_reg(struct pm8001_hba_info *pm8001_ha,
+				u32 interval)
+{
+	u32 offset;
+	u32 value;
+	u32 i;
+
+#define OPEN_RETRY_INTERVAL_PHY_0_3_SHIFT_ADDR 0x00030000
+#define OPEN_RETRY_INTERVAL_PHY_4_7_SHIFT_ADDR 0x00040000
+#define OPEN_RETRY_INTERVAL_PHY_0_3_OFFSET 0x30B4
+#define OPEN_RETRY_INTERVAL_PHY_4_7_OFFSET 0x30B4
+#define OPEN_RETRY_INTERVAL_REG_MASK 0x0000FFFF
+
+	value = interval & OPEN_RETRY_INTERVAL_REG_MASK;
+	/* shift bar and set the OPEN_REJECT(RETRY) interval time of PHY 0 -3.*/
+	if (-1 == bar4_shift(pm8001_ha,
+			     OPEN_RETRY_INTERVAL_PHY_0_3_SHIFT_ADDR))
+		return;
+	for (i = 0; i < 4; i++) {
+		offset = OPEN_RETRY_INTERVAL_PHY_0_3_OFFSET + 0x4000 * i;
+		pm8001_cw32(pm8001_ha, 2, offset, value);
+	}
+
+	if (-1 == bar4_shift(pm8001_ha,
+			     OPEN_RETRY_INTERVAL_PHY_4_7_SHIFT_ADDR))
+		return;
+	for (i = 4; i < 8; i++) {
+		offset = OPEN_RETRY_INTERVAL_PHY_4_7_OFFSET + 0x4000 * (i-4);
+		pm8001_cw32(pm8001_ha, 2, offset, value);
+	}
+	/*set the shifted destination address to 0x0 to avoid error operation */
+	bar4_shift(pm8001_ha, 0x0);
+	return;
+}
+
+/**
+ * mpi_init_check - check firmware initialization status.
+ * @pm8001_ha: our hba card information
+ */
+static int mpi_init_check(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 max_wait_count;
+	u32 value;
+	u32 gst_len_mpistate;
+	/* Write bit0=1 to Inbound DoorBell Register to tell the SPC FW the
+	table is updated */
+	pm8001_cw32(pm8001_ha, 0, MSGU_IBDB_SET, SPC_MSGU_CFG_TABLE_UPDATE);
+	/* wait until Inbound DoorBell Clear Register toggled */
+	max_wait_count = 1 * 1000 * 1000;/* 1 sec */
+	do {
+		udelay(1);
+		value = pm8001_cr32(pm8001_ha, 0, MSGU_IBDB_SET);
+		value &= SPC_MSGU_CFG_TABLE_UPDATE;
+	} while ((value != 0) && (--max_wait_count));
+
+	if (!max_wait_count)
+		return -1;
+	/* check the MPI-State for initialization */
+	gst_len_mpistate =
+		pm8001_mr32(pm8001_ha->general_stat_tbl_addr,
+		GST_GSTLEN_MPIS_OFFSET);
+	if (GST_MPI_STATE_INIT != (gst_len_mpistate & GST_MPI_STATE_MASK))
+		return -1;
+	/* check MPI Initialization error */
+	gst_len_mpistate = gst_len_mpistate >> 16;
+	if (0x0000 != gst_len_mpistate)
+		return -1;
+	return 0;
+}
+
+/**
+ * check_fw_ready - The LLDD check if the FW is ready, if not, return error.
+ * @pm8001_ha: our hba card information
+ */
+static int check_fw_ready(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 value, value1;
+	u32 max_wait_count;
+	/* check error state */
+	value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+	value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2);
+	/* check AAP error */
+	if (SCRATCH_PAD1_ERR == (value & SCRATCH_PAD_STATE_MASK)) {
+		/* error state */
+		value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0);
+		return -1;
+	}
+
+	/* check IOP error */
+	if (SCRATCH_PAD2_ERR == (value1 & SCRATCH_PAD_STATE_MASK)) {
+		/* error state */
+		value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3);
+		return -1;
+	}
+
+	/* bit 4-31 of scratch pad1 should be zeros if it is not
+	in error state*/
+	if (value & SCRATCH_PAD1_STATE_MASK) {
+		/* error case */
+		pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0);
+		return -1;
+	}
+
+	/* bit 2, 4-31 of scratch pad2 should be zeros if it is not
+	in error state */
+	if (value1 & SCRATCH_PAD2_STATE_MASK) {
+		/* error case */
+		return -1;
+	}
+
+	max_wait_count = 1 * 1000 * 1000;/* 1 sec timeout */
+
+	/* wait until scratch pad 1 and 2 registers in ready state  */
+	do {
+		udelay(1);
+		value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1)
+			& SCRATCH_PAD1_RDY;
+		value1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2)
+			& SCRATCH_PAD2_RDY;
+		if ((--max_wait_count) == 0)
+			return -1;
+	} while ((value != SCRATCH_PAD1_RDY) || (value1 != SCRATCH_PAD2_RDY));
+	return 0;
+}
+
+static void init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha)
+{
+	void __iomem *base_addr;
+	u32	value;
+	u32	offset;
+	u32	pcibar;
+	u32	pcilogic;
+
+	value = pm8001_cr32(pm8001_ha, 0, 0x44);
+	offset = value & 0x03FFFFFF;
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("Scratchpad 0 Offset: %x \n", offset));
+	pcilogic = (value & 0xFC000000) >> 26;
+	pcibar = get_pci_bar_index(pcilogic);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("Scratchpad 0 PCI BAR: %d \n", pcibar));
+	pm8001_ha->main_cfg_tbl_addr = base_addr =
+		pm8001_ha->io_mem[pcibar].memvirtaddr + offset;
+	pm8001_ha->general_stat_tbl_addr =
+		base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x18);
+	pm8001_ha->inbnd_q_tbl_addr =
+		base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x1C);
+	pm8001_ha->outbnd_q_tbl_addr =
+		base_addr + pm8001_cr32(pm8001_ha, pcibar, offset + 0x20);
+}
+
+/**
+ * pm8001_chip_init - the main init function that initialize whole PM8001 chip.
+ * @pm8001_ha: our hba card information
+ */
+static int __devinit pm8001_chip_init(struct pm8001_hba_info *pm8001_ha)
+{
+	/* check the firmware status */
+	if (-1 == check_fw_ready(pm8001_ha)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Firmware is not ready!\n"));
+		return -EBUSY;
+	}
+
+	/* Initialize pci space address eg: mpi offset */
+	init_pci_device_addresses(pm8001_ha);
+	init_default_table_values(pm8001_ha);
+	read_main_config_table(pm8001_ha);
+	read_general_status_table(pm8001_ha);
+	read_inbnd_queue_table(pm8001_ha);
+	read_outbnd_queue_table(pm8001_ha);
+	/* update main config table ,inbound table and outbound table */
+	update_main_config_table(pm8001_ha);
+	update_inbnd_queue_table(pm8001_ha, 0);
+	update_outbnd_queue_table(pm8001_ha, 0);
+	mpi_set_phys_g3_with_ssc(pm8001_ha, 0);
+	mpi_set_open_retry_interval_reg(pm8001_ha, 7);
+	/* notify firmware update finished and check initialization status */
+	if (0 == mpi_init_check(pm8001_ha)) {
+		PM8001_INIT_DBG(pm8001_ha,
+			pm8001_printk("MPI initialize successful!\n"));
+	} else
+		return -EBUSY;
+	/*This register is a 16-bit timer with a resolution of 1us. This is the
+	timer used for interrupt delay/coalescing in the PCIe Application Layer.
+	Zero is not a valid value. A value of 1 in the register will cause the
+	interrupts to be normal. A value greater than 1 will cause coalescing
+	delays.*/
+	pm8001_cw32(pm8001_ha, 1, 0x0033c0, 0x1);
+	pm8001_cw32(pm8001_ha, 1, 0x0033c4, 0x0);
+	return 0;
+}
+
+static int mpi_uninit_check(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 max_wait_count;
+	u32 value;
+	u32 gst_len_mpistate;
+	init_pci_device_addresses(pm8001_ha);
+	/* Write bit1=1 to Inbound DoorBell Register to tell the SPC FW the
+	table is stop */
+	pm8001_cw32(pm8001_ha, 0, MSGU_IBDB_SET, SPC_MSGU_CFG_TABLE_RESET);
+
+	/* wait until Inbound DoorBell Clear Register toggled */
+	max_wait_count = 1 * 1000 * 1000;/* 1 sec */
+	do {
+		udelay(1);
+		value = pm8001_cr32(pm8001_ha, 0, MSGU_IBDB_SET);
+		value &= SPC_MSGU_CFG_TABLE_RESET;
+	} while ((value != 0) && (--max_wait_count));
+
+	if (!max_wait_count) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("TIMEOUT:IBDB value/=0x%x\n", value));
+		return -1;
+	}
+
+	/* check the MPI-State for termination in progress */
+	/* wait until Inbound DoorBell Clear Register toggled */
+	max_wait_count = 1 * 1000 * 1000;  /* 1 sec */
+	do {
+		udelay(1);
+		gst_len_mpistate =
+			pm8001_mr32(pm8001_ha->general_stat_tbl_addr,
+			GST_GSTLEN_MPIS_OFFSET);
+		if (GST_MPI_STATE_UNINIT ==
+			(gst_len_mpistate & GST_MPI_STATE_MASK))
+			break;
+	} while (--max_wait_count);
+	if (!max_wait_count) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk(" TIME OUT MPI State = 0x%x\n",
+				gst_len_mpistate & GST_MPI_STATE_MASK));
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * soft_reset_ready_check - Function to check FW is ready for soft reset.
+ * @pm8001_ha: our hba card information
+ */
+static u32 soft_reset_ready_check(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 regVal, regVal1, regVal2;
+	if (mpi_uninit_check(pm8001_ha) != 0) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("MPI state is not ready\n"));
+		return -1;
+	}
+	/* read the scratch pad 2 register bit 2 */
+	regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2)
+		& SCRATCH_PAD2_FWRDY_RST;
+	if (regVal == SCRATCH_PAD2_FWRDY_RST) {
+		PM8001_INIT_DBG(pm8001_ha,
+			pm8001_printk("Firmware is ready for reset .\n"));
+	} else {
+	/* Trigger NMI twice via RB6 */
+		if (-1 == bar4_shift(pm8001_ha, RB6_ACCESS_REG)) {
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("Shift Bar4 to 0x%x failed\n",
+					RB6_ACCESS_REG));
+			return -1;
+		}
+		pm8001_cw32(pm8001_ha, 2, SPC_RB6_OFFSET,
+			RB6_MAGIC_NUMBER_RST);
+		pm8001_cw32(pm8001_ha, 2, SPC_RB6_OFFSET, RB6_MAGIC_NUMBER_RST);
+		/* wait for 100 ms */
+		mdelay(100);
+		regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2) &
+			SCRATCH_PAD2_FWRDY_RST;
+		if (regVal != SCRATCH_PAD2_FWRDY_RST) {
+			regVal1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+			regVal2 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2);
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("TIMEOUT:MSGU_SCRATCH_PAD1"
+				"=0x%x, MSGU_SCRATCH_PAD2=0x%x\n",
+				regVal1, regVal2));
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD0 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0)));
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD3 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3)));
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/**
+ * pm8001_chip_soft_rst - soft reset the PM8001 chip, so that the clear all
+ * the FW register status to the originated status.
+ * @pm8001_ha: our hba card information
+ * @signature: signature in host scratch pad0 register.
+ */
+static int
+pm8001_chip_soft_rst(struct pm8001_hba_info *pm8001_ha, u32 signature)
+{
+	u32	regVal, toggleVal;
+	u32	max_wait_count;
+	u32	regVal1, regVal2, regVal3;
+
+	/* step1: Check FW is ready for soft reset */
+	if (soft_reset_ready_check(pm8001_ha) != 0) {
+		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("FW is not ready\n"));
+		return -1;
+	}
+
+	/* step 2: clear NMI status register on AAP1 and IOP, write the same
+	value to clear */
+	/* map 0x60000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, MBIC_AAP1_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Shift Bar4 to 0x%x failed\n",
+			MBIC_AAP1_ADDR_BASE));
+		return -1;
+	}
+	regVal = pm8001_cr32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_IOP);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("MBIC - NMI Enable VPE0 (IOP)= 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_IOP, 0x0);
+	/* map 0x70000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, MBIC_IOP_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Shift Bar4 to 0x%x failed\n",
+			MBIC_IOP_ADDR_BASE));
+		return -1;
+	}
+	regVal = pm8001_cr32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_AAP1);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("MBIC - NMI Enable VPE0 (AAP1)= 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 2, MBIC_NMI_ENABLE_VPE0_AAP1, 0x0);
+
+	regVal = pm8001_cr32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT_ENABLE);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("PCIE -Event Interrupt Enable = 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT_ENABLE, 0x0);
+
+	regVal = pm8001_cr32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("PCIE - Event Interrupt  = 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 1, PCIE_EVENT_INTERRUPT, regVal);
+
+	regVal = pm8001_cr32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT_ENABLE);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("PCIE -Error Interrupt Enable = 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT_ENABLE, 0x0);
+
+	regVal = pm8001_cr32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("PCIE - Error Interrupt = 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 1, PCIE_ERROR_INTERRUPT, regVal);
+
+	/* read the scratch pad 1 register bit 2 */
+	regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1)
+		& SCRATCH_PAD1_RST;
+	toggleVal = regVal ^ SCRATCH_PAD1_RST;
+
+	/* set signature in host scratch pad0 register to tell SPC that the
+	host performs the soft reset */
+	pm8001_cw32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_0, signature);
+
+	/* read required registers for confirmming */
+	/* map 0x0700000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, GSM_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Shift Bar4 to 0x%x failed\n",
+			GSM_ADDR_BASE));
+		return -1;
+	}
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x0(0x00007b88)-GSM Configuration and"
+		" Reset = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET)));
+
+	/* step 3: host read GSM Configuration and Reset register */
+	regVal = pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET);
+	/* Put those bits to low */
+	/* GSM XCBI offset = 0x70 0000
+	0x00 Bit 13 COM_SLV_SW_RSTB 1
+	0x00 Bit 12 QSSP_SW_RSTB 1
+	0x00 Bit 11 RAAE_SW_RSTB 1
+	0x00 Bit 9 RB_1_SW_RSTB 1
+	0x00 Bit 8 SM_SW_RSTB 1
+	*/
+	regVal &= ~(0x00003b00);
+	/* host write GSM Configuration and Reset register */
+	pm8001_cw32(pm8001_ha, 2, GSM_CONFIG_RESET, regVal);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x0 (0x00007b88 ==> 0x00004088) - GSM "
+		"Configuration and Reset is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET)));
+
+	/* step 4: */
+	/* disable GSM - Read Address Parity Check */
+	regVal1 = pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700038 - Read Address Parity Check "
+		"Enable = 0x%x\n", regVal1));
+	pm8001_cw32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK, 0x0);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700038 - Read Address Parity Check Enable"
+		"is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK)));
+
+	/* disable GSM - Write Address Parity Check */
+	regVal2 = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700040 - Write Address Parity Check"
+		" Enable = 0x%x\n", regVal2));
+	pm8001_cw32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK, 0x0);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700040 - Write Address Parity Check "
+		"Enable is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK)));
+
+	/* disable GSM - Write Data Parity Check */
+	regVal3 = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x300048 - Write Data Parity Check"
+		" Enable = 0x%x\n", regVal3));
+	pm8001_cw32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK, 0x0);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x300048 - Write Data Parity Check Enable"
+		"is set to = 0x%x\n",
+	pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK)));
+
+	/* step 5: delay 10 usec */
+	udelay(10);
+	/* step 5-b: set GPIO-0 output control to tristate anyway */
+	if (-1 == bar4_shift(pm8001_ha, GPIO_ADDR_BASE)) {
+		PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("Shift Bar4 to 0x%x failed\n",
+				GPIO_ADDR_BASE));
+		return -1;
+	}
+	regVal = pm8001_cr32(pm8001_ha, 2, GPIO_GPIO_0_0UTPUT_CTL_OFFSET);
+		PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("GPIO Output Control Register:"
+				" = 0x%x\n", regVal));
+	/* set GPIO-0 output control to tri-state */
+	regVal &= 0xFFFFFFFC;
+	pm8001_cw32(pm8001_ha, 2, GPIO_GPIO_0_0UTPUT_CTL_OFFSET, regVal);
+
+	/* Step 6: Reset the IOP and AAP1 */
+	/* map 0x00000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, SPC_TOP_LEVEL_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("SPC Shift Bar4 to 0x%x failed\n",
+			SPC_TOP_LEVEL_ADDR_BASE));
+		return -1;
+	}
+	regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("Top Register before resetting IOP/AAP1"
+		":= 0x%x\n", regVal));
+	regVal &= ~(SPC_REG_RESET_PCS_IOP_SS | SPC_REG_RESET_PCS_AAP1_SS);
+	pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal);
+
+	/* step 7: Reset the BDMA/OSSP */
+	regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("Top Register before resetting BDMA/OSSP"
+		": = 0x%x\n", regVal));
+	regVal &= ~(SPC_REG_RESET_BDMA_CORE | SPC_REG_RESET_OSSP);
+	pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal);
+
+	/* step 8: delay 10 usec */
+	udelay(10);
+
+	/* step 9: bring the BDMA and OSSP out of reset */
+	regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("Top Register before bringing up BDMA/OSSP"
+		":= 0x%x\n", regVal));
+	regVal |= (SPC_REG_RESET_BDMA_CORE | SPC_REG_RESET_OSSP);
+	pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal);
+
+	/* step 10: delay 10 usec */
+	udelay(10);
+
+	/* step 11: reads and sets the GSM Configuration and Reset Register */
+	/* map 0x0700000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, GSM_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("SPC Shift Bar4 to 0x%x failed\n",
+			GSM_ADDR_BASE));
+		return -1;
+	}
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x0 (0x00007b88)-GSM Configuration and "
+		"Reset = 0x%x\n", pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET)));
+	regVal = pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET);
+	/* Put those bits to high */
+	/* GSM XCBI offset = 0x70 0000
+	0x00 Bit 13 COM_SLV_SW_RSTB 1
+	0x00 Bit 12 QSSP_SW_RSTB 1
+	0x00 Bit 11 RAAE_SW_RSTB 1
+	0x00 Bit 9   RB_1_SW_RSTB 1
+	0x00 Bit 8   SM_SW_RSTB 1
+	*/
+	regVal |= (GSM_CONFIG_RESET_VALUE);
+	pm8001_cw32(pm8001_ha, 2, GSM_CONFIG_RESET, regVal);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM (0x00004088 ==> 0x00007b88) - GSM"
+		" Configuration and Reset is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_CONFIG_RESET)));
+
+	/* step 12: Restore GSM - Read Address Parity Check */
+	regVal = pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK);
+	/* just for debugging */
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700038 - Read Address Parity Check Enable"
+		" = 0x%x\n", regVal));
+	pm8001_cw32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK, regVal1);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700038 - Read Address Parity"
+		" Check Enable is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_READ_ADDR_PARITY_CHECK)));
+	/* Restore GSM - Write Address Parity Check */
+	regVal = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK);
+	pm8001_cw32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK, regVal2);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700040 - Write Address Parity Check"
+		" Enable is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_WRITE_ADDR_PARITY_CHECK)));
+	/* Restore GSM - Write Data Parity Check */
+	regVal = pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK);
+	pm8001_cw32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK, regVal3);
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("GSM 0x700048 - Write Data Parity Check Enable"
+		"is set to = 0x%x\n",
+		pm8001_cr32(pm8001_ha, 2, GSM_WRITE_DATA_PARITY_CHECK)));
+
+	/* step 13: bring the IOP and AAP1 out of reset */
+	/* map 0x00000 to BAR4(0x20), BAR2(win) */
+	if (-1 == bar4_shift(pm8001_ha, SPC_TOP_LEVEL_ADDR_BASE)) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Shift Bar4 to 0x%x failed\n",
+			SPC_TOP_LEVEL_ADDR_BASE));
+		return -1;
+	}
+	regVal = pm8001_cr32(pm8001_ha, 2, SPC_REG_RESET);
+	regVal |= (SPC_REG_RESET_PCS_IOP_SS | SPC_REG_RESET_PCS_AAP1_SS);
+	pm8001_cw32(pm8001_ha, 2, SPC_REG_RESET, regVal);
+
+	/* step 14: delay 10 usec - Normal Mode */
+	udelay(10);
+	/* check Soft Reset Normal mode or Soft Reset HDA mode */
+	if (signature == SPC_SOFT_RESET_SIGNATURE) {
+		/* step 15 (Normal Mode): wait until scratch pad1 register
+		bit 2 toggled */
+		max_wait_count = 2 * 1000 * 1000;/* 2 sec */
+		do {
+			udelay(1);
+			regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1) &
+				SCRATCH_PAD1_RST;
+		} while ((regVal != toggleVal) && (--max_wait_count));
+
+		if (!max_wait_count) {
+			regVal = pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_1);
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("TIMEOUT : ToggleVal 0x%x,"
+				"MSGU_SCRATCH_PAD1 = 0x%x\n",
+				toggleVal, regVal));
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD0 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_0)));
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD2 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_2)));
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD3 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_3)));
+			return -1;
+		}
+
+		/* step 16 (Normal) - Clear ODMR and ODCR */
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, ODCR_CLEAR_ALL);
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_CLEAR_ALL);
+
+		/* step 17 (Normal Mode): wait for the FW and IOP to get
+		ready - 1 sec timeout */
+		/* Wait for the SPC Configuration Table to be ready */
+		if (check_fw_ready(pm8001_ha) == -1) {
+			regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+			/* return error if MPI Configuration Table not ready */
+			PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("FW not ready SCRATCH_PAD1"
+				" = 0x%x\n", regVal));
+			regVal = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2);
+			/* return error if MPI Configuration Table not ready */
+			PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("FW not ready SCRATCH_PAD2"
+				" = 0x%x\n", regVal));
+			PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD0 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_0)));
+			PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("SCRATCH_PAD3 value = 0x%x\n",
+				pm8001_cr32(pm8001_ha, 0,
+				MSGU_SCRATCH_PAD_3)));
+			return -1;
+		}
+	}
+
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("SPC soft reset Complete\n"));
+	return 0;
+}
+
+static void pm8001_hw_chip_rst(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 i;
+	u32 regVal;
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("chip reset start\n"));
+
+	/* do SPC chip reset. */
+	regVal = pm8001_cr32(pm8001_ha, 1, SPC_REG_RESET);
+	regVal &= ~(SPC_REG_RESET_DEVICE);
+	pm8001_cw32(pm8001_ha, 1, SPC_REG_RESET, regVal);
+
+	/* delay 10 usec */
+	udelay(10);
+
+	/* bring chip reset out of reset */
+	regVal = pm8001_cr32(pm8001_ha, 1, SPC_REG_RESET);
+	regVal |= SPC_REG_RESET_DEVICE;
+	pm8001_cw32(pm8001_ha, 1, SPC_REG_RESET, regVal);
+
+	/* delay 10 usec */
+	udelay(10);
+
+	/* wait for 20 msec until the firmware gets reloaded */
+	i = 20;
+	do {
+		mdelay(1);
+	} while ((--i) != 0);
+
+	PM8001_INIT_DBG(pm8001_ha,
+		pm8001_printk("chip reset finished\n"));
+}
+
+/**
+ * pm8001_chip_iounmap - which maped when initilized.
+ * @pm8001_ha: our hba card information
+ */
+static void pm8001_chip_iounmap(struct pm8001_hba_info *pm8001_ha)
+{
+	s8 bar, logical = 0;
+	for (bar = 0; bar < 6; bar++) {
+		/*
+		** logical BARs for SPC:
+		** bar 0 and 1 - logical BAR0
+		** bar 2 and 3 - logical BAR1
+		** bar4 - logical BAR2
+		** bar5 - logical BAR3
+		** Skip the appropriate assignments:
+		*/
+		if ((bar == 1) || (bar == 3))
+			continue;
+		if (pm8001_ha->io_mem[logical].memvirtaddr) {
+			iounmap(pm8001_ha->io_mem[logical].memvirtaddr);
+			logical++;
+		}
+	}
+}
+
+/**
+ * pm8001_chip_interrupt_enable - enable PM8001 chip interrupt
+ * @pm8001_ha: our hba card information
+ */
+static void
+pm8001_chip_intx_interrupt_enable(struct pm8001_hba_info *pm8001_ha)
+{
+	pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_CLEAR_ALL);
+	pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, ODCR_CLEAR_ALL);
+}
+
+ /**
+  * pm8001_chip_intx_interrupt_disable- disable PM8001 chip interrupt
+  * @pm8001_ha: our hba card information
+  */
+static void
+pm8001_chip_intx_interrupt_disable(struct pm8001_hba_info *pm8001_ha)
+{
+	pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, ODMR_MASK_ALL);
+}
+
+/**
+ * pm8001_chip_msix_interrupt_enable - enable PM8001 chip interrupt
+ * @pm8001_ha: our hba card information
+ */
+static void
+pm8001_chip_msix_interrupt_enable(struct pm8001_hba_info *pm8001_ha,
+	u32 int_vec_idx)
+{
+	u32 msi_index;
+	u32 value;
+	msi_index = int_vec_idx * MSIX_TABLE_ELEMENT_SIZE;
+	msi_index += MSIX_TABLE_BASE;
+	pm8001_cw32(pm8001_ha, 0, msi_index, MSIX_INTERRUPT_ENABLE);
+	value = (1 << int_vec_idx);
+	pm8001_cw32(pm8001_ha, 0,  MSGU_ODCR, value);
+
+}
+
+/**
+ * pm8001_chip_msix_interrupt_disable - disable PM8001 chip interrupt
+ * @pm8001_ha: our hba card information
+ */
+static void
+pm8001_chip_msix_interrupt_disable(struct pm8001_hba_info *pm8001_ha,
+	u32 int_vec_idx)
+{
+	u32 msi_index;
+	msi_index = int_vec_idx * MSIX_TABLE_ELEMENT_SIZE;
+	msi_index += MSIX_TABLE_BASE;
+	pm8001_cw32(pm8001_ha, 0,  msi_index, MSIX_INTERRUPT_DISABLE);
+
+}
+/**
+ * pm8001_chip_interrupt_enable - enable PM8001 chip interrupt
+ * @pm8001_ha: our hba card information
+ */
+static void
+pm8001_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha)
+{
+#ifdef PM8001_USE_MSIX
+	pm8001_chip_msix_interrupt_enable(pm8001_ha, 0);
+	return;
+#endif
+	pm8001_chip_intx_interrupt_enable(pm8001_ha);
+
+}
+
+/**
+ * pm8001_chip_intx_interrupt_disable- disable PM8001 chip interrupt
+ * @pm8001_ha: our hba card information
+ */
+static void
+pm8001_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha)
+{
+#ifdef PM8001_USE_MSIX
+	pm8001_chip_msix_interrupt_disable(pm8001_ha, 0);
+	return;
+#endif
+	pm8001_chip_intx_interrupt_disable(pm8001_ha);
+
+}
+
+/**
+ * mpi_msg_free_get- get the free message buffer for transfer inbound queue.
+ * @circularQ: the inbound queue  we want to transfer to HBA.
+ * @messageSize: the message size of this transfer, normally it is 64 bytes
+ * @messagePtr: the pointer to message.
+ */
+static u32 mpi_msg_free_get(struct inbound_queue_table *circularQ,
+			    u16 messageSize, void **messagePtr)
+{
+	u32 offset, consumer_index;
+	struct mpi_msg_hdr *msgHeader;
+	u8 bcCount = 1; /* only support single buffer */
+
+	/* Checks is the requested message size can be allocated in this queue*/
+	if (messageSize > 64) {
+		*messagePtr = NULL;
+		return -1;
+	}
+
+	/* Stores the new consumer index */
+	consumer_index = pm8001_read_32(circularQ->ci_virt);
+	circularQ->consumer_index = cpu_to_le32(consumer_index);
+	if (((circularQ->producer_idx + bcCount) % 256) ==
+		circularQ->consumer_index) {
+		*messagePtr = NULL;
+		return -1;
+	}
+	/* get memory IOMB buffer address */
+	offset = circularQ->producer_idx * 64;
+	/* increment to next bcCount element */
+	circularQ->producer_idx = (circularQ->producer_idx + bcCount) % 256;
+	/* Adds that distance to the base of the region virtual address plus
+	the message header size*/
+	msgHeader = (struct mpi_msg_hdr *)(circularQ->base_virt	+ offset);
+	*messagePtr = ((void *)msgHeader) + sizeof(struct mpi_msg_hdr);
+	return 0;
+}
+
+/**
+ * mpi_build_cmd- build the message queue for transfer, update the PI to FW
+ * to tell the fw to get this message from IOMB.
+ * @pm8001_ha: our hba card information
+ * @circularQ: the inbound queue we want to transfer to HBA.
+ * @opCode: the operation code represents commands which LLDD and fw recognized.
+ * @payload: the command payload of each operation command.
+ */
+static u32 mpi_build_cmd(struct pm8001_hba_info *pm8001_ha,
+			 struct inbound_queue_table *circularQ,
+			 u32 opCode, void *payload)
+{
+	u32 Header = 0, hpriority = 0, bc = 1, category = 0x02;
+	u32 responseQueue = 0;
+	void *pMessage;
+
+	if (mpi_msg_free_get(circularQ, 64, &pMessage) < 0) {
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("No free mpi buffer \n"));
+		return -1;
+	}
+
+	/*Copy to the payload*/
+	memcpy(pMessage, payload, (64 - sizeof(struct mpi_msg_hdr)));
+
+	/*Build the header*/
+	Header = ((1 << 31) | (hpriority << 30) | ((bc & 0x1f) << 24)
+		| ((responseQueue & 0x3F) << 16)
+		| ((category & 0xF) << 12) | (opCode & 0xFFF));
+
+	pm8001_write_32((pMessage - 4), 0, cpu_to_le32(Header));
+	/*Update the PI to the firmware*/
+	pm8001_cw32(pm8001_ha, circularQ->pi_pci_bar,
+		circularQ->pi_offset, circularQ->producer_idx);
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("after PI= %d CI= %d \n", circularQ->producer_idx,
+		circularQ->consumer_index));
+	return 0;
+}
+
+static u32 mpi_msg_free_set(struct pm8001_hba_info *pm8001_ha,
+			    struct outbound_queue_table *circularQ, u8 bc)
+{
+	u32 producer_index;
+	/* free the circular queue buffer elements associated with the message*/
+	circularQ->consumer_idx = (circularQ->consumer_idx + bc) % 256;
+	/* update the CI of outbound queue */
+	pm8001_cw32(pm8001_ha, circularQ->ci_pci_bar, circularQ->ci_offset,
+		circularQ->consumer_idx);
+	/* Update the producer index from SPC*/
+	producer_index = pm8001_read_32(circularQ->pi_virt);
+	circularQ->producer_index = cpu_to_le32(producer_index);
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk(" CI=%d PI=%d\n", circularQ->consumer_idx,
+		circularQ->producer_index));
+	return 0;
+}
+
+/**
+ * mpi_msg_consume- get the MPI message from  outbound queue message table.
+ * @pm8001_ha: our hba card information
+ * @circularQ: the outbound queue  table.
+ * @messagePtr1: the message contents of this outbound message.
+ * @pBC: the message size.
+ */
+static u32 mpi_msg_consume(struct pm8001_hba_info *pm8001_ha,
+			   struct outbound_queue_table *circularQ,
+			   void **messagePtr1, u8 *pBC)
+{
+	struct mpi_msg_hdr	*msgHeader;
+	__le32	msgHeader_tmp;
+	u32 header_tmp;
+	do {
+		/* If there are not-yet-delivered messages ... */
+		if (circularQ->producer_index != circularQ->consumer_idx) {
+			PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk("process an IOMB\n"));
+			/*Get the pointer to the circular queue buffer element*/
+			msgHeader = (struct mpi_msg_hdr *)
+				(circularQ->base_virt +
+				circularQ->consumer_idx * 64);
+			/* read header */
+			header_tmp = pm8001_read_32(msgHeader);
+			msgHeader_tmp = cpu_to_le32(header_tmp);
+			if (0 != (msgHeader_tmp & 0x80000000)) {
+				if (OPC_OUB_SKIP_ENTRY !=
+					(msgHeader_tmp & 0xfff)) {
+					*messagePtr1 =
+						((u8 *)msgHeader) +
+						sizeof(struct mpi_msg_hdr);
+					*pBC = (u8)((msgHeader_tmp >> 24) &
+						0x1f);
+					PM8001_IO_DBG(pm8001_ha,
+						pm8001_printk("mpi_msg_consume"
+						": CI=%d PI=%d msgHeader=%x\n",
+						circularQ->consumer_idx,
+						circularQ->producer_index,
+						msgHeader_tmp));
+					return MPI_IO_STATUS_SUCCESS;
+				} else {
+					u32 producer_index;
+					void *pi_virt = circularQ->pi_virt;
+					/* free the circular queue buffer
+					elements associated with the message*/
+					circularQ->consumer_idx =
+						(circularQ->consumer_idx +
+						((msgHeader_tmp >> 24) & 0x1f))
+						% 256;
+					/* update the CI of outbound queue */
+					pm8001_cw32(pm8001_ha,
+						circularQ->ci_pci_bar,
+						circularQ->ci_offset,
+						circularQ->consumer_idx);
+					/* Update the producer index from SPC */
+					producer_index =
+						pm8001_read_32(pi_virt);
+					circularQ->producer_index =
+						cpu_to_le32(producer_index);
+				}
+			} else
+				return MPI_IO_STATUS_FAIL;
+		}
+	} while (circularQ->producer_index != circularQ->consumer_idx);
+	/* while we don't have any more not-yet-delivered message */
+	/* report empty */
+	return MPI_IO_STATUS_BUSY;
+}
+
+static void pm8001_work_queue(struct work_struct *work)
+{
+	struct delayed_work *dw = container_of(work, struct delayed_work, work);
+	struct pm8001_wq *wq = container_of(dw, struct pm8001_wq, work_q);
+	struct pm8001_device *pm8001_dev;
+	struct domain_device	*dev;
+
+	switch (wq->handler) {
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		pm8001_dev = wq->data;
+		dev = pm8001_dev->sas_device;
+		pm8001_I_T_nexus_reset(dev);
+		break;
+	case IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
+		pm8001_dev = wq->data;
+		dev = pm8001_dev->sas_device;
+		pm8001_I_T_nexus_reset(dev);
+		break;
+	case IO_DS_IN_ERROR:
+		pm8001_dev = wq->data;
+		dev = pm8001_dev->sas_device;
+		pm8001_I_T_nexus_reset(dev);
+		break;
+	case IO_DS_NON_OPERATIONAL:
+		pm8001_dev = wq->data;
+		dev = pm8001_dev->sas_device;
+		pm8001_I_T_nexus_reset(dev);
+		break;
+	}
+	list_del(&wq->entry);
+	kfree(wq);
+}
+
+static int pm8001_handle_event(struct pm8001_hba_info *pm8001_ha, void *data,
+			       int handler)
+{
+	struct pm8001_wq *wq;
+	int ret = 0;
+
+	wq = kmalloc(sizeof(struct pm8001_wq), GFP_ATOMIC);
+	if (wq) {
+		wq->pm8001_ha = pm8001_ha;
+		wq->data = data;
+		wq->handler = handler;
+		INIT_DELAYED_WORK(&wq->work_q, pm8001_work_queue);
+		list_add_tail(&wq->entry, &pm8001_ha->wq_list);
+		schedule_delayed_work(&wq->work_q, 0);
+	} else
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+/**
+ * mpi_ssp_completion- process the event that FW response to the SSP request.
+ * @pm8001_ha: our hba card information
+ * @piomb: the message contents of this outbound message.
+ *
+ * When FW has completed a ssp request for example a IO request, after it has
+ * filled the SG data with the data, it will trigger this event represent
+ * that he has finished the job,please check the coresponding buffer.
+ * So we will tell the caller who maybe waiting the result to tell upper layer
+ * that the task has been finished.
+ */
+static int
+mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha , void *piomb)
+{
+	struct sas_task *t;
+	struct pm8001_ccb_info *ccb;
+	unsigned long flags;
+	u32 status;
+	u32 param;
+	u32 tag;
+	struct ssp_completion_resp *psspPayload;
+	struct task_status_struct *ts;
+	struct ssp_response_iu *iu;
+	struct pm8001_device *pm8001_dev;
+	psspPayload = (struct ssp_completion_resp *)(piomb + 4);
+	status = le32_to_cpu(psspPayload->status);
+	tag = le32_to_cpu(psspPayload->tag);
+	ccb = &pm8001_ha->ccb_info[tag];
+	pm8001_dev = ccb->device;
+	param = le32_to_cpu(psspPayload->param);
+
+	PM8001_IO_DBG(pm8001_ha, pm8001_printk("OPC_OUB_SSP_COMP\n"));
+	t = ccb->task;
+
+	if (status)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("sas IO status 0x%x\n", status));
+	if (unlikely(!t || !t->lldd_task || !t->dev))
+		return -1;
+	ts = &t->task_status;
+	switch (status) {
+	case IO_SUCCESS:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS"
+			",param = %d \n", param));
+		if (param == 0) {
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAM_GOOD;
+		} else {
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAS_PROTO_RESPONSE;
+			ts->residual = param;
+			iu = &psspPayload->ssp_resp_iu;
+			sas_ssp_task_response(pm8001_ha->dev, t, iu);
+		}
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_ABORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_ABORTED IOMB Tag \n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		break;
+	case IO_UNDERFLOW:
+		/* SSP Completion with error */
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW"
+			",param = %d \n", param));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		ts->residual = param;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_NO_DEVICE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_NO_DEVICE\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case IO_XFER_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_XFER_ERROR_PHY_NOT_READY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_EPROTO;
+		break;
+	case IO_OPEN_CNX_ERROR_ZONE_VIOLATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_CONT0;
+		break;
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		if (!t->uldd_task)
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+		break;
+	case IO_OPEN_CNX_ERROR_BAD_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_BAD_DEST;
+		break;
+	case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_"
+			"NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_CONN_RATE;
+		break;
+	case IO_OPEN_CNX_ERROR_WRONG_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_WRONG_DEST;
+		break;
+	case IO_XFER_ERROR_NAK_RECEIVED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_XFER_ERROR_ACK_NAK_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_ERROR_DMA:
+		PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("IO_XFER_ERROR_DMA\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_XFER_OPEN_RETRY_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_XFER_ERROR_OFFSET_MISMATCH:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_PORT_IN_RESET:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_PORT_IN_RESET\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_DS_NON_OPERATIONAL:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_NON_OPERATIONAL\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		if (!t->uldd_task)
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_DS_NON_OPERATIONAL);
+		break;
+	case IO_DS_IN_RECOVERY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_IN_RECOVERY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_TM_TAG_NOT_FOUND:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_TM_TAG_NOT_FOUND\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_SSP_EXT_IU_ZERO_LEN_ERROR:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_SSP_EXT_IU_ZERO_LEN_ERROR\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+	default:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("Unknown status 0x%x\n", status));
+		/* not allowed case. Therefore, return failed status */
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	}
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("scsi_satus = %x \n ",
+		psspPayload->ssp_resp_iu.status));
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with"
+			" io_status 0x%x resp 0x%x "
+			"stat 0x%x but aborted by upper layer!\n",
+			t, status, ts->resp, ts->stat));
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+	} else {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+		mb();/* in order to force CPU ordering */
+		t->task_done(t);
+	}
+	return 0;
+}
+
+/*See the comments for mpi_ssp_completion */
+static int mpi_ssp_event(struct pm8001_hba_info *pm8001_ha , void *piomb)
+{
+	struct sas_task *t;
+	unsigned long flags;
+	struct task_status_struct *ts;
+	struct pm8001_ccb_info *ccb;
+	struct pm8001_device *pm8001_dev;
+	struct ssp_event_resp *psspPayload =
+		(struct ssp_event_resp *)(piomb + 4);
+	u32 event = le32_to_cpu(psspPayload->event);
+	u32 tag = le32_to_cpu(psspPayload->tag);
+	u32 port_id = le32_to_cpu(psspPayload->port_id);
+	u32 dev_id = le32_to_cpu(psspPayload->device_id);
+
+	ccb = &pm8001_ha->ccb_info[tag];
+	t = ccb->task;
+	pm8001_dev = ccb->device;
+	if (event)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("sas IO status 0x%x\n", event));
+	if (unlikely(!t || !t->lldd_task || !t->dev))
+		return -1;
+	ts = &t->task_status;
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("port_id = %x,device_id = %x\n",
+		port_id, dev_id));
+	switch (event) {
+	case IO_OVERFLOW:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n");)
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		ts->residual = 0;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_XFER_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_INTERRUPTED;
+		break;
+	case IO_XFER_ERROR_PHY_NOT_READY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT"
+			"_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_EPROTO;
+		break;
+	case IO_OPEN_CNX_ERROR_ZONE_VIOLATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_CONT0;
+		break;
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		if (!t->uldd_task)
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+		break;
+	case IO_OPEN_CNX_ERROR_BAD_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_BAD_DEST;
+		break;
+	case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_"
+			"NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_CONN_RATE;
+		break;
+	case IO_OPEN_CNX_ERROR_WRONG_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_WRONG_DEST;
+		break;
+	case IO_XFER_ERROR_NAK_RECEIVED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		break;
+	case IO_XFER_ERROR_ACK_NAK_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_OPEN_RETRY_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_XFER_ERROR_UNEXPECTED_PHASE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_UNEXPECTED_PHASE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_ERROR_XFER_RDY_OVERRUN:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_XFER_RDY_OVERRUN\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_ERROR_OFFSET_MISMATCH:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_ERROR_XFER_ZERO_DATA_LEN:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_XFER_ZERO_DATA_LEN\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	case IO_XFER_CMD_FRAME_ISSUED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("  IO_XFER_CMD_FRAME_ISSUED\n"));
+		return 0;
+	default:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("Unknown status 0x%x\n", event));
+		/* not allowed case. Therefore, return failed status */
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		break;
+	}
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with"
+			" event 0x%x resp 0x%x "
+			"stat 0x%x but aborted by upper layer!\n",
+			t, event, ts->resp, ts->stat));
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+	} else {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+		mb();/* in order to force CPU ordering */
+		t->task_done(t);
+	}
+	return 0;
+}
+
+/*See the comments for mpi_ssp_completion */
+static int
+mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct sas_task *t;
+	struct pm8001_ccb_info *ccb;
+	unsigned long flags;
+	u32 param;
+	u32 status;
+	u32 tag;
+	struct sata_completion_resp *psataPayload;
+	struct task_status_struct *ts;
+	struct ata_task_resp *resp ;
+	u32 *sata_resp;
+	struct pm8001_device *pm8001_dev;
+
+	psataPayload = (struct sata_completion_resp *)(piomb + 4);
+	status = le32_to_cpu(psataPayload->status);
+	tag = le32_to_cpu(psataPayload->tag);
+
+	ccb = &pm8001_ha->ccb_info[tag];
+	param = le32_to_cpu(psataPayload->param);
+	t = ccb->task;
+	ts = &t->task_status;
+	pm8001_dev = ccb->device;
+	if (status)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("sata IO status 0x%x\n", status));
+	if (unlikely(!t || !t->lldd_task || !t->dev))
+		return -1;
+
+	switch (status) {
+	case IO_SUCCESS:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
+		if (param == 0) {
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAM_GOOD;
+		} else {
+			u8 len;
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAS_PROTO_RESPONSE;
+			ts->residual = param;
+			PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk("SAS_PROTO_RESPONSE len = %d\n",
+				param));
+			sata_resp = &psataPayload->sata_resp[0];
+			resp = (struct ata_task_resp *)ts->buf;
+			if (t->ata_task.dma_xfer == 0 &&
+			t->data_dir == PCI_DMA_FROMDEVICE) {
+				len = sizeof(struct pio_setup_fis);
+				PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk("PIO read len = %d\n", len));
+			} else if (t->ata_task.use_ncq) {
+				len = sizeof(struct set_dev_bits_fis);
+				PM8001_IO_DBG(pm8001_ha,
+					pm8001_printk("FPDMA len = %d\n", len));
+			} else {
+				len = sizeof(struct dev_to_host_fis);
+				PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk("other len = %d\n", len));
+			}
+			if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) {
+				resp->frame_len = len;
+				memcpy(&resp->ending_fis[0], sata_resp, len);
+				ts->buf_valid_size = sizeof(*resp);
+			} else
+				PM8001_IO_DBG(pm8001_ha,
+					pm8001_printk("response to large \n"));
+		}
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_ABORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_ABORTED IOMB Tag \n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+		/* following cases are to do cases */
+	case IO_UNDERFLOW:
+		/* SATA Completion with error */
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_UNDERFLOW param = %d\n", param));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		ts->residual =  param;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_NO_DEVICE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_NO_DEVICE\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case IO_XFER_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_INTERRUPTED;
+		break;
+	case IO_XFER_ERROR_PHY_NOT_READY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT"
+			"_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_EPROTO;
+		break;
+	case IO_OPEN_CNX_ERROR_ZONE_VIOLATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_CONT0;
+		break;
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/*in order to force CPU ordering*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_OPEN_CNX_ERROR_BAD_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_BAD_DEST;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/*ditto*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_"
+			"NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_CONN_RATE;
+		break;
+	case IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_STP_RESOURCES"
+			"_BUSY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/* ditto*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_OPEN_CNX_ERROR_WRONG_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_WRONG_DEST;
+		break;
+	case IO_XFER_ERROR_NAK_RECEIVED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_ERROR_ACK_NAK_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_ACK_NAK_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_ERROR_DMA:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_DMA\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		break;
+	case IO_XFER_ERROR_SATA_LINK_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_SATA_LINK_TIMEOUT\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case IO_XFER_ERROR_REJECTED_NCQ_MODE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_REJECTED_NCQ_MODE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		break;
+	case IO_XFER_OPEN_RETRY_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_PORT_IN_RESET:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_PORT_IN_RESET\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case IO_DS_NON_OPERATIONAL:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_NON_OPERATIONAL\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha, pm8001_dev,
+				    IO_DS_NON_OPERATIONAL);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/*ditto*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_DS_IN_RECOVERY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("  IO_DS_IN_RECOVERY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case IO_DS_IN_ERROR:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_IN_ERROR\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha, pm8001_dev,
+				    IO_DS_IN_ERROR);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/*ditto*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+	default:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("Unknown status 0x%x\n", status));
+		/* not allowed case. Therefore, return failed status */
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	}
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("task 0x%p done with io_status 0x%x"
+			" resp 0x%x stat 0x%x but aborted by upper layer!\n",
+			t, status, ts->resp, ts->stat));
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+	} else {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+		mb();/* ditto */
+		t->task_done(t);
+	}
+	return 0;
+}
+
+/*See the comments for mpi_ssp_completion */
+static int mpi_sata_event(struct pm8001_hba_info *pm8001_ha , void *piomb)
+{
+	struct sas_task *t;
+	unsigned long flags;
+	struct task_status_struct *ts;
+	struct pm8001_ccb_info *ccb;
+	struct pm8001_device *pm8001_dev;
+	struct sata_event_resp *psataPayload =
+		(struct sata_event_resp *)(piomb + 4);
+	u32 event = le32_to_cpu(psataPayload->event);
+	u32 tag = le32_to_cpu(psataPayload->tag);
+	u32 port_id = le32_to_cpu(psataPayload->port_id);
+	u32 dev_id = le32_to_cpu(psataPayload->device_id);
+
+	ccb = &pm8001_ha->ccb_info[tag];
+	t = ccb->task;
+	pm8001_dev = ccb->device;
+	if (event)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("sata IO status 0x%x\n", event));
+	if (unlikely(!t || !t->lldd_task || !t->dev))
+		return -1;
+	ts = &t->task_status;
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("port_id = %x,device_id = %x\n",
+		port_id, dev_id));
+	switch (event) {
+	case IO_OVERFLOW:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		ts->residual = 0;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_XFER_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_INTERRUPTED;
+		break;
+	case IO_XFER_ERROR_PHY_NOT_READY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT"
+			"_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_EPROTO;
+		break;
+	case IO_OPEN_CNX_ERROR_ZONE_VIOLATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_CONT0;
+		break;
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		if (!t->uldd_task) {
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAS_QUEUE_FULL;
+			pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+			mb();/*ditto*/
+			t->task_done(t);
+			return 0;
+		}
+		break;
+	case IO_OPEN_CNX_ERROR_BAD_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n"));
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_BAD_DEST;
+		break;
+	case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_"
+			"NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_CONN_RATE;
+		break;
+	case IO_OPEN_CNX_ERROR_WRONG_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_WRONG_DEST;
+		break;
+	case IO_XFER_ERROR_NAK_RECEIVED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_NAK_RECEIVED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_ERROR_PEER_ABORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PEER_ABORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case IO_XFER_ERROR_REJECTED_NCQ_MODE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_REJECTED_NCQ_MODE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		break;
+	case IO_XFER_OPEN_RETRY_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_ERROR_UNEXPECTED_PHASE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_UNEXPECTED_PHASE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_ERROR_XFER_RDY_OVERRUN:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_XFER_RDY_OVERRUN\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_ERROR_OFFSET_MISMATCH:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_OFFSET_MISMATCH\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_ERROR_XFER_ZERO_DATA_LEN:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_XFER_ZERO_DATA_LEN\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case IO_XFER_CMD_FRAME_ISSUED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_CMD_FRAME_ISSUED\n"));
+		break;
+	case IO_XFER_PIO_SETUP_ERROR:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_PIO_SETUP_ERROR\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	default:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("Unknown status 0x%x\n", event));
+		/* not allowed case. Therefore, return failed status */
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	}
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("task 0x%p done with io_status 0x%x"
+			" resp 0x%x stat 0x%x but aborted by upper layer!\n",
+			t, event, ts->resp, ts->stat));
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+	} else {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+		mb();/* in order to force CPU ordering */
+		t->task_done(t);
+	}
+	return 0;
+}
+
+/*See the comments for mpi_ssp_completion */
+static int
+mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	u32 param;
+	struct sas_task *t;
+	struct pm8001_ccb_info *ccb;
+	unsigned long flags;
+	u32 status;
+	u32 tag;
+	struct smp_completion_resp *psmpPayload;
+	struct task_status_struct *ts;
+	struct pm8001_device *pm8001_dev;
+
+	psmpPayload = (struct smp_completion_resp *)(piomb + 4);
+	status = le32_to_cpu(psmpPayload->status);
+	tag = le32_to_cpu(psmpPayload->tag);
+
+	ccb = &pm8001_ha->ccb_info[tag];
+	param = le32_to_cpu(psmpPayload->param);
+	t = ccb->task;
+	ts = &t->task_status;
+	pm8001_dev = ccb->device;
+	if (status)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("smp IO status 0x%x\n", status));
+	if (unlikely(!t || !t->lldd_task || !t->dev))
+		return -1;
+
+	switch (status) {
+	case IO_SUCCESS:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_GOOD;
+	if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_ABORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_ABORTED IOMB\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_OVERFLOW:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_UNDERFLOW\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		ts->residual = 0;
+		if (pm8001_dev)
+			pm8001_dev->running_req--;
+		break;
+	case IO_NO_DEVICE:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_NO_DEVICE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case IO_ERROR_HW_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_ERROR_HW_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_BUSY;
+		break;
+	case IO_XFER_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_BUSY;
+		break;
+	case IO_XFER_ERROR_PHY_NOT_READY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_PHY_NOT_READY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_BUSY;
+		break;
+	case IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk("IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_ZONE_VIOLATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_ZONE_VIOLATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case IO_OPEN_CNX_ERROR_BREAK:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BREAK\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_CONT0;
+		break;
+	case IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+		break;
+	case IO_OPEN_CNX_ERROR_BAD_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_BAD_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_BAD_DEST;
+		break;
+	case IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_CONNECTION_RATE_"
+			"NOT_SUPPORTED\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_CONN_RATE;
+		break;
+	case IO_OPEN_CNX_ERROR_WRONG_DESTINATION:
+		PM8001_IO_DBG(pm8001_ha,
+		       pm8001_printk("IO_OPEN_CNX_ERROR_WRONG_DESTINATION\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_WRONG_DEST;
+		break;
+	case IO_XFER_ERROR_RX_FRAME:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_ERROR_RX_FRAME\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case IO_XFER_OPEN_RETRY_TIMEOUT:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_XFER_OPEN_RETRY_TIMEOUT\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_ERROR_INTERNAL_SMP_RESOURCE:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_ERROR_INTERNAL_SMP_RESOURCE\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_QUEUE_FULL;
+		break;
+	case IO_PORT_IN_RESET:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_PORT_IN_RESET\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_DS_NON_OPERATIONAL:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_NON_OPERATIONAL\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case IO_DS_IN_RECOVERY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_DS_IN_RECOVERY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	case IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_OPEN_REJECT;
+		ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+		break;
+	default:
+		PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk("Unknown status 0x%x\n", status));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		/* not allowed case. Therefore, return failed status */
+		break;
+	}
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk("task 0x%p done with"
+			" io_status 0x%x resp 0x%x "
+			"stat 0x%x but aborted by upper layer!\n",
+			t, status, ts->resp, ts->stat));
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+	} else {
+		spin_unlock_irqrestore(&t->task_state_lock, flags);
+		pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+		mb();/* in order to force CPU ordering */
+		t->task_done(t);
+	}
+	return 0;
+}
+
+static void
+mpi_set_dev_state_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct set_dev_state_resp *pPayload =
+		(struct set_dev_state_resp *)(piomb + 4);
+	u32 tag = le32_to_cpu(pPayload->tag);
+	struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag];
+	struct pm8001_device *pm8001_dev = ccb->device;
+	u32 status = le32_to_cpu(pPayload->status);
+	u32 device_id = le32_to_cpu(pPayload->device_id);
+	u8 pds = le32_to_cpu(pPayload->pds_nds) | PDS_BITS;
+	u8 nds = le32_to_cpu(pPayload->pds_nds) | NDS_BITS;
+	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Set device id = 0x%x state "
+		"from 0x%x to 0x%x status = 0x%x!\n",
+		device_id, pds, nds, status));
+	complete(pm8001_dev->setds_completion);
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, tag);
+}
+
+static void
+mpi_set_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct get_nvm_data_resp *pPayload =
+		(struct get_nvm_data_resp *)(piomb + 4);
+	u32 tag = le32_to_cpu(pPayload->tag);
+	struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag];
+	u32 dlen_status = le32_to_cpu(pPayload->dlen_status);
+	complete(pm8001_ha->nvmd_completion);
+	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Set nvm data complete!\n"));
+	if ((dlen_status & NVMD_STAT) != 0) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Set nvm data error!\n"));
+		return;
+	}
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, tag);
+}
+
+static void
+mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct fw_control_ex	*fw_control_context;
+	struct get_nvm_data_resp *pPayload =
+		(struct get_nvm_data_resp *)(piomb + 4);
+	u32 tag = le32_to_cpu(pPayload->tag);
+	struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag];
+	u32 dlen_status = le32_to_cpu(pPayload->dlen_status);
+	u32 ir_tds_bn_dps_das_nvm =
+		le32_to_cpu(pPayload->ir_tda_bn_dps_das_nvm);
+	void *virt_addr = pm8001_ha->memoryMap.region[NVMD].virt_ptr;
+	fw_control_context = ccb->fw_control_context;
+
+	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Get nvm data complete!\n"));
+	if ((dlen_status & NVMD_STAT) != 0) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Get nvm data error!\n"));
+		complete(pm8001_ha->nvmd_completion);
+		return;
+	}
+
+	if (ir_tds_bn_dps_das_nvm & IPMode) {
+		/* indirect mode - IR bit set */
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("Get NVMD success, IR=1\n"));
+		if ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == TWI_DEVICE) {
+			if (ir_tds_bn_dps_das_nvm == 0x80a80200) {
+				memcpy(pm8001_ha->sas_addr,
+				      ((u8 *)virt_addr + 4),
+				       SAS_ADDR_SIZE);
+				PM8001_MSG_DBG(pm8001_ha,
+					pm8001_printk("Get SAS address"
+					" from VPD successfully!\n"));
+			}
+		} else if (((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == C_SEEPROM)
+			|| ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == VPD_FLASH) ||
+			((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == EXPAN_ROM)) {
+				;
+		} else if (((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == AAP1_RDUMP)
+			|| ((ir_tds_bn_dps_das_nvm & NVMD_TYPE) == IOP_RDUMP)) {
+			;
+		} else {
+			/* Should not be happened*/
+			PM8001_MSG_DBG(pm8001_ha,
+				pm8001_printk("(IR=1)Wrong Device type 0x%x\n",
+				ir_tds_bn_dps_das_nvm));
+		}
+	} else /* direct mode */{
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("Get NVMD success, IR=0, dataLen=%d\n",
+			(dlen_status & NVMD_LEN) >> 24));
+	}
+	memcpy((void *)(fw_control_context->usrAddr),
+		(void *)(pm8001_ha->memoryMap.region[NVMD].virt_ptr),
+		fw_control_context->len);
+	complete(pm8001_ha->nvmd_completion);
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, tag);
+}
+
+static int mpi_local_phy_ctl(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct local_phy_ctl_resp *pPayload =
+		(struct local_phy_ctl_resp *)(piomb + 4);
+	u32 status = le32_to_cpu(pPayload->status);
+	u32 phy_id = le32_to_cpu(pPayload->phyop_phyid) & ID_BITS;
+	u32 phy_op = le32_to_cpu(pPayload->phyop_phyid) & OP_BITS;
+	if (status != 0) {
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("%x phy execute %x phy op failed! \n",
+			phy_id, phy_op));
+	} else
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("%x phy execute %x phy op success! \n",
+			phy_id, phy_op));
+	return 0;
+}
+
+/**
+ * pm8001_bytes_dmaed - one of the interface function communication with libsas
+ * @pm8001_ha: our hba card information
+ * @i: which phy that received the event.
+ *
+ * when HBA driver received the identify done event or initiate FIS received
+ * event(for SATA), it will invoke this function to notify the sas layer that
+ * the sas toplogy has formed, please discover the the whole sas domain,
+ * while receive a broadcast(change) primitive just tell the sas
+ * layer to discover the changed domain rather than the whole domain.
+ */
+static void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i)
+{
+	struct pm8001_phy *phy = &pm8001_ha->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	struct sas_ha_struct *sas_ha;
+	if (!phy->phy_attached)
+		return;
+
+	sas_ha = pm8001_ha->sas;
+	if (sas_phy->phy) {
+		struct sas_phy *sphy = sas_phy->phy;
+		sphy->negotiated_linkrate = sas_phy->linkrate;
+		sphy->minimum_linkrate = phy->minimum_linkrate;
+		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		sphy->maximum_linkrate = phy->maximum_linkrate;
+		sphy->maximum_linkrate_hw = phy->maximum_linkrate;
+	}
+
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		struct sas_identify_frame *id;
+		id = (struct sas_identify_frame *)phy->frame_rcvd;
+		id->dev_type = phy->identify.device_type;
+		id->initiator_bits = SAS_PROTOCOL_ALL;
+		id->target_bits = phy->identify.target_port_protocols;
+	} else if (phy->phy_type & PORT_TYPE_SATA) {
+		/*Nothing*/
+	}
+	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("phy %d byte dmaded.\n", i));
+
+	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
+	pm8001_ha->sas->notify_port_event(sas_phy, PORTE_BYTES_DMAED);
+}
+
+/* Get the link rate speed  */
+static void get_lrate_mode(struct pm8001_phy *phy, u8 link_rate)
+{
+	struct sas_phy *sas_phy = phy->sas_phy.phy;
+
+	switch (link_rate) {
+	case PHY_SPEED_60:
+		phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
+		break;
+	case PHY_SPEED_30:
+		phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	case PHY_SPEED_15:
+		phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	}
+	sas_phy->negotiated_linkrate = phy->sas_phy.linkrate;
+	sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_6_0_GBPS;
+	sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+	sas_phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
+	sas_phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+}
+
+/**
+ * asd_get_attached_sas_addr -- extract/generate attached SAS address
+ * @phy: pointer to asd_phy
+ * @sas_addr: pointer to buffer where the SAS address is to be written
+ *
+ * This function extracts the SAS address from an IDENTIFY frame
+ * received.  If OOB is SATA, then a SAS address is generated from the
+ * HA tables.
+ *
+ * LOCKING: the frame_rcvd_lock needs to be held since this parses the frame
+ * buffer.
+ */
+static void pm8001_get_attached_sas_addr(struct pm8001_phy *phy,
+	u8 *sas_addr)
+{
+	if (phy->sas_phy.frame_rcvd[0] == 0x34
+		&& phy->sas_phy.oob_mode == SATA_OOB_MODE) {
+		struct pm8001_hba_info *pm8001_ha = phy->sas_phy.ha->lldd_ha;
+		/* FIS device-to-host */
+		u64 addr = be64_to_cpu(*(__be64 *)pm8001_ha->sas_addr);
+		addr += phy->sas_phy.id;
+		*(__be64 *)sas_addr = cpu_to_be64(addr);
+	} else {
+		struct sas_identify_frame *idframe =
+			(void *) phy->sas_phy.frame_rcvd;
+		memcpy(sas_addr, idframe->sas_addr, SAS_ADDR_SIZE);
+	}
+}
+
+/**
+ * pm8001_hw_event_ack_req- For PM8001,some events need to acknowage to FW.
+ * @pm8001_ha: our hba card information
+ * @Qnum: the outbound queue message number.
+ * @SEA: source of event to ack
+ * @port_id: port id.
+ * @phyId: phy id.
+ * @param0: parameter 0.
+ * @param1: parameter 1.
+ */
+static void pm8001_hw_event_ack_req(struct pm8001_hba_info *pm8001_ha,
+	u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0, u32 param1)
+{
+	struct hw_event_ack_req	 payload;
+	u32 opc = OPC_INB_SAS_HW_EVENT_ACK;
+
+	struct inbound_queue_table *circularQ;
+
+	memset((u8 *)&payload, 0, sizeof(payload));
+	circularQ = &pm8001_ha->inbnd_q_tbl[Qnum];
+	payload.tag = 1;
+	payload.sea_phyid_portid = cpu_to_le32(((SEA & 0xFFFF) << 8) |
+		((phyId & 0x0F) << 4) | (port_id & 0x0F));
+	payload.param0 = cpu_to_le32(param0);
+	payload.param1 = cpu_to_le32(param1);
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+}
+
+static int pm8001_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha,
+	u32 phyId, u32 phy_op);
+
+/**
+ * hw_event_sas_phy_up -FW tells me a SAS phy up event.
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+static void
+hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct hw_event_resp *pPayload =
+		(struct hw_event_resp *)(piomb + 4);
+	u32 lr_evt_status_phyid_portid =
+		le32_to_cpu(pPayload->lr_evt_status_phyid_portid);
+	u8 link_rate =
+		(u8)((lr_evt_status_phyid_portid & 0xF0000000) >> 28);
+	u8 phy_id =
+		(u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4);
+	struct sas_ha_struct *sas_ha = pm8001_ha->sas;
+	struct pm8001_phy *phy = &pm8001_ha->phy[phy_id];
+	unsigned long flags;
+	u8 deviceType = pPayload->sas_identify.dev_type;
+
+	PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk("HW_EVENT_SAS_PHY_UP \n"));
+
+	switch (deviceType) {
+	case SAS_PHY_UNUSED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("device type no device.\n"));
+		break;
+	case SAS_END_DEVICE:
+		PM8001_MSG_DBG(pm8001_ha, pm8001_printk("end device.\n"));
+		pm8001_chip_phy_ctl_req(pm8001_ha, phy_id,
+			PHY_NOTIFY_ENABLE_SPINUP);
+		get_lrate_mode(phy, link_rate);
+		break;
+	case SAS_EDGE_EXPANDER_DEVICE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("expander device.\n"));
+		get_lrate_mode(phy, link_rate);
+		break;
+	case SAS_FANOUT_EXPANDER_DEVICE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("fanout expander device.\n"));
+		get_lrate_mode(phy, link_rate);
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("unkown device type(%x)\n", deviceType));
+		break;
+	}
+	phy->phy_type |= PORT_TYPE_SAS;
+	phy->identify.device_type = deviceType;
+	phy->phy_attached = 1;
+	if (phy->identify.device_type == SAS_END_DEV)
+		phy->identify.target_port_protocols = SAS_PROTOCOL_SSP;
+	else if (phy->identify.device_type != NO_DEVICE)
+		phy->identify.target_port_protocols = SAS_PROTOCOL_SMP;
+	phy->sas_phy.oob_mode = SAS_OOB_MODE;
+	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
+	memcpy(phy->frame_rcvd, &pPayload->sas_identify,
+		sizeof(struct sas_identify_frame)-4);
+	phy->frame_rcvd_size = sizeof(struct sas_identify_frame) - 4;
+	pm8001_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr);
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
+	if (pm8001_ha->flags == PM8001F_RUN_TIME)
+		mdelay(200);/*delay a moment to wait disk to spinup*/
+	pm8001_bytes_dmaed(pm8001_ha, phy_id);
+}
+
+/**
+ * hw_event_sata_phy_up -FW tells me a SATA phy up event.
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+static void
+hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct hw_event_resp *pPayload =
+		(struct hw_event_resp *)(piomb + 4);
+	u32 lr_evt_status_phyid_portid =
+		le32_to_cpu(pPayload->lr_evt_status_phyid_portid);
+	u8 link_rate =
+		(u8)((lr_evt_status_phyid_portid & 0xF0000000) >> 28);
+	u8 phy_id =
+		(u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4);
+	struct sas_ha_struct *sas_ha = pm8001_ha->sas;
+	struct pm8001_phy *phy = &pm8001_ha->phy[phy_id];
+	unsigned long flags;
+	get_lrate_mode(phy, link_rate);
+	phy->phy_type |= PORT_TYPE_SATA;
+	phy->phy_attached = 1;
+	phy->sas_phy.oob_mode = SATA_OOB_MODE;
+	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
+	memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4),
+		sizeof(struct dev_to_host_fis));
+	phy->frame_rcvd_size = sizeof(struct dev_to_host_fis);
+	phy->identify.target_port_protocols = SAS_PROTOCOL_SATA;
+	phy->identify.device_type = SATA_DEV;
+	pm8001_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr);
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
+	pm8001_bytes_dmaed(pm8001_ha, phy_id);
+}
+
+/**
+ * hw_event_phy_down -we should notify the libsas the phy is down.
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+static void
+hw_event_phy_down(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct hw_event_resp *pPayload =
+		(struct hw_event_resp *)(piomb + 4);
+	u32 lr_evt_status_phyid_portid =
+		le32_to_cpu(pPayload->lr_evt_status_phyid_portid);
+	u8 port_id = (u8)(lr_evt_status_phyid_portid & 0x0000000F);
+	u8 phy_id =
+		(u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4);
+	u32 npip_portstate = le32_to_cpu(pPayload->npip_portstate);
+	u8 portstate = (u8)(npip_portstate & 0x0000000F);
+
+	switch (portstate) {
+	case PORT_VALID:
+		break;
+	case PORT_INVALID:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" PortInvalid portID %d \n", port_id));
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" Last phy Down and port invalid\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
+			port_id, phy_id, 0, 0);
+		break;
+	case PORT_IN_RESET:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" PortInReset portID %d \n", port_id));
+		break;
+	case PORT_NOT_ESTABLISHED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" phy Down and PORT_NOT_ESTABLISHED\n"));
+		break;
+	case PORT_LOSTCOMM:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" phy Down and PORT_LOSTCOMM\n"));
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" Last phy Down and port invalid\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
+			port_id, phy_id, 0, 0);
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" phy Down and(default) = %x\n",
+			portstate));
+		break;
+
+	}
+}
+
+/**
+ * mpi_reg_resp -process register device ID response.
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ *
+ * when sas layer find a device it will notify LLDD, then the driver register
+ * the domain device to FW, this event is the return device ID which the FW
+ * has assigned, from now,inter-communication with FW is no longer using the
+ * SAS address, use device ID which FW assigned.
+ */
+static int mpi_reg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	u32 status;
+	u32 device_id;
+	u32 htag;
+	struct pm8001_ccb_info *ccb;
+	struct pm8001_device *pm8001_dev;
+	struct dev_reg_resp *registerRespPayload =
+		(struct dev_reg_resp *)(piomb + 4);
+
+	htag = le32_to_cpu(registerRespPayload->tag);
+	ccb = &pm8001_ha->ccb_info[registerRespPayload->tag];
+	pm8001_dev = ccb->device;
+	status = le32_to_cpu(registerRespPayload->status);
+	device_id = le32_to_cpu(registerRespPayload->device_id);
+	PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk(" register device is status = %d\n", status));
+	switch (status) {
+	case DEVREG_SUCCESS:
+		PM8001_MSG_DBG(pm8001_ha, pm8001_printk("DEVREG_SUCCESS\n"));
+		pm8001_dev->device_id = device_id;
+		break;
+	case DEVREG_FAILURE_OUT_OF_RESOURCE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("DEVREG_FAILURE_OUT_OF_RESOURCE\n"));
+		break;
+	case DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED:
+		PM8001_MSG_DBG(pm8001_ha,
+		   pm8001_printk("DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED\n"));
+		break;
+	case DEVREG_FAILURE_INVALID_PHY_ID:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("DEVREG_FAILURE_INVALID_PHY_ID\n"));
+		break;
+	case DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED:
+		PM8001_MSG_DBG(pm8001_ha,
+		   pm8001_printk("DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED\n"));
+		break;
+	case DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE\n"));
+		break;
+	case DEVREG_FAILURE_PORT_NOT_VALID_STATE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("DEVREG_FAILURE_PORT_NOT_VALID_STATE\n"));
+		break;
+	case DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID:
+		PM8001_MSG_DBG(pm8001_ha,
+		       pm8001_printk("DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID\n"));
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+		 pm8001_printk("DEVREG_FAILURE_DEVICE_TYPE_NOT_UNSORPORTED\n"));
+		break;
+	}
+	complete(pm8001_dev->dcompletion);
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, htag);
+	return 0;
+}
+
+static int mpi_dereg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	u32 status;
+	u32 device_id;
+	struct dev_reg_resp *registerRespPayload =
+		(struct dev_reg_resp *)(piomb + 4);
+
+	status = le32_to_cpu(registerRespPayload->status);
+	device_id = le32_to_cpu(registerRespPayload->device_id);
+	if (status != 0)
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(" deregister device failed ,status = %x"
+			", device_id = %x\n", status, device_id));
+	return 0;
+}
+
+static int
+mpi_fw_flash_update_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	u32 status;
+	struct fw_control_ex	fw_control_context;
+	struct fw_flash_Update_resp *ppayload =
+		(struct fw_flash_Update_resp *)(piomb + 4);
+	u32 tag = le32_to_cpu(ppayload->tag);
+	struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[tag];
+	status = le32_to_cpu(ppayload->status);
+	memcpy(&fw_control_context,
+		ccb->fw_control_context,
+		sizeof(fw_control_context));
+	switch (status) {
+	case FLASH_UPDATE_COMPLETE_PENDING_REBOOT:
+		PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk(": FLASH_UPDATE_COMPLETE_PENDING_REBOOT\n"));
+		break;
+	case FLASH_UPDATE_IN_PROGRESS:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_IN_PROGRESS\n"));
+		break;
+	case FLASH_UPDATE_HDR_ERR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_HDR_ERR\n"));
+		break;
+	case FLASH_UPDATE_OFFSET_ERR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_OFFSET_ERR\n"));
+		break;
+	case FLASH_UPDATE_CRC_ERR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_CRC_ERR\n"));
+		break;
+	case FLASH_UPDATE_LENGTH_ERR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_LENGTH_ERR\n"));
+		break;
+	case FLASH_UPDATE_HW_ERR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_HW_ERR\n"));
+		break;
+	case FLASH_UPDATE_DNLD_NOT_SUPPORTED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_DNLD_NOT_SUPPORTED\n"));
+		break;
+	case FLASH_UPDATE_DISABLED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk(": FLASH_UPDATE_DISABLED\n"));
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("No matched status = %d\n", status));
+		break;
+	}
+	ccb->fw_control_context->fw_control->retcode = status;
+	pci_free_consistent(pm8001_ha->pdev,
+			fw_control_context.len,
+			fw_control_context.virtAddr,
+			fw_control_context.phys_addr);
+	complete(pm8001_ha->nvmd_completion);
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, tag);
+	return 0;
+}
+
+static int
+mpi_general_event(struct pm8001_hba_info *pm8001_ha , void *piomb)
+{
+	u32 status;
+	int i;
+	struct general_event_resp *pPayload =
+		(struct general_event_resp *)(piomb + 4);
+	status = le32_to_cpu(pPayload->status);
+	PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk(" status = 0x%x\n", status));
+	for (i = 0; i < GENERAL_EVENT_PAYLOAD; i++)
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("inb_IOMB_payload[0x%x] 0x%x, \n", i,
+			pPayload->inb_IOMB_payload[i]));
+	return 0;
+}
+
+static int
+mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	struct sas_task *t;
+	struct pm8001_ccb_info *ccb;
+	unsigned long flags;
+	u32 status ;
+	u32 tag, scp;
+	struct task_status_struct *ts;
+
+	struct task_abort_resp *pPayload =
+		(struct task_abort_resp *)(piomb + 4);
+	ccb = &pm8001_ha->ccb_info[pPayload->tag];
+	t = ccb->task;
+	ts = &t->task_status;
+
+	if (t == NULL)
+		return -1;
+
+	status = le32_to_cpu(pPayload->status);
+	tag = le32_to_cpu(pPayload->tag);
+	scp = le32_to_cpu(pPayload->scp);
+	PM8001_IO_DBG(pm8001_ha,
+		pm8001_printk(" status = 0x%x\n", status));
+	if (status != 0)
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("task abort failed tag = 0x%x,"
+			" scp= 0x%x\n", tag, scp));
+	switch (status) {
+	case IO_SUCCESS:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_SUCCESS\n"));
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_GOOD;
+		break;
+	case IO_NOT_VALID:
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("IO_NOT_VALID\n"));
+		ts->resp = TMF_RESP_FUNC_FAILED;
+		break;
+	}
+	spin_lock_irqsave(&t->task_state_lock, flags);
+	t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+	t->task_state_flags |= SAS_TASK_STATE_DONE;
+	spin_unlock_irqrestore(&t->task_state_lock, flags);
+	pm8001_ccb_task_free(pm8001_ha, t, ccb, pPayload->tag);
+	mb();
+	t->task_done(t);
+	return 0;
+}
+
+/**
+ * mpi_hw_event -The hw event has come.
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
+{
+	unsigned long flags;
+	struct hw_event_resp *pPayload =
+		(struct hw_event_resp *)(piomb + 4);
+	u32 lr_evt_status_phyid_portid =
+		le32_to_cpu(pPayload->lr_evt_status_phyid_portid);
+	u8 port_id = (u8)(lr_evt_status_phyid_portid & 0x0000000F);
+	u8 phy_id =
+		(u8)((lr_evt_status_phyid_portid & 0x000000F0) >> 4);
+	u16 eventType =
+		(u16)((lr_evt_status_phyid_portid & 0x00FFFF00) >> 8);
+	u8 status =
+		(u8)((lr_evt_status_phyid_portid & 0x0F000000) >> 24);
+	struct sas_ha_struct *sas_ha = pm8001_ha->sas;
+	struct pm8001_phy *phy = &pm8001_ha->phy[phy_id];
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+	PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk("outbound queue HW event & event type : "));
+	switch (eventType) {
+	case HW_EVENT_PHY_START_STATUS:
+		PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk("HW_EVENT_PHY_START_STATUS"
+			" status = %x\n", status));
+		if (status == 0) {
+			phy->phy_state = 1;
+			if (pm8001_ha->flags == PM8001F_RUN_TIME)
+				complete(phy->enable_completion);
+		}
+		break;
+	case HW_EVENT_SAS_PHY_UP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PHY_START_STATUS \n"));
+		hw_event_sas_phy_up(pm8001_ha, piomb);
+		break;
+	case HW_EVENT_SATA_PHY_UP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_SATA_PHY_UP \n"));
+		hw_event_sata_phy_up(pm8001_ha, piomb);
+		break;
+	case HW_EVENT_PHY_STOP_STATUS:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PHY_STOP_STATUS "
+			"status = %x\n", status));
+		if (status == 0)
+			phy->phy_state = 0;
+		break;
+	case HW_EVENT_SATA_SPINUP_HOLD:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_SATA_SPINUP_HOLD \n"));
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		break;
+	case HW_EVENT_PHY_DOWN:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PHY_DOWN \n"));
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		phy->phy_attached = 0;
+		phy->phy_state = 0;
+		hw_event_phy_down(pm8001_ha, piomb);
+		break;
+	case HW_EVENT_PORT_INVALID:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PORT_INVALID\n"));
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	/* the broadcast change primitive received, tell the LIBSAS this event
+	to revalidate the sas domain*/
+	case HW_EVENT_BROADCAST_CHANGE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_BROADCAST_CHANGE\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_BROADCAST_CHANGE,
+			port_id, phy_id, 1, 0);
+		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
+		sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE;
+		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
+		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		break;
+	case HW_EVENT_PHY_ERROR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PHY_ERROR\n"));
+		sas_phy_disconnected(&phy->sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		break;
+	case HW_EVENT_BROADCAST_EXP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_BROADCAST_EXP\n"));
+		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
+		sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP;
+		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
+		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		break;
+	case HW_EVENT_LINK_ERR_INVALID_DWORD:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_LINK_ERR_INVALID_DWORD\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_LINK_ERR_INVALID_DWORD, port_id, phy_id, 0, 0);
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_LINK_ERR_DISPARITY_ERROR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_LINK_ERR_DISPARITY_ERROR\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_LINK_ERR_DISPARITY_ERROR,
+			port_id, phy_id, 0, 0);
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_LINK_ERR_CODE_VIOLATION:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_LINK_ERR_CODE_VIOLATION\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_LINK_ERR_CODE_VIOLATION,
+			port_id, phy_id, 0, 0);
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH:
+		PM8001_MSG_DBG(pm8001_ha,
+		      pm8001_printk("HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH,
+			port_id, phy_id, 0, 0);
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_MALFUNCTION:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_MALFUNCTION\n"));
+		break;
+	case HW_EVENT_BROADCAST_SES:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_BROADCAST_SES\n"));
+		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
+		sas_phy->sas_prim = HW_EVENT_BROADCAST_SES;
+		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
+		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		break;
+	case HW_EVENT_INBOUND_CRC_ERROR:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_INBOUND_CRC_ERROR\n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_INBOUND_CRC_ERROR,
+			port_id, phy_id, 0, 0);
+		break;
+	case HW_EVENT_HARD_RESET_RECEIVED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_HARD_RESET_RECEIVED\n"));
+		sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+		break;
+	case HW_EVENT_ID_FRAME_TIMEOUT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_ID_FRAME_TIMEOUT\n"));
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_LINK_ERR_PHY_RESET_FAILED:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_LINK_ERR_PHY_RESET_FAILED \n"));
+		pm8001_hw_event_ack_req(pm8001_ha, 0,
+			HW_EVENT_LINK_ERR_PHY_RESET_FAILED,
+			port_id, phy_id, 0, 0);
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_PORT_RESET_TIMER_TMO:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PORT_RESET_TIMER_TMO \n"));
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_PORT_RECOVERY_TIMER_TMO:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PORT_RECOVERY_TIMER_TMO \n"));
+		sas_phy_disconnected(sas_phy);
+		phy->phy_attached = 0;
+		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		break;
+	case HW_EVENT_PORT_RECOVER:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PORT_RECOVER \n"));
+		break;
+	case HW_EVENT_PORT_RESET_COMPLETE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("HW_EVENT_PORT_RESET_COMPLETE \n"));
+		break;
+	case EVENT_BROADCAST_ASYNCH_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("EVENT_BROADCAST_ASYNCH_EVENT\n"));
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("Unknown event type = %x\n", eventType));
+		break;
+	}
+	return 0;
+}
+
+/**
+ * process_one_iomb - process one outbound Queue memory block
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb)
+{
+	u32 pHeader = (u32)*(u32 *)piomb;
+	u8 opc = (u8)((le32_to_cpu(pHeader)) & 0xFFF);
+
+	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("process_one_iomb:\n"));
+
+	switch (opc) {
+	case OPC_OUB_ECHO:
+		PM8001_MSG_DBG(pm8001_ha, pm8001_printk("OPC_OUB_ECHO \n"));
+		break;
+	case OPC_OUB_HW_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_HW_EVENT \n"));
+		mpi_hw_event(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SSP_COMP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SSP_COMP \n"));
+		mpi_ssp_completion(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SMP_COMP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SMP_COMP \n"));
+		mpi_smp_completion(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_LOCAL_PHY_CNTRL:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_LOCAL_PHY_CNTRL\n"));
+		mpi_local_phy_ctl(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_DEV_REGIST:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_DEV_REGIST \n"));
+		mpi_reg_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_DEREG_DEV:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("unresgister the deviece \n"));
+		mpi_dereg_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_GET_DEV_HANDLE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GET_DEV_HANDLE \n"));
+		break;
+	case OPC_OUB_SATA_COMP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SATA_COMP \n"));
+		mpi_sata_completion(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SATA_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SATA_EVENT \n"));
+		mpi_sata_event(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SSP_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SSP_EVENT\n"));
+		mpi_ssp_event(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_DEV_HANDLE_ARRIV:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_DEV_HANDLE_ARRIV\n"));
+		/*This is for target*/
+		break;
+	case OPC_OUB_SSP_RECV_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SSP_RECV_EVENT\n"));
+		/*This is for target*/
+		break;
+	case OPC_OUB_DEV_INFO:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_DEV_INFO\n"));
+		break;
+	case OPC_OUB_FW_FLASH_UPDATE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_FW_FLASH_UPDATE\n"));
+		mpi_fw_flash_update_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_GPIO_RESPONSE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GPIO_RESPONSE\n"));
+		break;
+	case OPC_OUB_GPIO_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GPIO_EVENT\n"));
+		break;
+	case OPC_OUB_GENERAL_EVENT:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GENERAL_EVENT\n"));
+		mpi_general_event(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SSP_ABORT_RSP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SSP_ABORT_RSP\n"));
+		mpi_task_abort_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SATA_ABORT_RSP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SATA_ABORT_RSP\n"));
+		mpi_task_abort_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SAS_DIAG_MODE_START_END:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SAS_DIAG_MODE_START_END\n"));
+		break;
+	case OPC_OUB_SAS_DIAG_EXECUTE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SAS_DIAG_EXECUTE\n"));
+		break;
+	case OPC_OUB_GET_TIME_STAMP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GET_TIME_STAMP\n"));
+		break;
+	case OPC_OUB_SAS_HW_EVENT_ACK:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SAS_HW_EVENT_ACK\n"));
+		break;
+	case OPC_OUB_PORT_CONTROL:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_PORT_CONTROL\n"));
+		break;
+	case OPC_OUB_SMP_ABORT_RSP:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SMP_ABORT_RSP\n"));
+		mpi_task_abort_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_GET_NVMD_DATA:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GET_NVMD_DATA\n"));
+		mpi_get_nvmd_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_SET_NVMD_DATA:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SET_NVMD_DATA\n"));
+		mpi_set_nvmd_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_DEVICE_HANDLE_REMOVAL:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_DEVICE_HANDLE_REMOVAL\n"));
+		break;
+	case OPC_OUB_SET_DEVICE_STATE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SET_DEVICE_STATE\n"));
+		mpi_set_dev_state_resp(pm8001_ha, piomb);
+		break;
+	case OPC_OUB_GET_DEVICE_STATE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_GET_DEVICE_STATE\n"));
+		break;
+	case OPC_OUB_SET_DEV_INFO:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SET_DEV_INFO\n"));
+		break;
+	case OPC_OUB_SAS_RE_INITIALIZE:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("OPC_OUB_SAS_RE_INITIALIZE\n"));
+		break;
+	default:
+		PM8001_MSG_DBG(pm8001_ha,
+			pm8001_printk("Unknown outbound Queue IOMB OPC = %x\n",
+			opc));
+		break;
+	}
+}
+
+static int process_oq(struct pm8001_hba_info *pm8001_ha)
+{
+	struct outbound_queue_table *circularQ;
+	void *pMsg1 = NULL;
+	u8 bc = 0;
+	u32 ret = MPI_IO_STATUS_FAIL, processedMsgCount = 0;
+
+	circularQ = &pm8001_ha->outbnd_q_tbl[0];
+	do {
+		ret = mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc);
+		if (MPI_IO_STATUS_SUCCESS == ret) {
+			/* process the outbound message */
+			process_one_iomb(pm8001_ha, (void *)((u8 *)pMsg1 - 4));
+			/* free the message from the outbound circular buffer */
+			mpi_msg_free_set(pm8001_ha, circularQ, bc);
+			processedMsgCount++;
+		}
+		if (MPI_IO_STATUS_BUSY == ret) {
+			u32 producer_idx;
+			/* Update the producer index from SPC */
+			producer_idx = pm8001_read_32(circularQ->pi_virt);
+			circularQ->producer_index = cpu_to_le32(producer_idx);
+			if (circularQ->producer_index ==
+				circularQ->consumer_idx)
+				/* OQ is empty */
+				break;
+		}
+	} while (100 > processedMsgCount);/*end message processing if hit the
+	count*/
+	return ret;
+}
+
+/* PCI_DMA_... to our direction translation. */
+static const u8 data_dir_flags[] = {
+	[PCI_DMA_BIDIRECTIONAL] = DATA_DIR_BYRECIPIENT,/* UNSPECIFIED */
+	[PCI_DMA_TODEVICE]	= DATA_DIR_OUT,/* OUTBOUND */
+	[PCI_DMA_FROMDEVICE]	= DATA_DIR_IN,/* INBOUND */
+	[PCI_DMA_NONE]		= DATA_DIR_NONE,/* NO TRANSFER */
+};
+static void
+pm8001_chip_make_sg(struct scatterlist *scatter, int nr, void *prd)
+{
+	int i;
+	struct scatterlist *sg;
+	struct pm8001_prd *buf_prd = prd;
+
+	for_each_sg(scatter, sg, nr, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->im_len.len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd->im_len.e = 0;
+		buf_prd++;
+	}
+}
+
+static void build_smp_cmd(u32 deviceID, u32 hTag, struct smp_req *psmp_cmd)
+{
+	psmp_cmd->tag = cpu_to_le32(hTag);
+	psmp_cmd->device_id = cpu_to_le32(deviceID);
+	psmp_cmd->len_ip_ir = cpu_to_le32(1|(1 << 1));
+}
+
+/**
+ * pm8001_chip_smp_req - send a SMP task to FW
+ * @pm8001_ha: our hba card information.
+ * @ccb: the ccb information this request used.
+ */
+static int pm8001_chip_smp_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	int elem, rc;
+	struct sas_task *task = ccb->task;
+	struct domain_device *dev = task->dev;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	struct scatterlist *sg_req, *sg_resp;
+	u32 req_len, resp_len;
+	struct smp_req smp_cmd;
+	u32 opc;
+	struct inbound_queue_table *circularQ;
+
+	memset(&smp_cmd, 0, sizeof(smp_cmd));
+	/*
+	 * DMA-map SMP request, response buffers
+	 */
+	sg_req = &task->smp_task.smp_req;
+	elem = dma_map_sg(pm8001_ha->dev, sg_req, 1, PCI_DMA_TODEVICE);
+	if (!elem)
+		return -ENOMEM;
+	req_len = sg_dma_len(sg_req);
+
+	sg_resp = &task->smp_task.smp_resp;
+	elem = dma_map_sg(pm8001_ha->dev, sg_resp, 1, PCI_DMA_FROMDEVICE);
+	if (!elem) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+	resp_len = sg_dma_len(sg_resp);
+	/* must be in dwords */
+	if ((req_len & 0x3) || (resp_len & 0x3)) {
+		rc = -EINVAL;
+		goto err_out_2;
+	}
+
+	opc = OPC_INB_SMP_REQUEST;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	smp_cmd.tag = cpu_to_le32(ccb->ccb_tag);
+	smp_cmd.long_smp_req.long_req_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req));
+	smp_cmd.long_smp_req.long_req_size =
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_req)-4);
+	smp_cmd.long_smp_req.long_resp_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_resp));
+	smp_cmd.long_smp_req.long_resp_size =
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_resp)-4);
+	build_smp_cmd(pm8001_dev->device_id, smp_cmd.tag, &smp_cmd);
+	mpi_build_cmd(pm8001_ha, circularQ, opc, (u32 *)&smp_cmd);
+	return 0;
+
+err_out_2:
+	dma_unmap_sg(pm8001_ha->dev, &ccb->task->smp_task.smp_resp, 1,
+			PCI_DMA_FROMDEVICE);
+err_out:
+	dma_unmap_sg(pm8001_ha->dev, &ccb->task->smp_task.smp_req, 1,
+			PCI_DMA_TODEVICE);
+	return rc;
+}
+
+/**
+ * pm8001_chip_ssp_io_req - send a SSP task to FW
+ * @pm8001_ha: our hba card information.
+ * @ccb: the ccb information this request used.
+ */
+static int pm8001_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	struct sas_task *task = ccb->task;
+	struct domain_device *dev = task->dev;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	struct ssp_ini_io_start_req ssp_cmd;
+	u32 tag = ccb->ccb_tag;
+	__le64 phys_addr;
+	struct inbound_queue_table *circularQ;
+	u32 opc = OPC_INB_SSPINIIOSTART;
+	memset(&ssp_cmd, 0, sizeof(ssp_cmd));
+	memcpy(ssp_cmd.ssp_iu.lun, task->ssp_task.LUN, 8);
+	ssp_cmd.dir_m_tlr = data_dir_flags[task->data_dir] << 8 | 0x0;/*0 for
+	SAS 1.1 compatible TLR*/
+	ssp_cmd.data_len = cpu_to_le32(task->total_xfer_len);
+	ssp_cmd.device_id = cpu_to_le32(pm8001_dev->device_id);
+	ssp_cmd.tag = cpu_to_le32(tag);
+	if (task->ssp_task.enable_first_burst)
+		ssp_cmd.ssp_iu.efb_prio_attr |= 0x80;
+	ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_prio << 3);
+	ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_attr & 7);
+	memcpy(ssp_cmd.ssp_iu.cdb, task->ssp_task.cdb, 16);
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+
+	/* fill in PRD (scatter/gather) table, if any */
+	if (task->num_scatter > 1) {
+		pm8001_chip_make_sg(task->scatter, ccb->n_elem, ccb->buf_prd);
+		phys_addr = cpu_to_le64(ccb->ccb_dma_handle +
+				offsetof(struct pm8001_ccb_info, buf_prd[0]));
+		ssp_cmd.addr_low = lower_32_bits(phys_addr);
+		ssp_cmd.addr_high = upper_32_bits(phys_addr);
+		ssp_cmd.esgl = cpu_to_le32(1<<31);
+	} else if (task->num_scatter == 1) {
+		__le64 dma_addr = cpu_to_le64(sg_dma_address(task->scatter));
+		ssp_cmd.addr_low = lower_32_bits(dma_addr);
+		ssp_cmd.addr_high = upper_32_bits(dma_addr);
+		ssp_cmd.len = cpu_to_le32(task->total_xfer_len);
+		ssp_cmd.esgl = 0;
+	} else if (task->num_scatter == 0) {
+		ssp_cmd.addr_low = 0;
+		ssp_cmd.addr_high = 0;
+		ssp_cmd.len = cpu_to_le32(task->total_xfer_len);
+		ssp_cmd.esgl = 0;
+	}
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &ssp_cmd);
+	return 0;
+}
+
+static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	struct sas_task *task = ccb->task;
+	struct domain_device *dev = task->dev;
+	struct pm8001_device *pm8001_ha_dev = dev->lldd_dev;
+	u32 tag = ccb->ccb_tag;
+	struct sata_start_req sata_cmd;
+	u32 hdr_tag, ncg_tag = 0;
+	__le64 phys_addr;
+	u32 ATAP = 0x0;
+	u32 dir;
+	struct inbound_queue_table *circularQ;
+	u32  opc = OPC_INB_SATA_HOST_OPSTART;
+	memset(&sata_cmd, 0, sizeof(sata_cmd));
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	if (task->data_dir == PCI_DMA_NONE) {
+		ATAP = 0x04;  /* no data*/
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("no data \n"));
+	} else if (likely(!task->ata_task.device_control_reg_update)) {
+		if (task->ata_task.dma_xfer) {
+			ATAP = 0x06; /* DMA */
+			PM8001_IO_DBG(pm8001_ha, pm8001_printk("DMA \n"));
+		} else {
+			ATAP = 0x05; /* PIO*/
+			PM8001_IO_DBG(pm8001_ha, pm8001_printk("PIO \n"));
+		}
+		if (task->ata_task.use_ncq &&
+			dev->sata_dev.command_set != ATAPI_COMMAND_SET) {
+			ATAP = 0x07; /* FPDMA */
+			PM8001_IO_DBG(pm8001_ha, pm8001_printk("FPDMA \n"));
+		}
+	}
+	if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag))
+		ncg_tag = cpu_to_le32(hdr_tag);
+	dir = data_dir_flags[task->data_dir] << 8;
+	sata_cmd.tag = cpu_to_le32(tag);
+	sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
+	sata_cmd.data_len = cpu_to_le32(task->total_xfer_len);
+	sata_cmd.ncqtag_atap_dir_m =
+		cpu_to_le32(((ncg_tag & 0xff)<<16)|((ATAP & 0x3f) << 10) | dir);
+	sata_cmd.sata_fis = task->ata_task.fis;
+	if (likely(!task->ata_task.device_control_reg_update))
+		sata_cmd.sata_fis.flags |= 0x80;/* C=1: update ATA cmd reg */
+	sata_cmd.sata_fis.flags &= 0xF0;/* PM_PORT field shall be 0 */
+	/* fill in PRD (scatter/gather) table, if any */
+	if (task->num_scatter > 1) {
+		pm8001_chip_make_sg(task->scatter, ccb->n_elem, ccb->buf_prd);
+		phys_addr = cpu_to_le64(ccb->ccb_dma_handle +
+				offsetof(struct pm8001_ccb_info, buf_prd[0]));
+		sata_cmd.addr_low = lower_32_bits(phys_addr);
+		sata_cmd.addr_high = upper_32_bits(phys_addr);
+		sata_cmd.esgl = cpu_to_le32(1 << 31);
+	} else if (task->num_scatter == 1) {
+		__le64 dma_addr = cpu_to_le64(sg_dma_address(task->scatter));
+		sata_cmd.addr_low = lower_32_bits(dma_addr);
+		sata_cmd.addr_high = upper_32_bits(dma_addr);
+		sata_cmd.len = cpu_to_le32(task->total_xfer_len);
+		sata_cmd.esgl = 0;
+	} else if (task->num_scatter == 0) {
+		sata_cmd.addr_low = 0;
+		sata_cmd.addr_high = 0;
+		sata_cmd.len = cpu_to_le32(task->total_xfer_len);
+		sata_cmd.esgl = 0;
+	}
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd);
+	return 0;
+}
+
+/**
+ * pm8001_chip_phy_start_req - start phy via PHY_START COMMAND
+ * @pm8001_ha: our hba card information.
+ * @num: the inbound queue number
+ * @phy_id: the phy id which we wanted to start up.
+ */
+static int
+pm8001_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id)
+{
+	struct phy_start_req payload;
+	struct inbound_queue_table *circularQ;
+	u32 tag = 0x01;
+	u32 opcode = OPC_INB_PHYSTART;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memset(&payload, 0, sizeof(payload));
+	payload.tag = cpu_to_le32(tag);
+	/*
+	 ** [0:7]   PHY Identifier
+	 ** [8:11]  link rate 1.5G, 3G, 6G
+	 ** [12:13] link mode 01b SAS mode; 10b SATA mode; 11b both
+	 ** [14]    0b disable spin up hold; 1b enable spin up hold
+	 */
+	payload.ase_sh_lm_slr_phyid = cpu_to_le32(SPINHOLD_DISABLE |
+		LINKMODE_AUTO |	LINKRATE_15 |
+		LINKRATE_30 | LINKRATE_60 | phy_id);
+	payload.sas_identify.dev_type = SAS_END_DEV;
+	payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL;
+	memcpy(payload.sas_identify.sas_addr,
+		pm8001_ha->sas_addr, SAS_ADDR_SIZE);
+	payload.sas_identify.phy_id = phy_id;
+	mpi_build_cmd(pm8001_ha, circularQ, opcode, &payload);
+	return 0;
+}
+
+/**
+ * pm8001_chip_phy_stop_req - start phy via PHY_STOP COMMAND
+ * @pm8001_ha: our hba card information.
+ * @num: the inbound queue number
+ * @phy_id: the phy id which we wanted to start up.
+ */
+static int pm8001_chip_phy_stop_req(struct pm8001_hba_info *pm8001_ha,
+	u8 phy_id)
+{
+	struct phy_stop_req payload;
+	struct inbound_queue_table *circularQ;
+	u32 tag = 0x01;
+	u32 opcode = OPC_INB_PHYSTOP;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memset(&payload, 0, sizeof(payload));
+	payload.tag = cpu_to_le32(tag);
+	payload.phy_id = cpu_to_le32(phy_id);
+	mpi_build_cmd(pm8001_ha, circularQ, opcode, &payload);
+	return 0;
+}
+
+/**
+ * see comments on mpi_reg_resp.
+ */
+static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_device *pm8001_dev, u32 flag)
+{
+	struct reg_dev_req payload;
+	u32	opc;
+	u32 stp_sspsmp_sata = 0x4;
+	struct inbound_queue_table *circularQ;
+	u32 linkrate, phy_id;
+	u32 rc, tag = 0xdeadbeef;
+	struct pm8001_ccb_info *ccb;
+	u8 retryFlag = 0x1;
+	u16 firstBurstSize = 0;
+	u16 ITNT = 2000;
+	struct domain_device *dev = pm8001_dev->sas_device;
+	struct domain_device *parent_dev = dev->parent;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+
+	memset(&payload, 0, sizeof(payload));
+	rc = pm8001_tag_alloc(pm8001_ha, &tag);
+	if (rc)
+		return rc;
+	ccb = &pm8001_ha->ccb_info[tag];
+	ccb->device = pm8001_dev;
+	ccb->ccb_tag = tag;
+	payload.tag = cpu_to_le32(tag);
+	if (flag == 1)
+		stp_sspsmp_sata = 0x02; /*direct attached sata */
+	else {
+		if (pm8001_dev->dev_type == SATA_DEV)
+			stp_sspsmp_sata = 0x00; /* stp*/
+		else if (pm8001_dev->dev_type == SAS_END_DEV ||
+			pm8001_dev->dev_type == EDGE_DEV ||
+			pm8001_dev->dev_type == FANOUT_DEV)
+			stp_sspsmp_sata = 0x01; /*ssp or smp*/
+	}
+	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type))
+		phy_id = parent_dev->ex_dev.ex_phy->phy_id;
+	else
+		phy_id = pm8001_dev->attached_phy;
+	opc = OPC_INB_REG_DEV;
+	linkrate = (pm8001_dev->sas_device->linkrate < dev->port->linkrate) ?
+			pm8001_dev->sas_device->linkrate : dev->port->linkrate;
+	payload.phyid_portid =
+		cpu_to_le32(((pm8001_dev->sas_device->port->id) & 0x0F) |
+		((phy_id & 0x0F) << 4));
+	payload.dtype_dlr_retry = cpu_to_le32((retryFlag & 0x01) |
+		((linkrate & 0x0F) * 0x1000000) |
+		((stp_sspsmp_sata & 0x03) * 0x10000000));
+	payload.firstburstsize_ITNexustimeout =
+		cpu_to_le32(ITNT | (firstBurstSize * 0x10000));
+	memcpy(&payload.sas_addr_hi, pm8001_dev->sas_device->sas_addr,
+		SAS_ADDR_SIZE);
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+	return 0;
+}
+
+/**
+ * see comments on mpi_reg_resp.
+ */
+static int pm8001_chip_dereg_dev_req(struct pm8001_hba_info *pm8001_ha,
+	u32 device_id)
+{
+	struct dereg_dev_req payload;
+	u32 opc = OPC_INB_DEREG_DEV_HANDLE;
+	struct inbound_queue_table *circularQ;
+
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memset((u8 *)&payload, 0, sizeof(payload));
+	payload.tag = 1;
+	payload.device_id = cpu_to_le32(device_id);
+	PM8001_MSG_DBG(pm8001_ha,
+		pm8001_printk("unregister device device_id = %d\n", device_id));
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+	return 0;
+}
+
+/**
+ * pm8001_chip_phy_ctl_req - support the local phy operation
+ * @pm8001_ha: our hba card information.
+ * @num: the inbound queue number
+ * @phy_id: the phy id which we wanted to operate
+ * @phy_op:
+ */
+static int pm8001_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha,
+	u32 phyId, u32 phy_op)
+{
+	struct local_phy_ctl_req payload;
+	struct inbound_queue_table *circularQ;
+	u32 opc = OPC_INB_LOCAL_PHY_CONTROL;
+	memset((u8 *)&payload, 0, sizeof(payload));
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	payload.tag = 1;
+	payload.phyop_phyid =
+		cpu_to_le32(((phy_op & 0xff) << 8) | (phyId & 0x0F));
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+	return 0;
+}
+
+static u32 pm8001_chip_is_our_interupt(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 value;
+#ifdef PM8001_USE_MSIX
+	return 1;
+#endif
+	value = pm8001_cr32(pm8001_ha, 0, MSGU_ODR);
+	if (value)
+		return 1;
+	return 0;
+
+}
+
+/**
+ * pm8001_chip_isr - PM8001 isr handler.
+ * @pm8001_ha: our hba card information.
+ * @irq: irq number.
+ * @stat: stat.
+ */
+static void
+pm8001_chip_isr(struct pm8001_hba_info *pm8001_ha)
+{
+	pm8001_chip_interrupt_disable(pm8001_ha);
+	process_oq(pm8001_ha);
+	pm8001_chip_interrupt_enable(pm8001_ha);
+}
+
+static int send_task_abort(struct pm8001_hba_info *pm8001_ha, u32 opc,
+	u32 dev_id, u8 flag, u32 task_tag, u32 cmd_tag)
+{
+	struct task_abort_req task_abort;
+	struct inbound_queue_table *circularQ;
+
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memset(&task_abort, 0, sizeof(task_abort));
+	if (ABORT_SINGLE == (flag & ABORT_MASK)) {
+		task_abort.abort_all = 0;
+		task_abort.device_id = cpu_to_le32(dev_id);
+		task_abort.tag_to_abort = cpu_to_le32(task_tag);
+		task_abort.tag = cpu_to_le32(cmd_tag);
+	} else if (ABORT_ALL == (flag & ABORT_MASK)) {
+		task_abort.abort_all = cpu_to_le32(1);
+		task_abort.device_id = cpu_to_le32(dev_id);
+		task_abort.tag = cpu_to_le32(cmd_tag);
+	}
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort);
+	return 0;
+}
+
+/**
+ * pm8001_chip_abort_task - SAS abort task when error or exception happened.
+ * @task: the task we wanted to aborted.
+ * @flag: the abort flag.
+ */
+static int pm8001_chip_abort_task(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_device *pm8001_dev, u8 flag, u32 task_tag, u32 cmd_tag)
+{
+	u32 opc, device_id;
+	int rc = TMF_RESP_FUNC_FAILED;
+	PM8001_IO_DBG(pm8001_ha, pm8001_printk("Abort tag[%x]", task_tag));
+	if (pm8001_dev->dev_type == SAS_END_DEV)
+		opc = OPC_INB_SSP_ABORT;
+	else if (pm8001_dev->dev_type == SATA_DEV)
+		opc = OPC_INB_SATA_ABORT;
+	else
+		opc = OPC_INB_SMP_ABORT;/* SMP */
+	device_id = pm8001_dev->device_id;
+	rc = send_task_abort(pm8001_ha, opc, device_id, flag,
+		task_tag, cmd_tag);
+	if (rc != TMF_RESP_FUNC_COMPLETE)
+		PM8001_IO_DBG(pm8001_ha, pm8001_printk("rc= %d\n", rc));
+	return rc;
+}
+
+/**
+ * pm8001_chip_ssp_tm_req - built the task managment command.
+ * @pm8001_ha: our hba card information.
+ * @ccb: the ccb information.
+ * @tmf: task management function.
+ */
+static int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf)
+{
+	struct sas_task *task = ccb->task;
+	struct domain_device *dev = task->dev;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	u32 opc = OPC_INB_SSPINITMSTART;
+	struct inbound_queue_table *circularQ;
+	struct ssp_ini_tm_start_req sspTMCmd;
+
+	memset(&sspTMCmd, 0, sizeof(sspTMCmd));
+	sspTMCmd.device_id = cpu_to_le32(pm8001_dev->device_id);
+	sspTMCmd.relate_tag = cpu_to_le32(tmf->tag_of_task_to_be_managed);
+	sspTMCmd.tmf = cpu_to_le32(tmf->tmf);
+	sspTMCmd.ds_ads_m = cpu_to_le32(1 << 2);
+	memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8);
+	sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag);
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd);
+	return 0;
+}
+
+static int pm8001_chip_get_nvmd_req(struct pm8001_hba_info *pm8001_ha,
+	void *payload)
+{
+	u32 opc = OPC_INB_GET_NVMD_DATA;
+	u32 nvmd_type;
+	u32 rc;
+	u32 tag;
+	struct pm8001_ccb_info *ccb;
+	struct inbound_queue_table *circularQ;
+	struct get_nvm_data_req nvmd_req;
+	struct fw_control_ex *fw_control_context;
+	struct pm8001_ioctl_payload *ioctl_payload = payload;
+
+	nvmd_type = ioctl_payload->minor_function;
+	fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL);
+	fw_control_context->usrAddr = (u8 *)&ioctl_payload->func_specific[0];
+	fw_control_context->len = ioctl_payload->length;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memset(&nvmd_req, 0, sizeof(nvmd_req));
+	rc = pm8001_tag_alloc(pm8001_ha, &tag);
+	if (rc)
+		return rc;
+	ccb = &pm8001_ha->ccb_info[tag];
+	ccb->ccb_tag = tag;
+	ccb->fw_control_context = fw_control_context;
+	nvmd_req.tag = cpu_to_le32(tag);
+
+	switch (nvmd_type) {
+	case TWI_DEVICE: {
+		u32 twi_addr, twi_page_size;
+		twi_addr = 0xa8;
+		twi_page_size = 2;
+
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | twi_addr << 16 |
+			twi_page_size << 8 | TWI_DEVICE);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	}
+	case C_SEEPROM: {
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | C_SEEPROM);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	}
+	case VPD_FLASH: {
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | VPD_FLASH);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	}
+	case EXPAN_ROM: {
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | EXPAN_ROM);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	}
+	default:
+		break;
+	}
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &nvmd_req);
+	return 0;
+}
+
+static int pm8001_chip_set_nvmd_req(struct pm8001_hba_info *pm8001_ha,
+	void *payload)
+{
+	u32 opc = OPC_INB_SET_NVMD_DATA;
+	u32 nvmd_type;
+	u32 rc;
+	u32 tag;
+	struct pm8001_ccb_info *ccb;
+	struct inbound_queue_table *circularQ;
+	struct set_nvm_data_req nvmd_req;
+	struct fw_control_ex *fw_control_context;
+	struct pm8001_ioctl_payload *ioctl_payload = payload;
+
+	nvmd_type = ioctl_payload->minor_function;
+	fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL);
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	memcpy(pm8001_ha->memoryMap.region[NVMD].virt_ptr,
+		ioctl_payload->func_specific,
+		ioctl_payload->length);
+	memset(&nvmd_req, 0, sizeof(nvmd_req));
+	rc = pm8001_tag_alloc(pm8001_ha, &tag);
+	if (rc)
+		return rc;
+	ccb = &pm8001_ha->ccb_info[tag];
+	ccb->fw_control_context = fw_control_context;
+	ccb->ccb_tag = tag;
+	nvmd_req.tag = cpu_to_le32(tag);
+	switch (nvmd_type) {
+	case TWI_DEVICE: {
+		u32 twi_addr, twi_page_size;
+		twi_addr = 0xa8;
+		twi_page_size = 2;
+		nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98);
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | twi_addr << 16 |
+			twi_page_size << 8 | TWI_DEVICE);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	}
+	case C_SEEPROM:
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | C_SEEPROM);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	case VPD_FLASH:
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | VPD_FLASH);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	case EXPAN_ROM:
+		nvmd_req.len_ir_vpdd = cpu_to_le32(IPMode | EXPAN_ROM);
+		nvmd_req.resp_len = cpu_to_le32(ioctl_payload->length);
+		nvmd_req.reserved[0] = cpu_to_le32(0xFEDCBA98);
+		nvmd_req.resp_addr_hi =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_hi);
+		nvmd_req.resp_addr_lo =
+		    cpu_to_le32(pm8001_ha->memoryMap.region[NVMD].phys_addr_lo);
+		break;
+	default:
+		break;
+	}
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &nvmd_req);
+	return 0;
+}
+
+/**
+ * pm8001_chip_fw_flash_update_build - support the firmware update operation
+ * @pm8001_ha: our hba card information.
+ * @fw_flash_updata_info: firmware flash update param
+ */
+static int
+pm8001_chip_fw_flash_update_build(struct pm8001_hba_info *pm8001_ha,
+	void *fw_flash_updata_info, u32 tag)
+{
+	struct fw_flash_Update_req payload;
+	struct fw_flash_updata_info *info;
+	struct inbound_queue_table *circularQ;
+	u32 opc = OPC_INB_FW_FLASH_UPDATE;
+
+	memset((u8 *)&payload, 0, sizeof(struct fw_flash_Update_req));
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	info = fw_flash_updata_info;
+	payload.tag = cpu_to_le32(tag);
+	payload.cur_image_len = cpu_to_le32(info->cur_image_len);
+	payload.cur_image_offset = cpu_to_le32(info->cur_image_offset);
+	payload.total_image_len = cpu_to_le32(info->total_image_len);
+	payload.len = info->sgl.im_len.len ;
+	payload.sgl_addr_lo = lower_32_bits(info->sgl.addr);
+	payload.sgl_addr_hi = upper_32_bits(info->sgl.addr);
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+	return 0;
+}
+
+static int
+pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha,
+	void *payload)
+{
+	struct fw_flash_updata_info flash_update_info;
+	struct fw_control_info *fw_control;
+	struct fw_control_ex *fw_control_context;
+	u32 rc;
+	u32 tag;
+	struct pm8001_ccb_info *ccb;
+	void *buffer = NULL;
+	dma_addr_t phys_addr;
+	u32 phys_addr_hi;
+	u32 phys_addr_lo;
+	struct pm8001_ioctl_payload *ioctl_payload = payload;
+
+	fw_control_context = kzalloc(sizeof(struct fw_control_ex), GFP_KERNEL);
+	fw_control = (struct fw_control_info *)&ioctl_payload->func_specific[0];
+	if (fw_control->len != 0) {
+		if (pm8001_mem_alloc(pm8001_ha->pdev,
+			(void **)&buffer,
+			&phys_addr,
+			&phys_addr_hi,
+			&phys_addr_lo,
+			fw_control->len, 0) != 0) {
+				PM8001_FAIL_DBG(pm8001_ha,
+					pm8001_printk("Mem alloc failure\n"));
+				return -ENOMEM;
+		}
+	}
+	memset((void *)buffer, 0, fw_control->len);
+	memcpy((void *)buffer, fw_control->buffer, fw_control->len);
+	flash_update_info.sgl.addr = cpu_to_le64(phys_addr);
+	flash_update_info.sgl.im_len.len = cpu_to_le32(fw_control->len);
+	flash_update_info.sgl.im_len.e = 0;
+	flash_update_info.cur_image_offset = fw_control->offset;
+	flash_update_info.cur_image_len = fw_control->len;
+	flash_update_info.total_image_len = fw_control->size;
+	fw_control_context->fw_control = fw_control;
+	fw_control_context->virtAddr = buffer;
+	fw_control_context->len = fw_control->len;
+	rc = pm8001_tag_alloc(pm8001_ha, &tag);
+	if (rc)
+		return rc;
+	ccb = &pm8001_ha->ccb_info[tag];
+	ccb->fw_control_context = fw_control_context;
+	ccb->ccb_tag = tag;
+	pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info, tag);
+	return 0;
+}
+
+static int
+pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_device *pm8001_dev, u32 state)
+{
+	struct set_dev_state_req payload;
+	struct inbound_queue_table *circularQ;
+	struct pm8001_ccb_info *ccb;
+	u32 rc;
+	u32 tag;
+	u32 opc = OPC_INB_SET_DEVICE_STATE;
+	memset((u8 *)&payload, 0, sizeof(payload));
+	rc = pm8001_tag_alloc(pm8001_ha, &tag);
+	if (rc)
+		return -1;
+	ccb = &pm8001_ha->ccb_info[tag];
+	ccb->ccb_tag = tag;
+	ccb->device = pm8001_dev;
+	circularQ = &pm8001_ha->inbnd_q_tbl[0];
+	payload.tag = cpu_to_le32(tag);
+	payload.device_id = cpu_to_le32(pm8001_dev->device_id);
+	payload.nds = cpu_to_le32(state);
+	mpi_build_cmd(pm8001_ha, circularQ, opc, &payload);
+	return 0;
+
+}
+
+const struct pm8001_dispatch pm8001_8001_dispatch = {
+	.name			= "pmc8001",
+	.chip_init		= pm8001_chip_init,
+	.chip_soft_rst		= pm8001_chip_soft_rst,
+	.chip_rst		= pm8001_hw_chip_rst,
+	.chip_iounmap		= pm8001_chip_iounmap,
+	.isr			= pm8001_chip_isr,
+	.is_our_interupt	= pm8001_chip_is_our_interupt,
+	.isr_process_oq		= process_oq,
+	.interrupt_enable 	= pm8001_chip_interrupt_enable,
+	.interrupt_disable	= pm8001_chip_interrupt_disable,
+	.make_prd		= pm8001_chip_make_sg,
+	.smp_req		= pm8001_chip_smp_req,
+	.ssp_io_req		= pm8001_chip_ssp_io_req,
+	.sata_req		= pm8001_chip_sata_req,
+	.phy_start_req		= pm8001_chip_phy_start_req,
+	.phy_stop_req		= pm8001_chip_phy_stop_req,
+	.reg_dev_req		= pm8001_chip_reg_dev_req,
+	.dereg_dev_req		= pm8001_chip_dereg_dev_req,
+	.phy_ctl_req		= pm8001_chip_phy_ctl_req,
+	.task_abort		= pm8001_chip_abort_task,
+	.ssp_tm_req		= pm8001_chip_ssp_tm_req,
+	.get_nvmd_req		= pm8001_chip_get_nvmd_req,
+	.set_nvmd_req		= pm8001_chip_set_nvmd_req,
+	.fw_flash_update_req	= pm8001_chip_fw_flash_update_req,
+	.set_dev_state_req	= pm8001_chip_set_dev_state_req,
+};
+
diff --git a/drivers/scsi/pm8001/pm8001_hwi.h b/drivers/scsi/pm8001/pm8001_hwi.h
new file mode 100644
index 000000000000..3690a2ba0eb2
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_hwi.h
@@ -0,0 +1,1011 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+#ifndef _PMC8001_REG_H_
+#define _PMC8001_REG_H_
+
+#include <linux/types.h>
+#include <scsi/libsas.h>
+
+
+/* for Request Opcode of IOMB */
+#define OPC_INB_ECHO				1	/* 0x000 */
+#define OPC_INB_PHYSTART			4	/* 0x004 */
+#define OPC_INB_PHYSTOP				5	/* 0x005 */
+#define OPC_INB_SSPINIIOSTART			6	/* 0x006 */
+#define OPC_INB_SSPINITMSTART			7	/* 0x007 */
+#define OPC_INB_SSPINIEXTIOSTART		8	/* 0x008 */
+#define OPC_INB_DEV_HANDLE_ACCEPT		9	/* 0x009 */
+#define OPC_INB_SSPTGTIOSTART			10	/* 0x00A */
+#define OPC_INB_SSPTGTRSPSTART			11	/* 0x00B */
+#define OPC_INB_SSPINIEDCIOSTART		12	/* 0x00C */
+#define OPC_INB_SSPINIEXTEDCIOSTART		13	/* 0x00D */
+#define OPC_INB_SSPTGTEDCIOSTART		14	/* 0x00E */
+#define OPC_INB_SSP_ABORT			15	/* 0x00F */
+#define OPC_INB_DEREG_DEV_HANDLE		16	/* 0x010 */
+#define OPC_INB_GET_DEV_HANDLE			17	/* 0x011 */
+#define OPC_INB_SMP_REQUEST			18	/* 0x012 */
+/* SMP_RESPONSE is removed */
+#define OPC_INB_SMP_RESPONSE			19	/* 0x013 */
+#define OPC_INB_SMP_ABORT			20	/* 0x014 */
+#define OPC_INB_REG_DEV				22	/* 0x016 */
+#define OPC_INB_SATA_HOST_OPSTART		23	/* 0x017 */
+#define OPC_INB_SATA_ABORT			24	/* 0x018 */
+#define OPC_INB_LOCAL_PHY_CONTROL		25	/* 0x019 */
+#define OPC_INB_GET_DEV_INFO			26	/* 0x01A */
+#define OPC_INB_FW_FLASH_UPDATE			32	/* 0x020 */
+#define OPC_INB_GPIO				34	/* 0x022 */
+#define OPC_INB_SAS_DIAG_MODE_START_END		35	/* 0x023 */
+#define OPC_INB_SAS_DIAG_EXECUTE		36	/* 0x024 */
+#define OPC_INB_SAS_HW_EVENT_ACK		37	/* 0x025 */
+#define OPC_INB_GET_TIME_STAMP			38	/* 0x026 */
+#define OPC_INB_PORT_CONTROL			39	/* 0x027 */
+#define OPC_INB_GET_NVMD_DATA			40	/* 0x028 */
+#define OPC_INB_SET_NVMD_DATA			41	/* 0x029 */
+#define OPC_INB_SET_DEVICE_STATE		42	/* 0x02A */
+#define OPC_INB_GET_DEVICE_STATE		43	/* 0x02B */
+#define OPC_INB_SET_DEV_INFO			44	/* 0x02C */
+#define OPC_INB_SAS_RE_INITIALIZE		45	/* 0x02D */
+
+/* for Response Opcode of IOMB */
+#define OPC_OUB_ECHO				1	/* 0x001 */
+#define OPC_OUB_HW_EVENT			4	/* 0x004 */
+#define OPC_OUB_SSP_COMP			5	/* 0x005 */
+#define OPC_OUB_SMP_COMP			6	/* 0x006 */
+#define OPC_OUB_LOCAL_PHY_CNTRL			7	/* 0x007 */
+#define OPC_OUB_DEV_REGIST			10	/* 0x00A */
+#define OPC_OUB_DEREG_DEV			11	/* 0x00B */
+#define OPC_OUB_GET_DEV_HANDLE			12	/* 0x00C */
+#define OPC_OUB_SATA_COMP			13	/* 0x00D */
+#define OPC_OUB_SATA_EVENT			14	/* 0x00E */
+#define OPC_OUB_SSP_EVENT			15	/* 0x00F */
+#define OPC_OUB_DEV_HANDLE_ARRIV		16	/* 0x010 */
+/* SMP_RECEIVED Notification is removed */
+#define OPC_OUB_SMP_RECV_EVENT			17	/* 0x011 */
+#define OPC_OUB_SSP_RECV_EVENT			18	/* 0x012 */
+#define OPC_OUB_DEV_INFO			19	/* 0x013 */
+#define OPC_OUB_FW_FLASH_UPDATE			20	/* 0x014 */
+#define OPC_OUB_GPIO_RESPONSE			22	/* 0x016 */
+#define OPC_OUB_GPIO_EVENT			23	/* 0x017 */
+#define OPC_OUB_GENERAL_EVENT			24	/* 0x018 */
+#define OPC_OUB_SSP_ABORT_RSP			26	/* 0x01A */
+#define OPC_OUB_SATA_ABORT_RSP			27	/* 0x01B */
+#define OPC_OUB_SAS_DIAG_MODE_START_END		28	/* 0x01C */
+#define OPC_OUB_SAS_DIAG_EXECUTE		29	/* 0x01D */
+#define OPC_OUB_GET_TIME_STAMP			30	/* 0x01E */
+#define OPC_OUB_SAS_HW_EVENT_ACK		31	/* 0x01F */
+#define OPC_OUB_PORT_CONTROL			32	/* 0x020 */
+#define OPC_OUB_SKIP_ENTRY			33	/* 0x021 */
+#define OPC_OUB_SMP_ABORT_RSP			34	/* 0x022 */
+#define OPC_OUB_GET_NVMD_DATA			35	/* 0x023 */
+#define OPC_OUB_SET_NVMD_DATA			36	/* 0x024 */
+#define OPC_OUB_DEVICE_HANDLE_REMOVAL		37	/* 0x025 */
+#define OPC_OUB_SET_DEVICE_STATE		38	/* 0x026 */
+#define OPC_OUB_GET_DEVICE_STATE		39	/* 0x027 */
+#define OPC_OUB_SET_DEV_INFO			40	/* 0x028 */
+#define OPC_OUB_SAS_RE_INITIALIZE		41	/* 0x029 */
+
+/* for phy start*/
+#define SPINHOLD_DISABLE		(0x00 << 14)
+#define SPINHOLD_ENABLE			(0x01 << 14)
+#define LINKMODE_SAS			(0x01 << 12)
+#define LINKMODE_DSATA			(0x02 << 12)
+#define LINKMODE_AUTO			(0x03 << 12)
+#define LINKRATE_15			(0x01 << 8)
+#define LINKRATE_30			(0x02 << 8)
+#define LINKRATE_60			(0x04 << 8)
+
+struct mpi_msg_hdr{
+	__le32	header;	/* Bits [11:0]  - Message operation code */
+	/* Bits [15:12] - Message Category */
+	/* Bits [21:16] - Outboundqueue ID for the
+	operation completion message */
+	/* Bits [23:22] - Reserved */
+	/* Bits [28:24] - Buffer Count, indicates how
+	many buffer are allocated for the massage */
+	/* Bits [30:29] - Reserved */
+	/* Bits [31] - Message Valid bit */
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of PHY Start Command
+ * use to describe enable the phy (64 bytes)
+ */
+struct phy_start_req {
+	__le32	tag;
+	__le32	ase_sh_lm_slr_phyid;
+	struct sas_identify_frame sas_identify;
+	u32	reserved[5];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of PHY Start Command
+ * use to disable the phy (64 bytes)
+ */
+struct phy_stop_req {
+	__le32	tag;
+	__le32	phy_id;
+	u32	reserved[13];
+} __attribute__((packed, aligned(4)));
+
+
+/* set device bits fis - device to host */
+struct  set_dev_bits_fis {
+	u8	fis_type;	/* 0xA1*/
+	u8	n_i_pmport;
+	/* b7 : n Bit. Notification bit. If set device needs attention. */
+	/* b6 : i Bit. Interrupt Bit */
+	/* b5-b4: reserved2 */
+	/* b3-b0: PM Port */
+	u8 	status;
+	u8	error;
+	u32	_r_a;
+} __attribute__ ((packed));
+/* PIO setup FIS - device to host */
+struct  pio_setup_fis {
+	u8	fis_type;	/* 0x5f */
+	u8	i_d_pmPort;
+	/* b7 : reserved */
+	/* b6 : i bit. Interrupt bit */
+	/* b5 : d bit. data transfer direction. set to 1 for device to host
+	xfer */
+	/* b4 : reserved */
+	/* b3-b0: PM Port */
+	u8	status;
+	u8	error;
+	u8	lbal;
+	u8	lbam;
+	u8	lbah;
+	u8	device;
+	u8	lbal_exp;
+	u8	lbam_exp;
+	u8	lbah_exp;
+	u8	_r_a;
+	u8	sector_count;
+	u8	sector_count_exp;
+	u8	_r_b;
+	u8	e_status;
+	u8	_r_c[2];
+	u8	transfer_count;
+} __attribute__ ((packed));
+
+/*
+ * brief the data structure of SATA Completion Response
+ * use to discribe the sata task response (64 bytes)
+ */
+struct sata_completion_resp {
+	__le32	tag;
+	__le32	status;
+	__le32	param;
+	u32	sata_resp[12];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of SAS HW Event Notification
+ * use to alert the host about the hardware event(64 bytes)
+ */
+struct hw_event_resp {
+	__le32	lr_evt_status_phyid_portid;
+	__le32	evt_param;
+	__le32	npip_portstate;
+	struct sas_identify_frame	sas_identify;
+	struct dev_to_host_fis	sata_fis;
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of  REGISTER DEVICE Command
+ * use to describe MPI REGISTER DEVICE Command (64 bytes)
+ */
+
+struct reg_dev_req {
+	__le32	tag;
+	__le32	phyid_portid;
+	__le32	dtype_dlr_retry;
+	__le32	firstburstsize_ITNexustimeout;
+	u32	sas_addr_hi;
+	u32	sas_addr_low;
+	__le32	upper_device_id;
+	u32	reserved[8];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of  DEREGISTER DEVICE Command
+ * use to request spc to remove all internal resources associated
+ * with the device id (64 bytes)
+ */
+
+struct dereg_dev_req {
+	__le32	tag;
+	__le32	device_id;
+	u32	reserved[13];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of DEVICE_REGISTRATION Response
+ * use to notify the completion of the device registration  (64 bytes)
+ */
+
+struct dev_reg_resp {
+	__le32	tag;
+	__le32	status;
+	__le32	device_id;
+	u32	reserved[12];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of Local PHY Control Command
+ * use to issue PHY CONTROL to local phy (64 bytes)
+ */
+struct local_phy_ctl_req {
+	__le32	tag;
+	__le32	phyop_phyid;
+	u32	reserved1[13];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of Local Phy Control Response
+ * use to describe MPI Local Phy Control Response (64 bytes)
+ */
+struct local_phy_ctl_resp {
+	__le32	tag;
+	__le32	phyop_phyid;
+	__le32	status;
+	u32	reserved[12];
+} __attribute__((packed, aligned(4)));
+
+
+#define OP_BITS 0x0000FF00
+#define ID_BITS 0x0000000F
+
+/*
+ * brief the data structure of PORT Control Command
+ * use to control port properties (64 bytes)
+ */
+
+struct port_ctl_req {
+	__le32	tag;
+	__le32	portop_portid;
+	__le32	param0;
+	__le32	param1;
+	u32	reserved1[11];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of HW Event Ack Command
+ * use to acknowledge receive HW event (64 bytes)
+ */
+
+struct hw_event_ack_req {
+	__le32	tag;
+	__le32	sea_phyid_portid;
+	__le32	param0;
+	__le32	param1;
+	u32	reserved1[11];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of SSP Completion Response
+ * use to indicate a SSP Completion  (n bytes)
+ */
+struct ssp_completion_resp {
+	__le32	tag;
+	__le32	status;
+	__le32	param;
+	__le32	ssptag_rescv_rescpad;
+	struct ssp_response_iu  ssp_resp_iu;
+	__le32	residual_count;
+} __attribute__((packed, aligned(4)));
+
+
+#define SSP_RESCV_BIT	0x00010000
+
+/*
+ * brief the data structure of SATA EVNET esponse
+ * use to indicate a SATA Completion  (64 bytes)
+ */
+
+struct sata_event_resp {
+	__le32	tag;
+	__le32	event;
+	__le32	port_id;
+	__le32	device_id;
+	u32	reserved[11];
+} __attribute__((packed, aligned(4)));
+
+/*
+ * brief the data structure of SSP EVNET esponse
+ * use to indicate a SSP Completion  (64 bytes)
+ */
+
+struct ssp_event_resp {
+	__le32	tag;
+	__le32	event;
+	__le32	port_id;
+	__le32	device_id;
+	u32	reserved[11];
+} __attribute__((packed, aligned(4)));
+
+/**
+ * brief the data structure of General Event Notification Response
+ * use to describe MPI General Event Notification Response (64 bytes)
+ */
+struct general_event_resp {
+	__le32	status;
+	__le32	inb_IOMB_payload[14];
+} __attribute__((packed, aligned(4)));
+
+
+#define GENERAL_EVENT_PAYLOAD	14
+#define OPCODE_BITS	0x00000fff
+
+/*
+ * brief the data structure of SMP Request Command
+ * use to describe MPI SMP REQUEST Command (64 bytes)
+ */
+struct smp_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	len_ip_ir;
+	/* Bits [0]  - Indirect response */
+	/* Bits [1] - Indirect Payload */
+	/* Bits [15:2] - Reserved */
+	/* Bits [23:16] - direct payload Len */
+	/* Bits [31:24] - Reserved */
+	u8	smp_req16[16];
+	union {
+		u8	smp_req[32];
+		struct {
+			__le64 long_req_addr;/* sg dma address, LE */
+			__le32 long_req_size;/* LE */
+			u32	_r_a;
+			__le64 long_resp_addr;/* sg dma address, LE */
+			__le32 long_resp_size;/* LE */
+			u32	_r_b;
+			} long_smp_req;/* sequencer extension */
+	};
+} __attribute__((packed, aligned(4)));
+/*
+ * brief the data structure of SMP Completion Response
+ * use to describe MPI SMP Completion Response (64 bytes)
+ */
+struct smp_completion_resp {
+	__le32	tag;
+	__le32	status;
+	__le32	param;
+	__le32	_r_a[12];
+} __attribute__((packed, aligned(4)));
+
+/*
+ *brief the data structure of SSP SMP SATA Abort Command
+ * use to describe MPI SSP SMP & SATA Abort Command (64 bytes)
+ */
+struct task_abort_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	tag_to_abort;
+	__le32	abort_all;
+	u32	reserved[11];
+} __attribute__((packed, aligned(4)));
+
+/* These flags used for SSP SMP & SATA Abort */
+#define ABORT_MASK		0x3
+#define ABORT_SINGLE		0x0
+#define ABORT_ALL		0x1
+
+/**
+ * brief the data structure of SSP SATA SMP Abort Response
+ * use to describe SSP SMP & SATA Abort Response ( 64 bytes)
+ */
+struct task_abort_resp {
+	__le32	tag;
+	__le32	status;
+	__le32	scp;
+	u32	reserved[12];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of SAS Diagnostic Start/End Command
+ * use to describe MPI SAS Diagnostic Start/End Command (64 bytes)
+ */
+struct sas_diag_start_end_req {
+	__le32	tag;
+	__le32	operation_phyid;
+	u32	reserved[13];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of SAS Diagnostic Execute Command
+ * use to describe MPI SAS Diagnostic Execute Command (64 bytes)
+ */
+struct sas_diag_execute_req{
+	__le32	tag;
+	__le32	cmdtype_cmddesc_phyid;
+	__le32	pat1_pat2;
+	__le32	threshold;
+	__le32	codepat_errmsk;
+	__le32	pmon;
+	__le32	pERF1CTL;
+	u32	reserved[8];
+} __attribute__((packed, aligned(4)));
+
+
+#define SAS_DIAG_PARAM_BYTES 24
+
+/*
+ * brief the data structure of Set Device State Command
+ * use to describe MPI Set Device State Command (64 bytes)
+ */
+struct set_dev_state_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	nds;
+	u32	reserved[12];
+} __attribute__((packed, aligned(4)));
+
+
+/*
+ * brief the data structure of SATA Start Command
+ * use to describe MPI SATA IO Start Command (64 bytes)
+ */
+
+struct sata_start_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	data_len;
+	__le32	ncqtag_atap_dir_m;
+	struct host_to_dev_fis	sata_fis;
+	u32	reserved1;
+	u32	reserved2;
+	u32	addr_low;
+	u32	addr_high;
+	__le32	len;
+	__le32	esgl;
+} __attribute__((packed, aligned(4)));
+
+/**
+ * brief the data structure of SSP INI TM Start Command
+ * use to describe MPI SSP INI TM Start Command (64 bytes)
+ */
+struct ssp_ini_tm_start_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	relate_tag;
+	__le32	tmf;
+	u8	lun[8];
+	__le32	ds_ads_m;
+	u32	reserved[8];
+} __attribute__((packed, aligned(4)));
+
+
+struct ssp_info_unit {
+	u8	lun[8];/* SCSI Logical Unit Number */
+	u8	reserved1;/* reserved */
+	u8	efb_prio_attr;
+	/* B7   : enabledFirstBurst */
+	/* B6-3 : taskPriority */
+	/* B2-0 : taskAttribute */
+	u8	reserved2;	/* reserved */
+	u8	additional_cdb_len;
+	/* B7-2 : additional_cdb_len */
+	/* B1-0 : reserved */
+	u8	cdb[16];/* The SCSI CDB up to 16 bytes length */
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of SSP INI IO Start Command
+ * use to describe MPI SSP INI IO Start Command (64 bytes)
+ */
+struct ssp_ini_io_start_req {
+	__le32	tag;
+	__le32	device_id;
+	__le32	data_len;
+	__le32	dir_m_tlr;
+	struct ssp_info_unit	ssp_iu;
+	__le32	addr_low;
+	__le32	addr_high;
+	__le32	len;
+	__le32	esgl;
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of Firmware download
+ * use to describe MPI FW DOWNLOAD Command (64 bytes)
+ */
+struct fw_flash_Update_req {
+	__le32	tag;
+	__le32	cur_image_offset;
+	__le32	cur_image_len;
+	__le32	total_image_len;
+	u32	reserved0[7];
+	__le32	sgl_addr_lo;
+	__le32	sgl_addr_hi;
+	__le32	len;
+	__le32	ext_reserved;
+} __attribute__((packed, aligned(4)));
+
+
+#define FWFLASH_IOMB_RESERVED_LEN 0x07
+/**
+ * brief the data structure of FW_FLASH_UPDATE Response
+ * use to describe MPI FW_FLASH_UPDATE Response (64 bytes)
+ *
+ */
+struct fw_flash_Update_resp {
+	dma_addr_t	tag;
+	__le32	status;
+	u32	reserved[13];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of Get NVM Data Command
+ * use to get data from NVM in HBA(64 bytes)
+ */
+struct get_nvm_data_req {
+	__le32	tag;
+	__le32	len_ir_vpdd;
+	__le32	vpd_offset;
+	u32	reserved[8];
+	__le32	resp_addr_lo;
+	__le32	resp_addr_hi;
+	__le32	resp_len;
+	u32	reserved1;
+} __attribute__((packed, aligned(4)));
+
+
+struct set_nvm_data_req {
+	__le32	tag;
+	__le32	len_ir_vpdd;
+	__le32	vpd_offset;
+	u32	reserved[8];
+	__le32	resp_addr_lo;
+	__le32	resp_addr_hi;
+	__le32	resp_len;
+	u32	reserved1;
+} __attribute__((packed, aligned(4)));
+
+
+#define TWI_DEVICE	0x0
+#define C_SEEPROM	0x1
+#define VPD_FLASH	0x4
+#define AAP1_RDUMP	0x5
+#define IOP_RDUMP	0x6
+#define EXPAN_ROM	0x7
+
+#define IPMode		0x80000000
+#define NVMD_TYPE	0x0000000F
+#define NVMD_STAT	0x0000FFFF
+#define NVMD_LEN	0xFF000000
+/**
+ * brief the data structure of Get NVMD Data Response
+ * use to describe MPI Get NVMD Data Response (64 bytes)
+ */
+struct get_nvm_data_resp {
+	__le32		tag;
+	__le32		ir_tda_bn_dps_das_nvm;
+	__le32		dlen_status;
+	__le32		nvm_data[12];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of SAS Diagnostic Start/End Response
+ * use to describe MPI SAS Diagnostic Start/End Response (64 bytes)
+ *
+ */
+struct sas_diag_start_end_resp {
+	__le32		tag;
+	__le32		status;
+	u32		reserved[13];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of SAS Diagnostic Execute Response
+ * use to describe MPI SAS Diagnostic Execute Response (64 bytes)
+ *
+ */
+struct sas_diag_execute_resp {
+	__le32		tag;
+	__le32		cmdtype_cmddesc_phyid;
+	__le32		Status;
+	__le32		ReportData;
+	u32		reserved[11];
+} __attribute__((packed, aligned(4)));
+
+
+/**
+ * brief the data structure of Set Device State Response
+ * use to describe MPI Set Device State Response (64 bytes)
+ *
+ */
+struct set_dev_state_resp {
+	__le32		tag;
+	__le32		status;
+	__le32		device_id;
+	__le32		pds_nds;
+	u32		reserved[11];
+} __attribute__((packed, aligned(4)));
+
+
+#define NDS_BITS 0x0F
+#define PDS_BITS 0xF0
+
+/*
+ * HW Events type
+ */
+
+#define HW_EVENT_RESET_START			0x01
+#define HW_EVENT_CHIP_RESET_COMPLETE		0x02
+#define HW_EVENT_PHY_STOP_STATUS		0x03
+#define HW_EVENT_SAS_PHY_UP			0x04
+#define HW_EVENT_SATA_PHY_UP			0x05
+#define HW_EVENT_SATA_SPINUP_HOLD		0x06
+#define HW_EVENT_PHY_DOWN			0x07
+#define HW_EVENT_PORT_INVALID			0x08
+#define HW_EVENT_BROADCAST_CHANGE		0x09
+#define HW_EVENT_PHY_ERROR			0x0A
+#define HW_EVENT_BROADCAST_SES			0x0B
+#define HW_EVENT_INBOUND_CRC_ERROR		0x0C
+#define HW_EVENT_HARD_RESET_RECEIVED		0x0D
+#define HW_EVENT_MALFUNCTION			0x0E
+#define HW_EVENT_ID_FRAME_TIMEOUT		0x0F
+#define HW_EVENT_BROADCAST_EXP			0x10
+#define HW_EVENT_PHY_START_STATUS		0x11
+#define HW_EVENT_LINK_ERR_INVALID_DWORD		0x12
+#define HW_EVENT_LINK_ERR_DISPARITY_ERROR	0x13
+#define HW_EVENT_LINK_ERR_CODE_VIOLATION	0x14
+#define HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH	0x15
+#define HW_EVENT_LINK_ERR_PHY_RESET_FAILED	0x16
+#define HW_EVENT_PORT_RECOVERY_TIMER_TMO	0x17
+#define HW_EVENT_PORT_RECOVER			0x18
+#define HW_EVENT_PORT_RESET_TIMER_TMO		0x19
+#define HW_EVENT_PORT_RESET_COMPLETE		0x20
+#define EVENT_BROADCAST_ASYNCH_EVENT		0x21
+
+/* port state */
+#define PORT_NOT_ESTABLISHED			0x00
+#define PORT_VALID				0x01
+#define PORT_LOSTCOMM				0x02
+#define PORT_IN_RESET				0x04
+#define PORT_INVALID				0x08
+
+/*
+ * SSP/SMP/SATA IO Completion Status values
+ */
+
+#define IO_SUCCESS				0x00
+#define IO_ABORTED				0x01
+#define IO_OVERFLOW				0x02
+#define IO_UNDERFLOW				0x03
+#define IO_FAILED				0x04
+#define IO_ABORT_RESET				0x05
+#define IO_NOT_VALID				0x06
+#define IO_NO_DEVICE				0x07
+#define IO_ILLEGAL_PARAMETER			0x08
+#define IO_LINK_FAILURE				0x09
+#define IO_PROG_ERROR				0x0A
+#define IO_EDC_IN_ERROR				0x0B
+#define IO_EDC_OUT_ERROR			0x0C
+#define IO_ERROR_HW_TIMEOUT			0x0D
+#define IO_XFER_ERROR_BREAK			0x0E
+#define IO_XFER_ERROR_PHY_NOT_READY		0x0F
+#define IO_OPEN_CNX_ERROR_PROTOCOL_NOT_SUPPORTED	0x10
+#define IO_OPEN_CNX_ERROR_ZONE_VIOLATION		0x11
+#define IO_OPEN_CNX_ERROR_BREAK				0x12
+#define IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS			0x13
+#define IO_OPEN_CNX_ERROR_BAD_DESTINATION		0x14
+#define IO_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED	0x15
+#define IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY		0x16
+#define IO_OPEN_CNX_ERROR_WRONG_DESTINATION		0x17
+#define IO_OPEN_CNX_ERROR_UNKNOWN_ERROR			0x18
+#define IO_XFER_ERROR_NAK_RECEIVED			0x19
+#define IO_XFER_ERROR_ACK_NAK_TIMEOUT			0x1A
+#define IO_XFER_ERROR_PEER_ABORTED			0x1B
+#define IO_XFER_ERROR_RX_FRAME				0x1C
+#define IO_XFER_ERROR_DMA				0x1D
+#define IO_XFER_ERROR_CREDIT_TIMEOUT			0x1E
+#define IO_XFER_ERROR_SATA_LINK_TIMEOUT			0x1F
+#define IO_XFER_ERROR_SATA				0x20
+#define IO_XFER_ERROR_ABORTED_DUE_TO_SRST		0x22
+#define IO_XFER_ERROR_REJECTED_NCQ_MODE			0x21
+#define IO_XFER_ERROR_ABORTED_NCQ_MODE			0x23
+#define IO_XFER_OPEN_RETRY_TIMEOUT			0x24
+#define IO_XFER_SMP_RESP_CONNECTION_ERROR		0x25
+#define IO_XFER_ERROR_UNEXPECTED_PHASE			0x26
+#define IO_XFER_ERROR_XFER_RDY_OVERRUN			0x27
+#define IO_XFER_ERROR_XFER_RDY_NOT_EXPECTED		0x28
+
+#define IO_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT		0x30
+#define IO_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NAK	0x31
+#define IO_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK	0x32
+
+#define IO_XFER_ERROR_OFFSET_MISMATCH			0x34
+#define IO_XFER_ERROR_XFER_ZERO_DATA_LEN		0x35
+#define IO_XFER_CMD_FRAME_ISSUED			0x36
+#define IO_ERROR_INTERNAL_SMP_RESOURCE			0x37
+#define IO_PORT_IN_RESET				0x38
+#define IO_DS_NON_OPERATIONAL				0x39
+#define IO_DS_IN_RECOVERY				0x3A
+#define IO_TM_TAG_NOT_FOUND				0x3B
+#define IO_XFER_PIO_SETUP_ERROR				0x3C
+#define IO_SSP_EXT_IU_ZERO_LEN_ERROR			0x3D
+#define IO_DS_IN_ERROR					0x3E
+#define IO_OPEN_CNX_ERROR_HW_RESOURCE_BUSY		0x3F
+#define IO_ABORT_IN_PROGRESS				0x40
+#define IO_ABORT_DELAYED				0x41
+#define IO_INVALID_LENGTH				0x42
+
+/* WARNING: This error code must always be the last number.
+ * If you add error code, modify this code also
+ * It is used as an index
+ */
+#define IO_ERROR_UNKNOWN_GENERIC			0x43
+
+/* MSGU CONFIGURATION  TABLE*/
+
+#define SPC_MSGU_CFG_TABLE_UPDATE		0x01/* Inbound doorbell bit0 */
+#define SPC_MSGU_CFG_TABLE_RESET		0x02/* Inbound doorbell bit1 */
+#define SPC_MSGU_CFG_TABLE_FREEZE		0x04/* Inbound doorbell bit2 */
+#define SPC_MSGU_CFG_TABLE_UNFREEZE		0x08/* Inbound doorbell bit4 */
+#define MSGU_IBDB_SET				0x04
+#define MSGU_HOST_INT_STATUS			0x08
+#define MSGU_HOST_INT_MASK			0x0C
+#define MSGU_IOPIB_INT_STATUS			0x18
+#define MSGU_IOPIB_INT_MASK			0x1C
+#define MSGU_IBDB_CLEAR				0x20/* RevB - Host not use */
+#define MSGU_MSGU_CONTROL			0x24
+#define MSGU_ODR				0x3C/* RevB */
+#define MSGU_ODCR				0x40/* RevB */
+#define MSGU_SCRATCH_PAD_0			0x44
+#define MSGU_SCRATCH_PAD_1			0x48
+#define MSGU_SCRATCH_PAD_2			0x4C
+#define MSGU_SCRATCH_PAD_3			0x50
+#define MSGU_HOST_SCRATCH_PAD_0			0x54
+#define MSGU_HOST_SCRATCH_PAD_1			0x58
+#define MSGU_HOST_SCRATCH_PAD_2			0x5C
+#define MSGU_HOST_SCRATCH_PAD_3			0x60
+#define MSGU_HOST_SCRATCH_PAD_4			0x64
+#define MSGU_HOST_SCRATCH_PAD_5			0x68
+#define MSGU_HOST_SCRATCH_PAD_6			0x6C
+#define MSGU_HOST_SCRATCH_PAD_7			0x70
+#define MSGU_ODMR				0x74/* RevB */
+
+/* bit definition for ODMR register */
+#define ODMR_MASK_ALL				0xFFFFFFFF/* mask all
+					interrupt vector */
+#define ODMR_CLEAR_ALL				0/* clear all
+					interrupt vector */
+/* bit definition for ODCR register */
+#define ODCR_CLEAR_ALL		0xFFFFFFFF   /* mask all
+					interrupt vector*/
+/* MSIX Interupts */
+#define MSIX_TABLE_OFFSET		0x2000
+#define MSIX_TABLE_ELEMENT_SIZE		0x10
+#define MSIX_INTERRUPT_CONTROL_OFFSET	0xC
+#define MSIX_TABLE_BASE	  (MSIX_TABLE_OFFSET + MSIX_INTERRUPT_CONTROL_OFFSET)
+#define MSIX_INTERRUPT_DISABLE		0x1
+#define MSIX_INTERRUPT_ENABLE		0x0
+
+
+/* state definition for Scratch Pad1 register */
+#define SCRATCH_PAD1_POR		0x00  /* power on reset state */
+#define SCRATCH_PAD1_SFR		0x01  /* soft reset state */
+#define SCRATCH_PAD1_ERR		0x02  /* error state */
+#define SCRATCH_PAD1_RDY		0x03  /* ready state */
+#define SCRATCH_PAD1_RST		0x04  /* soft reset toggle flag */
+#define SCRATCH_PAD1_AAP1RDY_RST	0x08  /* AAP1 ready for soft reset */
+#define SCRATCH_PAD1_STATE_MASK		0xFFFFFFF0   /* ScratchPad1
+ Mask, bit1-0 State, bit2 Soft Reset, bit3 FW RDY for Soft Reset */
+#define SCRATCH_PAD1_RESERVED		0x000003F8   /* Scratch Pad1
+ Reserved bit 3 to 9 */
+
+ /* state definition for Scratch Pad2 register */
+#define SCRATCH_PAD2_POR		0x00  /* power on state */
+#define SCRATCH_PAD2_SFR		0x01  /* soft reset state */
+#define SCRATCH_PAD2_ERR		0x02  /* error state */
+#define SCRATCH_PAD2_RDY		0x03  /* ready state */
+#define SCRATCH_PAD2_FWRDY_RST		0x04  /* FW ready for soft reset flag*/
+#define SCRATCH_PAD2_IOPRDY_RST		0x08  /* IOP ready for soft reset */
+#define SCRATCH_PAD2_STATE_MASK		0xFFFFFFF4 /* ScratchPad 2
+ Mask, bit1-0 State */
+#define SCRATCH_PAD2_RESERVED		0x000003FC   /* Scratch Pad1
+ Reserved bit 2 to 9 */
+
+#define SCRATCH_PAD_ERROR_MASK		0xFFFFFC00   /* Error mask bits */
+#define SCRATCH_PAD_STATE_MASK		0x00000003   /* State Mask bits */
+
+/* main configuration offset - byte offset */
+#define MAIN_SIGNATURE_OFFSET		0x00/* DWORD 0x00 */
+#define MAIN_INTERFACE_REVISION		0x04/* DWORD 0x01 */
+#define MAIN_FW_REVISION		0x08/* DWORD 0x02 */
+#define MAIN_MAX_OUTSTANDING_IO_OFFSET	0x0C/* DWORD 0x03 */
+#define MAIN_MAX_SGL_OFFSET		0x10/* DWORD 0x04 */
+#define MAIN_CNTRL_CAP_OFFSET		0x14/* DWORD 0x05 */
+#define MAIN_GST_OFFSET			0x18/* DWORD 0x06 */
+#define MAIN_IBQ_OFFSET			0x1C/* DWORD 0x07 */
+#define MAIN_OBQ_OFFSET			0x20/* DWORD 0x08 */
+#define MAIN_IQNPPD_HPPD_OFFSET		0x24/* DWORD 0x09 */
+#define MAIN_OB_HW_EVENT_PID03_OFFSET	0x28/* DWORD 0x0A */
+#define MAIN_OB_HW_EVENT_PID47_OFFSET	0x2C/* DWORD 0x0B */
+#define MAIN_OB_NCQ_EVENT_PID03_OFFSET	0x30/* DWORD 0x0C */
+#define MAIN_OB_NCQ_EVENT_PID47_OFFSET	0x34/* DWORD 0x0D */
+#define MAIN_TITNX_EVENT_PID03_OFFSET	0x38/* DWORD 0x0E */
+#define MAIN_TITNX_EVENT_PID47_OFFSET	0x3C/* DWORD 0x0F */
+#define MAIN_OB_SSP_EVENT_PID03_OFFSET	0x40/* DWORD 0x10 */
+#define MAIN_OB_SSP_EVENT_PID47_OFFSET	0x44/* DWORD 0x11 */
+#define MAIN_OB_SMP_EVENT_PID03_OFFSET	0x48/* DWORD 0x12 */
+#define MAIN_OB_SMP_EVENT_PID47_OFFSET	0x4C/* DWORD 0x13 */
+#define MAIN_EVENT_LOG_ADDR_HI		0x50/* DWORD 0x14 */
+#define MAIN_EVENT_LOG_ADDR_LO		0x54/* DWORD 0x15 */
+#define MAIN_EVENT_LOG_BUFF_SIZE	0x58/* DWORD 0x16 */
+#define MAIN_EVENT_LOG_OPTION		0x5C/* DWORD 0x17 */
+#define MAIN_IOP_EVENT_LOG_ADDR_HI	0x60/* DWORD 0x18 */
+#define MAIN_IOP_EVENT_LOG_ADDR_LO	0x64/* DWORD 0x19 */
+#define MAIN_IOP_EVENT_LOG_BUFF_SIZE	0x68/* DWORD 0x1A */
+#define MAIN_IOP_EVENT_LOG_OPTION	0x6C/* DWORD 0x1B */
+#define MAIN_FATAL_ERROR_INTERRUPT	0x70/* DWORD 0x1C */
+#define MAIN_FATAL_ERROR_RDUMP0_OFFSET	0x74/* DWORD 0x1D */
+#define MAIN_FATAL_ERROR_RDUMP0_LENGTH	0x78/* DWORD 0x1E */
+#define MAIN_FATAL_ERROR_RDUMP1_OFFSET	0x7C/* DWORD 0x1F */
+#define MAIN_FATAL_ERROR_RDUMP1_LENGTH	0x80/* DWORD 0x20 */
+#define MAIN_HDA_FLAGS_OFFSET		0x84/* DWORD 0x21 */
+#define MAIN_ANALOG_SETUP_OFFSET	0x88/* DWORD 0x22 */
+
+/* Gereral Status Table offset - byte offset */
+#define GST_GSTLEN_MPIS_OFFSET		0x00
+#define GST_IQ_FREEZE_STATE0_OFFSET	0x04
+#define GST_IQ_FREEZE_STATE1_OFFSET	0x08
+#define GST_MSGUTCNT_OFFSET		0x0C
+#define GST_IOPTCNT_OFFSET		0x10
+#define GST_PHYSTATE_OFFSET		0x18
+#define GST_PHYSTATE0_OFFSET		0x18
+#define GST_PHYSTATE1_OFFSET		0x1C
+#define GST_PHYSTATE2_OFFSET		0x20
+#define GST_PHYSTATE3_OFFSET		0x24
+#define GST_PHYSTATE4_OFFSET		0x28
+#define GST_PHYSTATE5_OFFSET		0x2C
+#define GST_PHYSTATE6_OFFSET		0x30
+#define GST_PHYSTATE7_OFFSET		0x34
+#define GST_RERRINFO_OFFSET		0x44
+
+/* General Status Table - MPI state */
+#define GST_MPI_STATE_UNINIT		0x00
+#define GST_MPI_STATE_INIT		0x01
+#define GST_MPI_STATE_TERMINATION	0x02
+#define GST_MPI_STATE_ERROR		0x03
+#define GST_MPI_STATE_MASK		0x07
+
+#define MBIC_NMI_ENABLE_VPE0_IOP	0x000418
+#define MBIC_NMI_ENABLE_VPE0_AAP1	0x000418
+/* PCIE registers - BAR2(0x18), BAR1(win) 0x010000 */
+#define PCIE_EVENT_INTERRUPT_ENABLE	0x003040
+#define PCIE_EVENT_INTERRUPT		0x003044
+#define PCIE_ERROR_INTERRUPT_ENABLE	0x003048
+#define PCIE_ERROR_INTERRUPT		0x00304C
+/* signature defintion for host scratch pad0 register */
+#define SPC_SOFT_RESET_SIGNATURE	0x252acbcd
+/* Signature for Soft Reset */
+
+/* SPC Reset register - BAR4(0x20), BAR2(win) (need dynamic mapping) */
+#define SPC_REG_RESET			0x000000/* reset register */
+
+/* bit difination for SPC_RESET register */
+#define   SPC_REG_RESET_OSSP		0x00000001
+#define   SPC_REG_RESET_RAAE		0x00000002
+#define   SPC_REG_RESET_PCS_SPBC	0x00000004
+#define   SPC_REG_RESET_PCS_IOP_SS	0x00000008
+#define   SPC_REG_RESET_PCS_AAP1_SS	0x00000010
+#define   SPC_REG_RESET_PCS_AAP2_SS	0x00000020
+#define   SPC_REG_RESET_PCS_LM		0x00000040
+#define   SPC_REG_RESET_PCS		0x00000080
+#define   SPC_REG_RESET_GSM		0x00000100
+#define   SPC_REG_RESET_DDR2		0x00010000
+#define   SPC_REG_RESET_BDMA_CORE	0x00020000
+#define   SPC_REG_RESET_BDMA_SXCBI	0x00040000
+#define   SPC_REG_RESET_PCIE_AL_SXCBI	0x00080000
+#define   SPC_REG_RESET_PCIE_PWR	0x00100000
+#define   SPC_REG_RESET_PCIE_SFT	0x00200000
+#define   SPC_REG_RESET_PCS_SXCBI	0x00400000
+#define   SPC_REG_RESET_LMS_SXCBI	0x00800000
+#define   SPC_REG_RESET_PMIC_SXCBI	0x01000000
+#define   SPC_REG_RESET_PMIC_CORE	0x02000000
+#define   SPC_REG_RESET_PCIE_PC_SXCBI	0x04000000
+#define   SPC_REG_RESET_DEVICE		0x80000000
+
+/* registers for BAR Shifting - BAR2(0x18), BAR1(win) */
+#define SPC_IBW_AXI_TRANSLATION_LOW	0x003258
+
+#define MBIC_AAP1_ADDR_BASE		0x060000
+#define MBIC_IOP_ADDR_BASE		0x070000
+#define GSM_ADDR_BASE			0x0700000
+/* Dynamic map through Bar4 - 0x00700000 */
+#define GSM_CONFIG_RESET		0x00000000
+#define RAM_ECC_DB_ERR			0x00000018
+#define GSM_READ_ADDR_PARITY_INDIC	0x00000058
+#define GSM_WRITE_ADDR_PARITY_INDIC	0x00000060
+#define GSM_WRITE_DATA_PARITY_INDIC	0x00000068
+#define GSM_READ_ADDR_PARITY_CHECK	0x00000038
+#define GSM_WRITE_ADDR_PARITY_CHECK	0x00000040
+#define GSM_WRITE_DATA_PARITY_CHECK	0x00000048
+
+#define RB6_ACCESS_REG			0x6A0000
+#define HDAC_EXEC_CMD			0x0002
+#define HDA_C_PA			0xcb
+#define HDA_SEQ_ID_BITS			0x00ff0000
+#define HDA_GSM_OFFSET_BITS		0x00FFFFFF
+#define MBIC_AAP1_ADDR_BASE		0x060000
+#define MBIC_IOP_ADDR_BASE		0x070000
+#define GSM_ADDR_BASE			0x0700000
+#define SPC_TOP_LEVEL_ADDR_BASE		0x000000
+#define GSM_CONFIG_RESET_VALUE          0x00003b00
+#define GPIO_ADDR_BASE                  0x00090000
+#define GPIO_GPIO_0_0UTPUT_CTL_OFFSET   0x0000010c
+
+/* RB6 offset */
+#define SPC_RB6_OFFSET			0x80C0
+/* Magic number of  soft reset for RB6 */
+#define RB6_MAGIC_NUMBER_RST		0x1234
+
+/* Device Register status */
+#define DEVREG_SUCCESS					0x00
+#define DEVREG_FAILURE_OUT_OF_RESOURCE			0x01
+#define DEVREG_FAILURE_DEVICE_ALREADY_REGISTERED	0x02
+#define DEVREG_FAILURE_INVALID_PHY_ID			0x03
+#define DEVREG_FAILURE_PHY_ID_ALREADY_REGISTERED	0x04
+#define DEVREG_FAILURE_PORT_ID_OUT_OF_RANGE		0x05
+#define DEVREG_FAILURE_PORT_NOT_VALID_STATE		0x06
+#define DEVREG_FAILURE_DEVICE_TYPE_NOT_VALID		0x07
+
+#endif
+
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
new file mode 100644
index 000000000000..811b5d36d5f0
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -0,0 +1,888 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#include "pm8001_sas.h"
+#include "pm8001_chips.h"
+
+static struct scsi_transport_template *pm8001_stt;
+
+static const struct pm8001_chip_info pm8001_chips[] = {
+	[chip_8001] = {  8, &pm8001_8001_dispatch,},
+};
+static int pm8001_id;
+
+LIST_HEAD(hba_list);
+
+/**
+ * The main structure which LLDD must register for scsi core.
+ */
+static struct scsi_host_template pm8001_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= pm8001_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.scan_finished		= pm8001_scan_finished,
+	.scan_start		= pm8001_scan_start,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.eh_device_reset_handler = sas_eh_device_reset_handler,
+	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
+	.slave_alloc		= pm8001_slave_alloc,
+	.target_destroy		= sas_target_destroy,
+	.ioctl			= sas_ioctl,
+	.shost_attrs		= pm8001_host_attrs,
+};
+
+/**
+ * Sas layer call this function to execute specific task.
+ */
+static struct sas_domain_function_template pm8001_transport_ops = {
+	.lldd_dev_found		= pm8001_dev_found,
+	.lldd_dev_gone		= pm8001_dev_gone,
+
+	.lldd_execute_task	= pm8001_queue_command,
+	.lldd_control_phy	= pm8001_phy_control,
+
+	.lldd_abort_task	= pm8001_abort_task,
+	.lldd_abort_task_set	= pm8001_abort_task_set,
+	.lldd_clear_aca		= pm8001_clear_aca,
+	.lldd_clear_task_set	= pm8001_clear_task_set,
+	.lldd_I_T_nexus_reset   = pm8001_I_T_nexus_reset,
+	.lldd_lu_reset		= pm8001_lu_reset,
+	.lldd_query_task	= pm8001_query_task,
+};
+
+/**
+ *pm8001_phy_init - initiate our adapter phys
+ *@pm8001_ha: our hba structure.
+ *@phy_id: phy id.
+ */
+static void __devinit pm8001_phy_init(struct pm8001_hba_info *pm8001_ha,
+	int phy_id)
+{
+	struct pm8001_phy *phy = &pm8001_ha->phy[phy_id];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	phy->phy_state = 0;
+	phy->pm8001_ha = pm8001_ha;
+	sas_phy->enabled = (phy_id < pm8001_ha->chip->n_phy) ? 1 : 0;
+	sas_phy->class = SAS;
+	sas_phy->iproto = SAS_PROTOCOL_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+	sas_phy->id = phy_id;
+	sas_phy->sas_addr = &pm8001_ha->sas_addr[0];
+	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
+	sas_phy->ha = (struct sas_ha_struct *)pm8001_ha->shost->hostdata;
+	sas_phy->lldd_phy = phy;
+}
+
+/**
+ *pm8001_free - free hba
+ *@pm8001_ha:	our hba structure.
+ *
+ */
+static void pm8001_free(struct pm8001_hba_info *pm8001_ha)
+{
+	int i;
+	struct pm8001_wq *wq;
+
+	if (!pm8001_ha)
+		return;
+
+	for (i = 0; i < USI_MAX_MEMCNT; i++) {
+		if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) {
+			pci_free_consistent(pm8001_ha->pdev,
+				pm8001_ha->memoryMap.region[i].element_size,
+				pm8001_ha->memoryMap.region[i].virt_ptr,
+				pm8001_ha->memoryMap.region[i].phys_addr);
+			}
+	}
+	PM8001_CHIP_DISP->chip_iounmap(pm8001_ha);
+	if (pm8001_ha->shost)
+		scsi_host_put(pm8001_ha->shost);
+	list_for_each_entry(wq, &pm8001_ha->wq_list, entry)
+		cancel_delayed_work(&wq->work_q);
+	kfree(pm8001_ha->tags);
+	kfree(pm8001_ha);
+}
+
+#ifdef PM8001_USE_TASKLET
+static void pm8001_tasklet(unsigned long opaque)
+{
+	struct pm8001_hba_info *pm8001_ha;
+	pm8001_ha = (struct pm8001_hba_info *)opaque;;
+	if (unlikely(!pm8001_ha))
+		BUG_ON(1);
+	PM8001_CHIP_DISP->isr(pm8001_ha);
+}
+#endif
+
+
+ /**
+  * pm8001_interrupt - when HBA originate a interrupt,we should invoke this
+  * dispatcher to handle each case.
+  * @irq: irq number.
+  * @opaque: the passed general host adapter struct
+  */
+static irqreturn_t pm8001_interrupt(int irq, void *opaque)
+{
+	struct pm8001_hba_info *pm8001_ha;
+	irqreturn_t ret = IRQ_HANDLED;
+	struct sas_ha_struct *sha = opaque;
+	pm8001_ha = sha->lldd_ha;
+	if (unlikely(!pm8001_ha))
+		return IRQ_NONE;
+	if (!PM8001_CHIP_DISP->is_our_interupt(pm8001_ha))
+		return IRQ_NONE;
+#ifdef PM8001_USE_TASKLET
+	tasklet_schedule(&pm8001_ha->tasklet);
+#else
+	ret = PM8001_CHIP_DISP->isr(pm8001_ha);
+#endif
+	return ret;
+}
+
+/**
+ * pm8001_alloc - initiate our hba structure and 6 DMAs area.
+ * @pm8001_ha:our hba structure.
+ *
+ */
+static int __devinit pm8001_alloc(struct pm8001_hba_info *pm8001_ha)
+{
+	int i;
+	spin_lock_init(&pm8001_ha->lock);
+	for (i = 0; i < pm8001_ha->chip->n_phy; i++)
+		pm8001_phy_init(pm8001_ha, i);
+
+	pm8001_ha->tags = kmalloc(sizeof(*pm8001_ha->tags)*PM8001_MAX_DEVICES,
+		GFP_KERNEL);
+
+	/* MPI Memory region 1 for AAP Event Log for fw */
+	pm8001_ha->memoryMap.region[AAP1].num_elements = 1;
+	pm8001_ha->memoryMap.region[AAP1].element_size = PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->memoryMap.region[AAP1].total_len = PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->memoryMap.region[AAP1].alignment = 32;
+
+	/* MPI Memory region 2 for IOP Event Log for fw */
+	pm8001_ha->memoryMap.region[IOP].num_elements = 1;
+	pm8001_ha->memoryMap.region[IOP].element_size = PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->memoryMap.region[IOP].total_len = PM8001_EVENT_LOG_SIZE;
+	pm8001_ha->memoryMap.region[IOP].alignment = 32;
+
+	/* MPI Memory region 3 for consumer Index of inbound queues */
+	pm8001_ha->memoryMap.region[CI].num_elements = 1;
+	pm8001_ha->memoryMap.region[CI].element_size = 4;
+	pm8001_ha->memoryMap.region[CI].total_len = 4;
+	pm8001_ha->memoryMap.region[CI].alignment = 4;
+
+	/* MPI Memory region 4 for producer Index of outbound queues */
+	pm8001_ha->memoryMap.region[PI].num_elements = 1;
+	pm8001_ha->memoryMap.region[PI].element_size = 4;
+	pm8001_ha->memoryMap.region[PI].total_len = 4;
+	pm8001_ha->memoryMap.region[PI].alignment = 4;
+
+	/* MPI Memory region 5 inbound queues */
+	pm8001_ha->memoryMap.region[IB].num_elements = 256;
+	pm8001_ha->memoryMap.region[IB].element_size = 64;
+	pm8001_ha->memoryMap.region[IB].total_len = 256 * 64;
+	pm8001_ha->memoryMap.region[IB].alignment = 64;
+
+	/* MPI Memory region 6 inbound queues */
+	pm8001_ha->memoryMap.region[OB].num_elements = 256;
+	pm8001_ha->memoryMap.region[OB].element_size = 64;
+	pm8001_ha->memoryMap.region[OB].total_len = 256 * 64;
+	pm8001_ha->memoryMap.region[OB].alignment = 64;
+
+	/* Memory region write DMA*/
+	pm8001_ha->memoryMap.region[NVMD].num_elements = 1;
+	pm8001_ha->memoryMap.region[NVMD].element_size = 4096;
+	pm8001_ha->memoryMap.region[NVMD].total_len = 4096;
+	/* Memory region for devices*/
+	pm8001_ha->memoryMap.region[DEV_MEM].num_elements = 1;
+	pm8001_ha->memoryMap.region[DEV_MEM].element_size = PM8001_MAX_DEVICES *
+		sizeof(struct pm8001_device);
+	pm8001_ha->memoryMap.region[DEV_MEM].total_len = PM8001_MAX_DEVICES *
+		sizeof(struct pm8001_device);
+
+	/* Memory region for ccb_info*/
+	pm8001_ha->memoryMap.region[CCB_MEM].num_elements = 1;
+	pm8001_ha->memoryMap.region[CCB_MEM].element_size = PM8001_MAX_CCB *
+		sizeof(struct pm8001_ccb_info);
+	pm8001_ha->memoryMap.region[CCB_MEM].total_len = PM8001_MAX_CCB *
+		sizeof(struct pm8001_ccb_info);
+
+	for (i = 0; i < USI_MAX_MEMCNT; i++) {
+		if (pm8001_mem_alloc(pm8001_ha->pdev,
+			&pm8001_ha->memoryMap.region[i].virt_ptr,
+			&pm8001_ha->memoryMap.region[i].phys_addr,
+			&pm8001_ha->memoryMap.region[i].phys_addr_hi,
+			&pm8001_ha->memoryMap.region[i].phys_addr_lo,
+			pm8001_ha->memoryMap.region[i].total_len,
+			pm8001_ha->memoryMap.region[i].alignment) != 0) {
+				PM8001_FAIL_DBG(pm8001_ha,
+					pm8001_printk("Mem%d alloc failed\n",
+					i));
+				goto err_out;
+		}
+	}
+
+	pm8001_ha->devices = pm8001_ha->memoryMap.region[DEV_MEM].virt_ptr;
+	for (i = 0; i < PM8001_MAX_DEVICES; i++) {
+		pm8001_ha->devices[i].dev_type = NO_DEVICE;
+		pm8001_ha->devices[i].id = i;
+		pm8001_ha->devices[i].device_id = PM8001_MAX_DEVICES;
+		pm8001_ha->devices[i].running_req = 0;
+	}
+	pm8001_ha->ccb_info = pm8001_ha->memoryMap.region[CCB_MEM].virt_ptr;
+	for (i = 0; i < PM8001_MAX_CCB; i++) {
+		pm8001_ha->ccb_info[i].ccb_dma_handle =
+			pm8001_ha->memoryMap.region[CCB_MEM].phys_addr +
+			i * sizeof(struct pm8001_ccb_info);
+		++pm8001_ha->tags_num;
+	}
+	pm8001_ha->flags = PM8001F_INIT_TIME;
+	/* Initialize tags */
+	pm8001_tag_init(pm8001_ha);
+	return 0;
+err_out:
+	return 1;
+}
+
+/**
+ * pm8001_ioremap - remap the pci high physical address to kernal virtual
+ * address so that we can access them.
+ * @pm8001_ha:our hba structure.
+ */
+static int pm8001_ioremap(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 bar;
+	u32 logicalBar = 0;
+	struct pci_dev *pdev;
+
+	pdev = pm8001_ha->pdev;
+	/* map pci mem (PMC pci base 0-3)*/
+	for (bar = 0; bar < 6; bar++) {
+		/*
+		** logical BARs for SPC:
+		** bar 0 and 1 - logical BAR0
+		** bar 2 and 3 - logical BAR1
+		** bar4 - logical BAR2
+		** bar5 - logical BAR3
+		** Skip the appropriate assignments:
+		*/
+		if ((bar == 1) || (bar == 3))
+			continue;
+		if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+			pm8001_ha->io_mem[logicalBar].membase =
+				pci_resource_start(pdev, bar);
+			pm8001_ha->io_mem[logicalBar].membase &=
+				(u32)PCI_BASE_ADDRESS_MEM_MASK;
+			pm8001_ha->io_mem[logicalBar].memsize =
+				pci_resource_len(pdev, bar);
+			pm8001_ha->io_mem[logicalBar].memvirtaddr =
+				ioremap(pm8001_ha->io_mem[logicalBar].membase,
+				pm8001_ha->io_mem[logicalBar].memsize);
+			PM8001_INIT_DBG(pm8001_ha,
+				pm8001_printk("PCI: bar %d, logicalBar %d "
+				"virt_addr=%lx,len=%d\n", bar, logicalBar,
+				(unsigned long)
+				pm8001_ha->io_mem[logicalBar].memvirtaddr,
+				pm8001_ha->io_mem[logicalBar].memsize));
+		} else {
+			pm8001_ha->io_mem[logicalBar].membase	= 0;
+			pm8001_ha->io_mem[logicalBar].memsize	= 0;
+			pm8001_ha->io_mem[logicalBar].memvirtaddr = 0;
+		}
+		logicalBar++;
+	}
+	return 0;
+}
+
+/**
+ * pm8001_pci_alloc - initialize our ha card structure
+ * @pdev: pci device.
+ * @ent: ent
+ * @shost: scsi host struct which has been initialized before.
+ */
+static struct pm8001_hba_info *__devinit
+pm8001_pci_alloc(struct pci_dev *pdev, u32 chip_id, struct Scsi_Host *shost)
+{
+	struct pm8001_hba_info *pm8001_ha;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+
+	pm8001_ha = sha->lldd_ha;
+	if (!pm8001_ha)
+		return NULL;
+
+	pm8001_ha->pdev = pdev;
+	pm8001_ha->dev = &pdev->dev;
+	pm8001_ha->chip_id = chip_id;
+	pm8001_ha->chip = &pm8001_chips[pm8001_ha->chip_id];
+	pm8001_ha->irq = pdev->irq;
+	pm8001_ha->sas = sha;
+	pm8001_ha->shost = shost;
+	pm8001_ha->id = pm8001_id++;
+	INIT_LIST_HEAD(&pm8001_ha->wq_list);
+	pm8001_ha->logging_level = 0x01;
+	sprintf(pm8001_ha->name, "%s%d", DRV_NAME, pm8001_ha->id);
+#ifdef PM8001_USE_TASKLET
+	tasklet_init(&pm8001_ha->tasklet, pm8001_tasklet,
+		(unsigned long)pm8001_ha);
+#endif
+	pm8001_ioremap(pm8001_ha);
+	if (!pm8001_alloc(pm8001_ha))
+		return pm8001_ha;
+	pm8001_free(pm8001_ha);
+	return NULL;
+}
+
+/**
+ * pci_go_44 - pm8001 specified, its DMA is 44 bit rather than 64 bit
+ * @pdev: pci device.
+ */
+static int pci_go_44(struct pci_dev *pdev)
+{
+	int rc;
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(44))) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(44));
+		if (rc) {
+			rc = pci_set_consistent_dma_mask(pdev,
+				DMA_BIT_MASK(32));
+			if (rc) {
+				dev_printk(KERN_ERR, &pdev->dev,
+					"44-bit DMA enable failed\n");
+				return rc;
+			}
+		}
+	} else {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				"32-bit DMA enable failed\n");
+			return rc;
+		}
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				"32-bit consistent DMA enable failed\n");
+			return rc;
+		}
+	}
+	return rc;
+}
+
+/**
+ * pm8001_prep_sas_ha_init - allocate memory in general hba struct && init them.
+ * @shost: scsi host which has been allocated outside.
+ * @chip_info: our ha struct.
+ */
+static int __devinit pm8001_prep_sas_ha_init(struct Scsi_Host * shost,
+	const struct pm8001_chip_info *chip_info)
+{
+	int phy_nr, port_nr;
+	struct asd_sas_phy **arr_phy;
+	struct asd_sas_port **arr_port;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	phy_nr = chip_info->n_phy;
+	port_nr = phy_nr;
+	memset(sha, 0x00, sizeof(*sha));
+	arr_phy = kcalloc(phy_nr, sizeof(void *), GFP_KERNEL);
+	if (!arr_phy)
+		goto exit;
+	arr_port = kcalloc(port_nr, sizeof(void *), GFP_KERNEL);
+	if (!arr_port)
+		goto exit_free2;
+
+	sha->sas_phy = arr_phy;
+	sha->sas_port = arr_port;
+	sha->lldd_ha = kzalloc(sizeof(struct pm8001_hba_info), GFP_KERNEL);
+	if (!sha->lldd_ha)
+		goto exit_free1;
+
+	shost->transportt = pm8001_stt;
+	shost->max_id = PM8001_MAX_DEVICES;
+	shost->max_lun = 8;
+	shost->max_channel = 0;
+	shost->unique_id = pm8001_id;
+	shost->max_cmd_len = 16;
+	shost->can_queue = PM8001_CAN_QUEUE;
+	shost->cmd_per_lun = 32;
+	return 0;
+exit_free1:
+	kfree(arr_port);
+exit_free2:
+	kfree(arr_phy);
+exit:
+	return -1;
+}
+
+/**
+ * pm8001_post_sas_ha_init - initialize general hba struct defined in libsas
+ * @shost: scsi host which has been allocated outside
+ * @chip_info: our ha struct.
+ */
+static void  __devinit pm8001_post_sas_ha_init(struct Scsi_Host *shost,
+	const struct pm8001_chip_info *chip_info)
+{
+	int i = 0;
+	struct pm8001_hba_info *pm8001_ha;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	pm8001_ha = sha->lldd_ha;
+	for (i = 0; i < chip_info->n_phy; i++) {
+		sha->sas_phy[i] = &pm8001_ha->phy[i].sas_phy;
+		sha->sas_port[i] = &pm8001_ha->port[i].sas_port;
+	}
+	sha->sas_ha_name = DRV_NAME;
+	sha->dev = pm8001_ha->dev;
+
+	sha->lldd_module = THIS_MODULE;
+	sha->sas_addr = &pm8001_ha->sas_addr[0];
+	sha->num_phys = chip_info->n_phy;
+	sha->lldd_max_execute_num = 1;
+	sha->lldd_queue_size = PM8001_CAN_QUEUE;
+	sha->core.shost = shost;
+}
+
+/**
+ * pm8001_init_sas_add - initialize sas address
+ * @chip_info: our ha struct.
+ *
+ * Currently we just set the fixed SAS address to our HBA,for manufacture,
+ * it should read from the EEPROM
+ */
+static void pm8001_init_sas_add(struct pm8001_hba_info *pm8001_ha)
+{
+	u8 i;
+#ifdef PM8001_READ_VPD
+	DECLARE_COMPLETION_ONSTACK(completion);
+	pm8001_ha->nvmd_completion = &completion;
+	PM8001_CHIP_DISP->get_nvmd_req(pm8001_ha, 0, 0);
+	wait_for_completion(&completion);
+	for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
+		memcpy(&pm8001_ha->phy[i].dev_sas_addr, pm8001_ha->sas_addr,
+			SAS_ADDR_SIZE);
+		PM8001_INIT_DBG(pm8001_ha,
+			pm8001_printk("phy %d sas_addr = %x \n", i,
+			(u64)pm8001_ha->phy[i].dev_sas_addr));
+	}
+#else
+	for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
+		pm8001_ha->phy[i].dev_sas_addr = 0x500e004010000004ULL;
+		pm8001_ha->phy[i].dev_sas_addr =
+			cpu_to_be64((u64)
+				(*(u64 *)&pm8001_ha->phy[i].dev_sas_addr));
+	}
+	memcpy(pm8001_ha->sas_addr, &pm8001_ha->phy[0].dev_sas_addr,
+		SAS_ADDR_SIZE);
+#endif
+}
+
+#ifdef PM8001_USE_MSIX
+/**
+ * pm8001_setup_msix - enable MSI-X interrupt
+ * @chip_info: our ha struct.
+ * @irq_handler: irq_handler
+ */
+static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha,
+	irq_handler_t irq_handler)
+{
+	u32 i = 0, j = 0;
+	u32 number_of_intr = 1;
+	int flag = 0;
+	u32 max_entry;
+	int rc;
+	max_entry = sizeof(pm8001_ha->msix_entries) /
+		sizeof(pm8001_ha->msix_entries[0]);
+	flag |= IRQF_DISABLED;
+	for (i = 0; i < max_entry ; i++)
+		pm8001_ha->msix_entries[i].entry = i;
+	rc = pci_enable_msix(pm8001_ha->pdev, pm8001_ha->msix_entries,
+		number_of_intr);
+	pm8001_ha->number_of_intr = number_of_intr;
+	if (!rc) {
+		for (i = 0; i < number_of_intr; i++) {
+			if (request_irq(pm8001_ha->msix_entries[i].vector,
+				irq_handler, flag, DRV_NAME,
+				SHOST_TO_SAS_HA(pm8001_ha->shost))) {
+				for (j = 0; j < i; j++)
+					free_irq(
+					pm8001_ha->msix_entries[j].vector,
+					SHOST_TO_SAS_HA(pm8001_ha->shost));
+				pci_disable_msix(pm8001_ha->pdev);
+				break;
+			}
+		}
+	}
+	return rc;
+}
+#endif
+
+/**
+ * pm8001_request_irq - register interrupt
+ * @chip_info: our ha struct.
+ */
+static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha)
+{
+	struct pci_dev *pdev;
+	irq_handler_t irq_handler = pm8001_interrupt;
+	u32 rc;
+
+	pdev = pm8001_ha->pdev;
+
+#ifdef PM8001_USE_MSIX
+	if (pci_find_capability(pdev, PCI_CAP_ID_MSIX))
+		return pm8001_setup_msix(pm8001_ha, irq_handler);
+	else
+		goto intx;
+#endif
+
+intx:
+	/* intialize the INT-X interrupt */
+	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME,
+		SHOST_TO_SAS_HA(pm8001_ha->shost));
+	return rc;
+}
+
+/**
+ * pm8001_pci_probe - probe supported device
+ * @pdev: pci device which kernel has been prepared for.
+ * @ent: pci device id
+ *
+ * This function is the main initialization function, when register a new
+ * pci driver it is invoked, all struct an hardware initilization should be done
+ * here, also, register interrupt
+ */
+static int __devinit pm8001_pci_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	unsigned int rc;
+	u32	pci_reg;
+	struct pm8001_hba_info *pm8001_ha;
+	struct Scsi_Host *shost = NULL;
+	const struct pm8001_chip_info *chip;
+
+	dev_printk(KERN_INFO, &pdev->dev,
+		"pm8001: driver version %s\n", DRV_VERSION);
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_out_enable;
+	pci_set_master(pdev);
+	/*
+	 * Enable pci slot busmaster by setting pci command register.
+	 * This is required by FW for Cyclone card.
+	 */
+
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_reg);
+	pci_reg |= 0x157;
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_reg);
+	rc = pci_request_regions(pdev, DRV_NAME);
+	if (rc)
+		goto err_out_disable;
+	rc = pci_go_44(pdev);
+	if (rc)
+		goto err_out_regions;
+
+	shost = scsi_host_alloc(&pm8001_sht, sizeof(void *));
+	if (!shost) {
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+	chip = &pm8001_chips[ent->driver_data];
+	SHOST_TO_SAS_HA(shost) =
+		kcalloc(1, sizeof(struct sas_ha_struct), GFP_KERNEL);
+	if (!SHOST_TO_SAS_HA(shost)) {
+		rc = -ENOMEM;
+		goto err_out_free_host;
+	}
+
+	rc = pm8001_prep_sas_ha_init(shost, chip);
+	if (rc) {
+		rc = -ENOMEM;
+		goto err_out_free;
+	}
+	pci_set_drvdata(pdev, SHOST_TO_SAS_HA(shost));
+	pm8001_ha = pm8001_pci_alloc(pdev, chip_8001, shost);
+	if (!pm8001_ha) {
+		rc = -ENOMEM;
+		goto err_out_free;
+	}
+	list_add_tail(&pm8001_ha->list, &hba_list);
+	PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd);
+	rc = PM8001_CHIP_DISP->chip_init(pm8001_ha);
+	if (rc)
+		goto err_out_ha_free;
+
+	rc = scsi_add_host(shost, &pdev->dev);
+	if (rc)
+		goto err_out_ha_free;
+	rc = pm8001_request_irq(pm8001_ha);
+	if (rc)
+		goto err_out_shost;
+
+	PM8001_CHIP_DISP->interrupt_enable(pm8001_ha);
+	pm8001_init_sas_add(pm8001_ha);
+	pm8001_post_sas_ha_init(shost, chip);
+	rc = sas_register_ha(SHOST_TO_SAS_HA(shost));
+	if (rc)
+		goto err_out_shost;
+	scsi_scan_host(pm8001_ha->shost);
+	return 0;
+
+err_out_shost:
+	scsi_remove_host(pm8001_ha->shost);
+err_out_ha_free:
+	pm8001_free(pm8001_ha);
+err_out_free:
+	kfree(SHOST_TO_SAS_HA(shost));
+err_out_free_host:
+	kfree(shost);
+err_out_regions:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+err_out_enable:
+	return rc;
+}
+
+static void __devexit pm8001_pci_remove(struct pci_dev *pdev)
+{
+	struct sas_ha_struct *sha = pci_get_drvdata(pdev);
+	struct pm8001_hba_info *pm8001_ha;
+	int i;
+	pm8001_ha = sha->lldd_ha;
+	pci_set_drvdata(pdev, NULL);
+	sas_unregister_ha(sha);
+	sas_remove_host(pm8001_ha->shost);
+	list_del(&pm8001_ha->list);
+	scsi_remove_host(pm8001_ha->shost);
+	PM8001_CHIP_DISP->interrupt_disable(pm8001_ha);
+	PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd);
+
+#ifdef PM8001_USE_MSIX
+	for (i = 0; i < pm8001_ha->number_of_intr; i++)
+		synchronize_irq(pm8001_ha->msix_entries[i].vector);
+	for (i = 0; i < pm8001_ha->number_of_intr; i++)
+		free_irq(pm8001_ha->msix_entries[i].vector, sha);
+	pci_disable_msix(pdev);
+#else
+	free_irq(pm8001_ha->irq, sha);
+#endif
+#ifdef PM8001_USE_TASKLET
+	tasklet_kill(&pm8001_ha->tasklet);
+#endif
+	pm8001_free(pm8001_ha);
+	kfree(sha->sas_phy);
+	kfree(sha->sas_port);
+	kfree(sha);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+/**
+ * pm8001_pci_suspend - power management suspend main entry point
+ * @pdev: PCI device struct
+ * @state: PM state change to (usually PCI_D3)
+ *
+ * Returns 0 success, anything else error.
+ */
+static int pm8001_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct sas_ha_struct *sha = pci_get_drvdata(pdev);
+	struct pm8001_hba_info *pm8001_ha;
+	int i , pos;
+	u32 device_state;
+	pm8001_ha = sha->lldd_ha;
+	flush_scheduled_work();
+	scsi_block_requests(pm8001_ha->shost);
+	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+	if (pos == 0) {
+		printk(KERN_ERR " PCI PM not supported\n");
+		return -ENODEV;
+	}
+	PM8001_CHIP_DISP->interrupt_disable(pm8001_ha);
+	PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd);
+#ifdef PM8001_USE_MSIX
+	for (i = 0; i < pm8001_ha->number_of_intr; i++)
+		synchronize_irq(pm8001_ha->msix_entries[i].vector);
+	for (i = 0; i < pm8001_ha->number_of_intr; i++)
+		free_irq(pm8001_ha->msix_entries[i].vector, sha);
+	pci_disable_msix(pdev);
+#else
+	free_irq(pm8001_ha->irq, sha);
+#endif
+#ifdef PM8001_USE_TASKLET
+	tasklet_kill(&pm8001_ha->tasklet);
+#endif
+	device_state = pci_choose_state(pdev, state);
+	pm8001_printk("pdev=0x%p, slot=%s, entering "
+		      "operating state [D%d]\n", pdev,
+		      pm8001_ha->name, device_state);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, device_state);
+	return 0;
+}
+
+/**
+ * pm8001_pci_resume - power management resume main entry point
+ * @pdev: PCI device struct
+ *
+ * Returns 0 success, anything else error.
+ */
+static int pm8001_pci_resume(struct pci_dev *pdev)
+{
+	struct sas_ha_struct *sha = pci_get_drvdata(pdev);
+	struct pm8001_hba_info *pm8001_ha;
+	int rc;
+	u32 device_state;
+	pm8001_ha = sha->lldd_ha;
+	device_state = pdev->current_state;
+
+	pm8001_printk("pdev=0x%p, slot=%s, resuming from previous "
+		"operating state [D%d]\n", pdev, pm8001_ha->name, device_state);
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_enable_wake(pdev, PCI_D0, 0);
+	pci_restore_state(pdev);
+	rc = pci_enable_device(pdev);
+	if (rc) {
+		pm8001_printk("slot=%s Enable device failed during resume\n",
+			      pm8001_ha->name);
+		goto err_out_enable;
+	}
+
+	pci_set_master(pdev);
+	rc = pci_go_44(pdev);
+	if (rc)
+		goto err_out_disable;
+
+	PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha, 0x252acbcd);
+	rc = PM8001_CHIP_DISP->chip_init(pm8001_ha);
+	if (rc)
+		goto err_out_disable;
+	PM8001_CHIP_DISP->interrupt_disable(pm8001_ha);
+	rc = pm8001_request_irq(pm8001_ha);
+	if (rc)
+		goto err_out_disable;
+	#ifdef PM8001_USE_TASKLET
+	tasklet_init(&pm8001_ha->tasklet, pm8001_tasklet,
+		    (unsigned long)pm8001_ha);
+	#endif
+	PM8001_CHIP_DISP->interrupt_enable(pm8001_ha);
+	scsi_unblock_requests(pm8001_ha->shost);
+	return 0;
+
+err_out_disable:
+	scsi_remove_host(pm8001_ha->shost);
+	pci_disable_device(pdev);
+err_out_enable:
+	return rc;
+}
+
+static struct pci_device_id __devinitdata pm8001_pci_table[] = {
+	{
+		PCI_VDEVICE(PMC_Sierra, 0x8001), chip_8001
+	},
+	{
+		PCI_DEVICE(0x117c, 0x0042),
+		.driver_data = chip_8001
+	},
+	{} /* terminate list */
+};
+
+static struct pci_driver pm8001_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= pm8001_pci_table,
+	.probe		= pm8001_pci_probe,
+	.remove		= __devexit_p(pm8001_pci_remove),
+	.suspend	= pm8001_pci_suspend,
+	.resume		= pm8001_pci_resume,
+};
+
+/**
+ *	pm8001_init - initialize scsi transport template
+ */
+static int __init pm8001_init(void)
+{
+	int rc;
+	pm8001_id = 0;
+	pm8001_stt = sas_domain_attach_transport(&pm8001_transport_ops);
+	if (!pm8001_stt)
+		return -ENOMEM;
+	rc = pci_register_driver(&pm8001_pci_driver);
+	if (rc)
+		goto err_out;
+	return 0;
+err_out:
+	sas_release_transport(pm8001_stt);
+	return rc;
+}
+
+static void __exit pm8001_exit(void)
+{
+	pci_unregister_driver(&pm8001_pci_driver);
+	sas_release_transport(pm8001_stt);
+}
+
+module_init(pm8001_init);
+module_exit(pm8001_exit);
+
+MODULE_AUTHOR("Jack Wang <jack_wang@usish.com>");
+MODULE_DESCRIPTION("PMC-Sierra PM8001 SAS/SATA controller driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, pm8001_pci_table);
+
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
new file mode 100644
index 000000000000..7bf30fa6963a
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -0,0 +1,1104 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#include "pm8001_sas.h"
+
+/**
+ * pm8001_find_tag - from sas task to find out  tag that belongs to this task
+ * @task: the task sent to the LLDD
+ * @tag: the found tag associated with the task
+ */
+static int pm8001_find_tag(struct sas_task *task, u32 *tag)
+{
+	if (task->lldd_task) {
+		struct pm8001_ccb_info *ccb;
+		ccb = task->lldd_task;
+		*tag = ccb->ccb_tag;
+		return 1;
+	}
+	return 0;
+}
+
+/**
+  * pm8001_tag_clear - clear the tags bitmap
+  * @pm8001_ha: our hba struct
+  * @tag: the found tag associated with the task
+  */
+static void pm8001_tag_clear(struct pm8001_hba_info *pm8001_ha, u32 tag)
+{
+	void *bitmap = pm8001_ha->tags;
+	clear_bit(tag, bitmap);
+}
+
+static void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag)
+{
+	pm8001_tag_clear(pm8001_ha, tag);
+}
+
+static void pm8001_tag_set(struct pm8001_hba_info *pm8001_ha, u32 tag)
+{
+	void *bitmap = pm8001_ha->tags;
+	set_bit(tag, bitmap);
+}
+
+/**
+  * pm8001_tag_alloc - allocate a empty tag for task used.
+  * @pm8001_ha: our hba struct
+  * @tag_out: the found empty tag .
+  */
+inline int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out)
+{
+	unsigned int index, tag;
+	void *bitmap = pm8001_ha->tags;
+
+	index = find_first_zero_bit(bitmap, pm8001_ha->tags_num);
+	tag = index;
+	if (tag >= pm8001_ha->tags_num)
+		return -SAS_QUEUE_FULL;
+	pm8001_tag_set(pm8001_ha, tag);
+	*tag_out = tag;
+	return 0;
+}
+
+void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha)
+{
+	int i;
+	for (i = 0; i < pm8001_ha->tags_num; ++i)
+		pm8001_tag_clear(pm8001_ha, i);
+}
+
+ /**
+  * pm8001_mem_alloc - allocate memory for pm8001.
+  * @pdev: pci device.
+  * @virt_addr: the allocated virtual address
+  * @pphys_addr_hi: the physical address high byte address.
+  * @pphys_addr_lo: the physical address low byte address.
+  * @mem_size: memory size.
+  */
+int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr,
+	dma_addr_t *pphys_addr, u32 *pphys_addr_hi,
+	u32 *pphys_addr_lo, u32 mem_size, u32 align)
+{
+	caddr_t mem_virt_alloc;
+	dma_addr_t mem_dma_handle;
+	u64 phys_align;
+	u64 align_offset = 0;
+	if (align)
+		align_offset = (dma_addr_t)align - 1;
+	mem_virt_alloc =
+		pci_alloc_consistent(pdev, mem_size + align, &mem_dma_handle);
+	if (!mem_virt_alloc) {
+		pm8001_printk("memory allocation error\n");
+		return -1;
+	}
+	memset((void *)mem_virt_alloc, 0, mem_size+align);
+	*pphys_addr = mem_dma_handle;
+	phys_align = (*pphys_addr + align_offset) & ~align_offset;
+	*virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr;
+	*pphys_addr_hi = upper_32_bits(phys_align);
+	*pphys_addr_lo = lower_32_bits(phys_align);
+	return 0;
+}
+/**
+  * pm8001_find_ha_by_dev - from domain device which come from sas layer to
+  * find out our hba struct.
+  * @dev: the domain device which from sas layer.
+  */
+static
+struct pm8001_hba_info *pm8001_find_ha_by_dev(struct domain_device *dev)
+{
+	struct sas_ha_struct *sha = dev->port->ha;
+	struct pm8001_hba_info *pm8001_ha = sha->lldd_ha;
+	return pm8001_ha;
+}
+
+/**
+  * pm8001_phy_control - this function should be registered to
+  * sas_domain_function_template to provide libsas used, note: this is just
+  * control the HBA phy rather than other expander phy if you want control
+  * other phy, you should use SMP command.
+  * @sas_phy: which phy in HBA phys.
+  * @func: the operation.
+  * @funcdata: always NULL.
+  */
+int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+	void *funcdata)
+{
+	int rc = 0, phy_id = sas_phy->id;
+	struct pm8001_hba_info *pm8001_ha = NULL;
+	struct sas_phy_linkrates *rates;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	pm8001_ha = sas_phy->ha->lldd_ha;
+	pm8001_ha->phy[phy_id].enable_completion = &completion;
+	switch (func) {
+	case PHY_FUNC_SET_LINK_RATE:
+		rates = funcdata;
+		if (rates->minimum_linkrate) {
+			pm8001_ha->phy[phy_id].minimum_linkrate =
+				rates->minimum_linkrate;
+		}
+		if (rates->maximum_linkrate) {
+			pm8001_ha->phy[phy_id].maximum_linkrate =
+				rates->maximum_linkrate;
+		}
+		if (pm8001_ha->phy[phy_id].phy_state == 0) {
+			PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id);
+			wait_for_completion(&completion);
+		}
+		PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id,
+					      PHY_LINK_RESET);
+		break;
+	case PHY_FUNC_HARD_RESET:
+		if (pm8001_ha->phy[phy_id].phy_state == 0) {
+			PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id);
+			wait_for_completion(&completion);
+		}
+		PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id,
+					      PHY_HARD_RESET);
+		break;
+	case PHY_FUNC_LINK_RESET:
+		if (pm8001_ha->phy[phy_id].phy_state == 0) {
+			PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id);
+			wait_for_completion(&completion);
+		}
+		PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id,
+					      PHY_LINK_RESET);
+		break;
+	case PHY_FUNC_RELEASE_SPINUP_HOLD:
+		PM8001_CHIP_DISP->phy_ctl_req(pm8001_ha, phy_id,
+					      PHY_LINK_RESET);
+		break;
+	case PHY_FUNC_DISABLE:
+		PM8001_CHIP_DISP->phy_stop_req(pm8001_ha, phy_id);
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+	}
+	msleep(300);
+	return rc;
+}
+
+int pm8001_slave_alloc(struct scsi_device *scsi_dev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+	if (dev_is_sata(dev)) {
+		/* We don't need to rescan targets
+		* if REPORT_LUNS request is failed
+		*/
+		if (scsi_dev->lun > 0)
+			return -ENXIO;
+		scsi_dev->tagged_supported = 1;
+	}
+	return sas_slave_alloc(scsi_dev);
+}
+
+/**
+  * pm8001_scan_start - we should enable all HBA phys by sending the phy_start
+  * command to HBA.
+  * @shost: the scsi host data.
+  */
+void pm8001_scan_start(struct Scsi_Host *shost)
+{
+	int i;
+	struct pm8001_hba_info *pm8001_ha;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	pm8001_ha = sha->lldd_ha;
+	for (i = 0; i < pm8001_ha->chip->n_phy; ++i)
+		PM8001_CHIP_DISP->phy_start_req(pm8001_ha, i);
+}
+
+int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time)
+{
+	/* give the phy enabling interrupt event time to come in (1s
+	* is empirically about all it takes) */
+	if (time < HZ)
+		return 0;
+	/* Wait for discovery to finish */
+	scsi_flush_work(shost);
+	return 1;
+}
+
+/**
+  * pm8001_task_prep_smp - the dispatcher function, prepare data for smp task
+  * @pm8001_ha: our hba card information
+  * @ccb: the ccb which attached to smp task
+  */
+static int pm8001_task_prep_smp(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	return PM8001_CHIP_DISP->smp_req(pm8001_ha, ccb);
+}
+
+u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag)
+{
+	struct ata_queued_cmd *qc = task->uldd_task;
+	if (qc) {
+		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
+			qc->tf.command == ATA_CMD_FPDMA_READ) {
+			*tag = qc->tag;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/**
+  * pm8001_task_prep_ata - the dispatcher function, prepare data for sata task
+  * @pm8001_ha: our hba card information
+  * @ccb: the ccb which attached to sata task
+  */
+static int pm8001_task_prep_ata(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	return PM8001_CHIP_DISP->sata_req(pm8001_ha, ccb);
+}
+
+/**
+  * pm8001_task_prep_ssp_tm - the dispatcher function, prepare task management data
+  * @pm8001_ha: our hba card information
+  * @ccb: the ccb which attached to TM
+  * @tmf: the task management IU
+  */
+static int pm8001_task_prep_ssp_tm(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf)
+{
+	return PM8001_CHIP_DISP->ssp_tm_req(pm8001_ha, ccb, tmf);
+}
+
+/**
+  * pm8001_task_prep_ssp - the dispatcher function,prepare ssp data for ssp task
+  * @pm8001_ha: our hba card information
+  * @ccb: the ccb which attached to ssp task
+  */
+static int pm8001_task_prep_ssp(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_ccb_info *ccb)
+{
+	return PM8001_CHIP_DISP->ssp_io_req(pm8001_ha, ccb);
+}
+int pm8001_slave_configure(struct scsi_device *sdev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(sdev);
+	int ret = sas_slave_configure(sdev);
+	if (ret)
+		return ret;
+	if (dev_is_sata(dev)) {
+	#ifdef PM8001_DISABLE_NCQ
+		struct ata_port *ap = dev->sata_dev.ap;
+		struct ata_device *adev = ap->link.device;
+		adev->flags |= ATA_DFLAG_NCQ_OFF;
+		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
+	#endif
+	}
+	return 0;
+}
+/**
+  * pm8001_task_exec -execute the task which come from upper level, send the
+  * command or data to DMA area and then increase CI,for queuecommand(ssp),
+  * it is from upper layer and for smp command,it is from libsas,
+  * for ata command it is from libata.
+  * @task: the task to be execute.
+  * @num: if can_queue great than 1, the task can be queued up. for SMP task,
+  * we always execute one one time.
+  * @gfp_flags: gfp_flags.
+  * @is tmf: if it is task management task.
+  * @tmf: the task management IU
+  */
+#define DEV_IS_GONE(pm8001_dev)	\
+	((!pm8001_dev || (pm8001_dev->dev_type == NO_DEVICE)))
+static int pm8001_task_exec(struct sas_task *task, const int num,
+	gfp_t gfp_flags, int is_tmf, struct pm8001_tmf_task *tmf)
+{
+	struct domain_device *dev = task->dev;
+	struct pm8001_hba_info *pm8001_ha;
+	struct pm8001_device *pm8001_dev;
+	struct sas_task *t = task;
+	struct pm8001_ccb_info *ccb;
+	u32 tag = 0xdeadbeef, rc, n_elem = 0;
+	u32 n = num;
+	unsigned long flags = 0;
+
+	if (!dev->port) {
+		struct task_status_struct *tsm = &t->task_status;
+		tsm->resp = SAS_TASK_UNDELIVERED;
+		tsm->stat = SAS_PHY_DOWN;
+		if (dev->dev_type != SATA_DEV)
+			t->task_done(t);
+		return 0;
+	}
+	pm8001_ha = pm8001_find_ha_by_dev(task->dev);
+	PM8001_IO_DBG(pm8001_ha, pm8001_printk("pm8001_task_exec device \n "));
+	spin_lock_irqsave(&pm8001_ha->lock, flags);
+	do {
+		dev = t->dev;
+		pm8001_dev = dev->lldd_dev;
+		if (DEV_IS_GONE(pm8001_dev)) {
+			if (pm8001_dev) {
+				PM8001_IO_DBG(pm8001_ha,
+					pm8001_printk("device %d not ready.\n",
+					pm8001_dev->device_id));
+			} else {
+				PM8001_IO_DBG(pm8001_ha,
+					pm8001_printk("device %016llx not "
+					"ready.\n", SAS_ADDR(dev->sas_addr)));
+			}
+		rc = SAS_PHY_DOWN;
+			goto out_done;
+		}
+		rc = pm8001_tag_alloc(pm8001_ha, &tag);
+		if (rc)
+			goto err_out;
+		ccb = &pm8001_ha->ccb_info[tag];
+
+		if (!sas_protocol_ata(t->task_proto)) {
+			if (t->num_scatter) {
+				n_elem = dma_map_sg(pm8001_ha->dev,
+					t->scatter,
+					t->num_scatter,
+					t->data_dir);
+				if (!n_elem) {
+					rc = -ENOMEM;
+					goto err_out;
+				}
+			}
+		} else {
+			n_elem = t->num_scatter;
+		}
+
+		t->lldd_task = NULL;
+		ccb->n_elem = n_elem;
+		ccb->ccb_tag = tag;
+		ccb->task = t;
+		switch (t->task_proto) {
+		case SAS_PROTOCOL_SMP:
+			rc = pm8001_task_prep_smp(pm8001_ha, ccb);
+			break;
+		case SAS_PROTOCOL_SSP:
+			if (is_tmf)
+				rc = pm8001_task_prep_ssp_tm(pm8001_ha,
+					ccb, tmf);
+			else
+				rc = pm8001_task_prep_ssp(pm8001_ha, ccb);
+			break;
+		case SAS_PROTOCOL_SATA:
+		case SAS_PROTOCOL_STP:
+		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+			rc = pm8001_task_prep_ata(pm8001_ha, ccb);
+			break;
+		default:
+			dev_printk(KERN_ERR, pm8001_ha->dev,
+				"unknown sas_task proto: 0x%x\n",
+				t->task_proto);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc) {
+			PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk("rc is %x\n", rc));
+			goto err_out_tag;
+		}
+		t->lldd_task = ccb;
+		/* TODO: select normal or high priority */
+		spin_lock(&t->task_state_lock);
+		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
+		spin_unlock(&t->task_state_lock);
+		pm8001_dev->running_req++;
+		if (n > 1)
+			t = list_entry(t->list.next, struct sas_task, list);
+	} while (--n);
+	rc = 0;
+	goto out_done;
+
+err_out_tag:
+	pm8001_tag_free(pm8001_ha, tag);
+err_out:
+	dev_printk(KERN_ERR, pm8001_ha->dev, "pm8001 exec failed[%d]!\n", rc);
+	if (!sas_protocol_ata(t->task_proto))
+		if (n_elem)
+			dma_unmap_sg(pm8001_ha->dev, t->scatter, n_elem,
+				t->data_dir);
+out_done:
+	spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+	return rc;
+}
+
+/**
+  * pm8001_queue_command - register for upper layer used, all IO commands sent
+  * to HBA are from this interface.
+  * @task: the task to be execute.
+  * @num: if can_queue great than 1, the task can be queued up. for SMP task,
+  * we always execute one one time
+  * @gfp_flags: gfp_flags
+  */
+int pm8001_queue_command(struct sas_task *task, const int num,
+		gfp_t gfp_flags)
+{
+	return pm8001_task_exec(task, num, gfp_flags, 0, NULL);
+}
+
+void pm8001_ccb_free(struct pm8001_hba_info *pm8001_ha, u32 ccb_idx)
+{
+	pm8001_tag_clear(pm8001_ha, ccb_idx);
+}
+
+/**
+  * pm8001_ccb_task_free - free the sg for ssp and smp command, free the ccb.
+  * @pm8001_ha: our hba card information
+  * @ccb: the ccb which attached to ssp task
+  * @task: the task to be free.
+  * @ccb_idx: ccb index.
+  */
+void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha,
+	struct sas_task *task, struct pm8001_ccb_info *ccb, u32 ccb_idx)
+{
+	if (!ccb->task)
+		return;
+	if (!sas_protocol_ata(task->task_proto))
+		if (ccb->n_elem)
+			dma_unmap_sg(pm8001_ha->dev, task->scatter,
+				task->num_scatter, task->data_dir);
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		dma_unmap_sg(pm8001_ha->dev, &task->smp_task.smp_resp, 1,
+			PCI_DMA_FROMDEVICE);
+		dma_unmap_sg(pm8001_ha->dev, &task->smp_task.smp_req, 1,
+			PCI_DMA_TODEVICE);
+		break;
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SSP:
+	default:
+		/* do nothing */
+		break;
+	}
+	task->lldd_task = NULL;
+	ccb->task = NULL;
+	ccb->ccb_tag = 0xFFFFFFFF;
+	pm8001_ccb_free(pm8001_ha, ccb_idx);
+}
+
+ /**
+  * pm8001_alloc_dev - find the empty pm8001_device structure, allocate and
+  * return it for use.
+  * @pm8001_ha: our hba card information
+  */
+struct pm8001_device *pm8001_alloc_dev(struct pm8001_hba_info *pm8001_ha)
+{
+	u32 dev;
+	for (dev = 0; dev < PM8001_MAX_DEVICES; dev++) {
+		if (pm8001_ha->devices[dev].dev_type == NO_DEVICE) {
+			pm8001_ha->devices[dev].id = dev;
+			return &pm8001_ha->devices[dev];
+		}
+	}
+	if (dev == PM8001_MAX_DEVICES) {
+		PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("max support %d devices, ignore ..\n",
+			PM8001_MAX_DEVICES));
+	}
+	return NULL;
+}
+
+static void pm8001_free_dev(struct pm8001_device *pm8001_dev)
+{
+	u32 id = pm8001_dev->id;
+	memset(pm8001_dev, 0, sizeof(*pm8001_dev));
+	pm8001_dev->id = id;
+	pm8001_dev->dev_type = NO_DEVICE;
+	pm8001_dev->device_id = PM8001_MAX_DEVICES;
+	pm8001_dev->sas_device = NULL;
+}
+
+/**
+  * pm8001_dev_found_notify - when libsas find a sas domain device, it should
+  * tell the LLDD that device is found, and then LLDD register this device to
+  * HBA FW by the command "OPC_INB_REG_DEV", after that the HBA will assign
+  * a device ID(according to device's sas address) and returned it to LLDD.from
+  * now on, we communicate with HBA FW with the device ID which HBA assigned
+  * rather than sas address. it is the neccessary step for our HBA but it is
+  * the optional for other HBA driver.
+  * @dev: the device structure which sas layer used.
+  */
+static int pm8001_dev_found_notify(struct domain_device *dev)
+{
+	unsigned long flags = 0;
+	int res = 0;
+	struct pm8001_hba_info *pm8001_ha = NULL;
+	struct domain_device *parent_dev = dev->parent;
+	struct pm8001_device *pm8001_device;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	u32 flag = 0;
+	pm8001_ha = pm8001_find_ha_by_dev(dev);
+	spin_lock_irqsave(&pm8001_ha->lock, flags);
+
+	pm8001_device = pm8001_alloc_dev(pm8001_ha);
+	pm8001_device->sas_device = dev;
+	if (!pm8001_device) {
+		res = -1;
+		goto found_out;
+	}
+	dev->lldd_dev = pm8001_device;
+	pm8001_device->dev_type = dev->dev_type;
+	pm8001_device->dcompletion = &completion;
+	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) {
+		int phy_id;
+		struct ex_phy *phy;
+		for (phy_id = 0; phy_id < parent_dev->ex_dev.num_phys;
+		phy_id++) {
+			phy = &parent_dev->ex_dev.ex_phy[phy_id];
+			if (SAS_ADDR(phy->attached_sas_addr)
+				== SAS_ADDR(dev->sas_addr)) {
+				pm8001_device->attached_phy = phy_id;
+				break;
+			}
+		}
+		if (phy_id == parent_dev->ex_dev.num_phys) {
+			PM8001_FAIL_DBG(pm8001_ha,
+			pm8001_printk("Error: no attached dev:%016llx"
+			" at ex:%016llx.\n", SAS_ADDR(dev->sas_addr),
+				SAS_ADDR(parent_dev->sas_addr)));
+			res = -1;
+		}
+	} else {
+		if (dev->dev_type == SATA_DEV) {
+			pm8001_device->attached_phy =
+				dev->rphy->identify.phy_identifier;
+				flag = 1; /* directly sata*/
+		}
+	} /*register this device to HBA*/
+	PM8001_DISC_DBG(pm8001_ha, pm8001_printk("Found device \n"));
+	PM8001_CHIP_DISP->reg_dev_req(pm8001_ha, pm8001_device, flag);
+	spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+	wait_for_completion(&completion);
+	if (dev->dev_type == SAS_END_DEV)
+		msleep(50);
+	pm8001_ha->flags = PM8001F_RUN_TIME ;
+	return 0;
+found_out:
+	spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+	return res;
+}
+
+int pm8001_dev_found(struct domain_device *dev)
+{
+	return pm8001_dev_found_notify(dev);
+}
+
+/**
+  * pm8001_alloc_task - allocate a task structure for TMF
+  */
+static struct sas_task *pm8001_alloc_task(void)
+{
+	struct sas_task *task = kzalloc(sizeof(*task), GFP_KERNEL);
+	if (task) {
+		INIT_LIST_HEAD(&task->list);
+		spin_lock_init(&task->task_state_lock);
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_timer(&task->timer);
+		init_completion(&task->completion);
+	}
+	return task;
+}
+
+static void pm8001_free_task(struct sas_task *task)
+{
+	if (task) {
+		BUG_ON(!list_empty(&task->list));
+		kfree(task);
+	}
+}
+
+static void pm8001_task_done(struct sas_task *task)
+{
+	if (!del_timer(&task->timer))
+		return;
+	complete(&task->completion);
+}
+
+static void pm8001_tmf_timedout(unsigned long data)
+{
+	struct sas_task *task = (struct sas_task *)data;
+
+	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	complete(&task->completion);
+}
+
+#define PM8001_TASK_TIMEOUT 20
+/**
+  * pm8001_exec_internal_tmf_task - when errors or exception happened, we may
+  * want to do something, for example abort issued task which result in this
+  * execption, this is by calling this function, note it is also with the task
+  * execute interface.
+  * @dev: the wanted device.
+  * @tmf: which task management wanted to be take.
+  * @para_len: para_len.
+  * @parameter: ssp task parameter.
+  */
+static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
+	void *parameter, u32 para_len, struct pm8001_tmf_task *tmf)
+{
+	int res, retry;
+	struct sas_task *task = NULL;
+	struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev);
+
+	for (retry = 0; retry < 3; retry++) {
+		task = pm8001_alloc_task();
+		if (!task)
+			return -ENOMEM;
+
+		task->dev = dev;
+		task->task_proto = dev->tproto;
+		memcpy(&task->ssp_task, parameter, para_len);
+		task->task_done = pm8001_task_done;
+		task->timer.data = (unsigned long)task;
+		task->timer.function = pm8001_tmf_timedout;
+		task->timer.expires = jiffies + PM8001_TASK_TIMEOUT*HZ;
+		add_timer(&task->timer);
+
+		res = pm8001_task_exec(task, 1, GFP_KERNEL, 1, tmf);
+
+		if (res) {
+			del_timer(&task->timer);
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("Executing internal task "
+				"failed\n"));
+			goto ex_err;
+		}
+		wait_for_completion(&task->completion);
+		res = -TMF_RESP_FUNC_FAILED;
+		/* Even TMF timed out, return direct. */
+		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				PM8001_FAIL_DBG(pm8001_ha,
+					pm8001_printk("TMF task[%x]timeout.\n",
+					tmf->tmf));
+				goto ex_err;
+			}
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+			task->task_status.stat == SAM_GOOD) {
+			res = TMF_RESP_FUNC_COMPLETE;
+			break;
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		task->task_status.stat == SAS_DATA_UNDERRUN) {
+			/* no error, but return the number of bytes of
+			* underrun */
+			res = task->task_status.residual;
+			break;
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+			task->task_status.stat == SAS_DATA_OVERRUN) {
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("Blocked task error.\n"));
+			res = -EMSGSIZE;
+			break;
+		} else {
+			PM8001_IO_DBG(pm8001_ha,
+			pm8001_printk(" Task to dev %016llx response: 0x%x"
+				"status 0x%x\n",
+				SAS_ADDR(dev->sas_addr),
+				task->task_status.resp,
+				task->task_status.stat));
+			pm8001_free_task(task);
+			task = NULL;
+		}
+	}
+ex_err:
+	BUG_ON(retry == 3 && task != NULL);
+	if (task != NULL)
+		pm8001_free_task(task);
+	return res;
+}
+
+static int
+pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
+	struct pm8001_device *pm8001_dev, struct domain_device *dev, u32 flag,
+	u32 task_tag)
+{
+	int res, retry;
+	u32 rc, ccb_tag;
+	struct pm8001_ccb_info *ccb;
+	struct sas_task *task = NULL;
+
+	for (retry = 0; retry < 3; retry++) {
+		task = pm8001_alloc_task();
+		if (!task)
+			return -ENOMEM;
+
+		task->dev = dev;
+		task->task_proto = dev->tproto;
+		task->task_done = pm8001_task_done;
+		task->timer.data = (unsigned long)task;
+		task->timer.function = pm8001_tmf_timedout;
+		task->timer.expires = jiffies + PM8001_TASK_TIMEOUT*HZ;
+		add_timer(&task->timer);
+
+		rc = pm8001_tag_alloc(pm8001_ha, &ccb_tag);
+		if (rc)
+			return rc;
+		ccb = &pm8001_ha->ccb_info[ccb_tag];
+		ccb->device = pm8001_dev;
+		ccb->ccb_tag = ccb_tag;
+		ccb->task = task;
+
+		res = PM8001_CHIP_DISP->task_abort(pm8001_ha,
+			pm8001_dev, flag, task_tag, ccb_tag);
+
+		if (res) {
+			del_timer(&task->timer);
+			PM8001_FAIL_DBG(pm8001_ha,
+				pm8001_printk("Executing internal task "
+				"failed\n"));
+			goto ex_err;
+		}
+		wait_for_completion(&task->completion);
+		res = TMF_RESP_FUNC_FAILED;
+		/* Even TMF timed out, return direct. */
+		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				PM8001_FAIL_DBG(pm8001_ha,
+					pm8001_printk("TMF task timeout.\n"));
+				goto ex_err;
+			}
+		}
+
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+			task->task_status.stat == SAM_GOOD) {
+			res = TMF_RESP_FUNC_COMPLETE;
+			break;
+
+		} else {
+			PM8001_IO_DBG(pm8001_ha,
+				pm8001_printk(" Task to dev %016llx response: "
+					"0x%x status 0x%x\n",
+				SAS_ADDR(dev->sas_addr),
+				task->task_status.resp,
+				task->task_status.stat));
+			pm8001_free_task(task);
+			task = NULL;
+		}
+	}
+ex_err:
+	BUG_ON(retry == 3 && task != NULL);
+	if (task != NULL)
+		pm8001_free_task(task);
+	return res;
+}
+
+/**
+  * pm8001_dev_gone_notify - see the comments for "pm8001_dev_found_notify"
+  * @dev: the device structure which sas layer used.
+  */
+static void pm8001_dev_gone_notify(struct domain_device *dev)
+{
+	unsigned long flags = 0;
+	u32 tag;
+	struct pm8001_hba_info *pm8001_ha;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	u32 device_id = pm8001_dev->device_id;
+	pm8001_ha = pm8001_find_ha_by_dev(dev);
+	spin_lock_irqsave(&pm8001_ha->lock, flags);
+	pm8001_tag_alloc(pm8001_ha, &tag);
+	if (pm8001_dev) {
+		PM8001_DISC_DBG(pm8001_ha,
+			pm8001_printk("found dev[%d:%x] is gone.\n",
+			pm8001_dev->device_id, pm8001_dev->dev_type));
+		if (pm8001_dev->running_req) {
+			spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+			pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev ,
+				dev, 1, 0);
+			spin_lock_irqsave(&pm8001_ha->lock, flags);
+		}
+		PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id);
+		pm8001_free_dev(pm8001_dev);
+	} else {
+		PM8001_DISC_DBG(pm8001_ha,
+			pm8001_printk("Found dev has gone.\n"));
+	}
+	dev->lldd_dev = NULL;
+	spin_unlock_irqrestore(&pm8001_ha->lock, flags);
+}
+
+void pm8001_dev_gone(struct domain_device *dev)
+{
+	pm8001_dev_gone_notify(dev);
+}
+
+static int pm8001_issue_ssp_tmf(struct domain_device *dev,
+	u8 *lun, struct pm8001_tmf_task *tmf)
+{
+	struct sas_ssp_task ssp_task;
+	if (!(dev->tproto & SAS_PROTOCOL_SSP))
+		return TMF_RESP_FUNC_ESUPP;
+
+	strncpy((u8 *)&ssp_task.LUN, lun, 8);
+	return pm8001_exec_internal_tmf_task(dev, &ssp_task, sizeof(ssp_task),
+		tmf);
+}
+
+/**
+  * Standard mandates link reset for ATA  (type 0) and hard reset for
+  * SSP (type 1) , only for RECOVERY
+  */
+int pm8001_I_T_nexus_reset(struct domain_device *dev)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct pm8001_device *pm8001_dev;
+	struct pm8001_hba_info *pm8001_ha;
+	struct sas_phy *phy;
+	if (!dev || !dev->lldd_dev)
+		return -1;
+
+	pm8001_dev = dev->lldd_dev;
+	pm8001_ha = pm8001_find_ha_by_dev(dev);
+	phy = sas_find_local_phy(dev);
+
+	if (dev_is_sata(dev)) {
+		DECLARE_COMPLETION_ONSTACK(completion_setstate);
+		rc = sas_phy_reset(phy, 1);
+		msleep(2000);
+		rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev ,
+			dev, 1, 0);
+		pm8001_dev->setds_completion = &completion_setstate;
+		rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha,
+			pm8001_dev, 0x01);
+		wait_for_completion(&completion_setstate);
+	} else{
+	rc = sas_phy_reset(phy, 1);
+	msleep(2000);
+	}
+	PM8001_EH_DBG(pm8001_ha, pm8001_printk(" for device[%x]:rc=%d\n",
+		pm8001_dev->device_id, rc));
+	return rc;
+}
+
+/* mandatory SAM-3, the task reset the specified LUN*/
+int pm8001_lu_reset(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct pm8001_tmf_task tmf_task;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev);
+	if (dev_is_sata(dev)) {
+		struct sas_phy *phy = sas_find_local_phy(dev);
+		rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev ,
+			dev, 1, 0);
+		rc = sas_phy_reset(phy, 1);
+		rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha,
+			pm8001_dev, 0x01);
+		msleep(2000);
+	} else {
+		tmf_task.tmf = TMF_LU_RESET;
+		rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
+	}
+	/* If failed, fall-through I_T_Nexus reset */
+	PM8001_EH_DBG(pm8001_ha, pm8001_printk("for device[%x]:rc=%d\n",
+		pm8001_dev->device_id, rc));
+	return rc;
+}
+
+/* optional SAM-3 */
+int pm8001_query_task(struct sas_task *task)
+{
+	u32 tag = 0xdeadbeef;
+	int i = 0;
+	struct scsi_lun lun;
+	struct pm8001_tmf_task tmf_task;
+	int rc = TMF_RESP_FUNC_FAILED;
+	if (unlikely(!task || !task->lldd_task || !task->dev))
+		return rc;
+
+	if (task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd *cmnd = task->uldd_task;
+		struct domain_device *dev = task->dev;
+		struct pm8001_hba_info *pm8001_ha =
+			pm8001_find_ha_by_dev(dev);
+
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = pm8001_find_tag(task, &tag);
+		if (rc == 0) {
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+		PM8001_EH_DBG(pm8001_ha, pm8001_printk("Query:["));
+		for (i = 0; i < 16; i++)
+			printk(KERN_INFO "%02x ", cmnd->cmnd[i]);
+		printk(KERN_INFO "]\n");
+		tmf_task.tmf = 	TMF_QUERY_TASK;
+		tmf_task.tag_of_task_to_be_managed = tag;
+
+		rc = pm8001_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
+		switch (rc) {
+		/* The task is still in Lun, release it then */
+		case TMF_RESP_FUNC_SUCC:
+			PM8001_EH_DBG(pm8001_ha,
+				pm8001_printk("The task is still in Lun \n"));
+		/* The task is not in Lun or failed, reset the phy */
+		case TMF_RESP_FUNC_FAILED:
+		case TMF_RESP_FUNC_COMPLETE:
+			PM8001_EH_DBG(pm8001_ha,
+			pm8001_printk("The task is not in Lun or failed,"
+			" reset the phy \n"));
+			break;
+		}
+	}
+	pm8001_printk(":rc= %d\n", rc);
+	return rc;
+}
+
+/*  mandatory SAM-3, still need free task/ccb info, abord the specified task */
+int pm8001_abort_task(struct sas_task *task)
+{
+	unsigned long flags;
+	u32 tag = 0xdeadbeef;
+	u32 device_id;
+	struct domain_device *dev ;
+	struct pm8001_hba_info *pm8001_ha = NULL;
+	struct pm8001_ccb_info *ccb;
+	struct scsi_lun lun;
+	struct pm8001_device *pm8001_dev;
+	struct pm8001_tmf_task tmf_task;
+	int rc = TMF_RESP_FUNC_FAILED;
+	if (unlikely(!task || !task->lldd_task || !task->dev))
+		return rc;
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		rc = TMF_RESP_FUNC_COMPLETE;
+		goto out;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	if (task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd *cmnd = task->uldd_task;
+		dev = task->dev;
+		ccb = task->lldd_task;
+		pm8001_dev = dev->lldd_dev;
+		pm8001_ha = pm8001_find_ha_by_dev(dev);
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = pm8001_find_tag(task, &tag);
+		if (rc == 0) {
+			printk(KERN_INFO "No such tag in %s\n", __func__);
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+		device_id = pm8001_dev->device_id;
+		PM8001_EH_DBG(pm8001_ha,
+		pm8001_printk("abort io to device_id = %d\n", device_id));
+		tmf_task.tmf = 	TMF_ABORT_TASK;
+		tmf_task.tag_of_task_to_be_managed = tag;
+		rc = pm8001_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
+		rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev,
+			pm8001_dev->sas_device, 0, tag);
+	} else if (task->task_proto & SAS_PROTOCOL_SATA ||
+		task->task_proto & SAS_PROTOCOL_STP) {
+		dev = task->dev;
+		pm8001_dev = dev->lldd_dev;
+		pm8001_ha = pm8001_find_ha_by_dev(dev);
+		rc = pm8001_find_tag(task, &tag);
+		if (rc == 0) {
+			printk(KERN_INFO "No such tag in %s\n", __func__);
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+		rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev,
+			pm8001_dev->sas_device, 0, tag);
+	} else if (task->task_proto & SAS_PROTOCOL_SMP) {
+		/* SMP */
+		dev = task->dev;
+		pm8001_dev = dev->lldd_dev;
+		pm8001_ha = pm8001_find_ha_by_dev(dev);
+		rc = pm8001_find_tag(task, &tag);
+		if (rc == 0) {
+			printk(KERN_INFO "No such tag in %s\n", __func__);
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
+		rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev,
+			pm8001_dev->sas_device, 0, tag);
+
+	}
+out:
+	if (rc != TMF_RESP_FUNC_COMPLETE)
+		pm8001_printk("rc= %d\n", rc);
+	return rc;
+}
+
+int pm8001_abort_task_set(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct pm8001_tmf_task tmf_task;
+
+	tmf_task.tmf = TMF_ABORT_TASK_SET;
+	rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
+	return rc;
+}
+
+int pm8001_clear_aca(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct pm8001_tmf_task tmf_task;
+
+	tmf_task.tmf = TMF_CLEAR_ACA;
+	rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
+
+	return rc;
+}
+
+int pm8001_clear_task_set(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct pm8001_tmf_task tmf_task;
+	struct pm8001_device *pm8001_dev = dev->lldd_dev;
+	struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev);
+
+	PM8001_EH_DBG(pm8001_ha,
+		pm8001_printk("I_T_L_Q clear task set[%x]\n",
+		pm8001_dev->device_id));
+	tmf_task.tmf = TMF_CLEAR_TASK_SET;
+	rc = pm8001_issue_ssp_tmf(dev, lun, &tmf_task);
+	return rc;
+}
+
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
new file mode 100644
index 000000000000..14c676bbb533
--- /dev/null
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -0,0 +1,480 @@
+/*
+ * PMC-Sierra SPC 8001 SAS/SATA based host adapters driver
+ *
+ * Copyright (c) 2008-2009 USI Co., Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef _PM8001_SAS_H_
+#define _PM8001_SAS_H_
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <scsi/libsas.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/sas_ata.h>
+#include <asm/atomic.h>
+#include "pm8001_defs.h"
+
+#define DRV_NAME		"pm8001"
+#define DRV_VERSION		"0.1.36"
+#define PM8001_FAIL_LOGGING	0x01 /* libsas EH function logging */
+#define PM8001_INIT_LOGGING	0x02 /* driver init logging */
+#define PM8001_DISC_LOGGING	0x04 /* discovery layer logging */
+#define PM8001_IO_LOGGING	0x08 /* I/O path logging */
+#define PM8001_EH_LOGGING	0x10 /* Error message logging */
+#define PM8001_IOCTL_LOGGING	0x20 /* IOCTL message logging */
+#define PM8001_MSG_LOGGING	0x40 /* misc message logging */
+#define pm8001_printk(format, arg...)	printk(KERN_INFO "%s %d:" format,\
+				__func__, __LINE__, ## arg)
+#define PM8001_CHECK_LOGGING(HBA, LEVEL, CMD)	\
+do {						\
+	if (unlikely(HBA->logging_level & LEVEL))	\
+		do {					\
+			CMD;				\
+		} while (0);				\
+} while (0);
+
+#define PM8001_EH_DBG(HBA, CMD)			\
+	PM8001_CHECK_LOGGING(HBA, PM8001_EH_LOGGING, CMD)
+
+#define PM8001_INIT_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_INIT_LOGGING, CMD)
+
+#define PM8001_DISC_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_DISC_LOGGING, CMD)
+
+#define PM8001_IO_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_IO_LOGGING, CMD)
+
+#define PM8001_FAIL_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_FAIL_LOGGING, CMD)
+
+#define PM8001_IOCTL_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_IOCTL_LOGGING, CMD)
+
+#define PM8001_MSG_DBG(HBA, CMD)		\
+	PM8001_CHECK_LOGGING(HBA, PM8001_MSG_LOGGING, CMD)
+
+
+#define PM8001_USE_TASKLET
+#define PM8001_USE_MSIX
+
+
+#define DEV_IS_EXPANDER(type)	((type == EDGE_DEV) || (type == FANOUT_DEV))
+
+#define PM8001_NAME_LENGTH		32/* generic length of strings */
+extern struct list_head hba_list;
+extern const struct pm8001_dispatch pm8001_8001_dispatch;
+
+struct pm8001_hba_info;
+struct pm8001_ccb_info;
+struct pm8001_device;
+struct pm8001_tmf_task;
+struct pm8001_dispatch {
+	char *name;
+	int (*chip_init)(struct pm8001_hba_info *pm8001_ha);
+	int (*chip_soft_rst)(struct pm8001_hba_info *pm8001_ha, u32 signature);
+	void (*chip_rst)(struct pm8001_hba_info *pm8001_ha);
+	int (*chip_ioremap)(struct pm8001_hba_info *pm8001_ha);
+	void (*chip_iounmap)(struct pm8001_hba_info *pm8001_ha);
+	void (*isr)(struct pm8001_hba_info *pm8001_ha);
+	u32 (*is_our_interupt)(struct pm8001_hba_info *pm8001_ha);
+	int (*isr_process_oq)(struct pm8001_hba_info *pm8001_ha);
+	void (*interrupt_enable)(struct pm8001_hba_info *pm8001_ha);
+	void (*interrupt_disable)(struct pm8001_hba_info *pm8001_ha);
+	void (*make_prd)(struct scatterlist *scatter, int nr, void *prd);
+	int (*smp_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_ccb_info *ccb);
+	int (*ssp_io_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_ccb_info *ccb);
+	int (*sata_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_ccb_info *ccb);
+	int (*phy_start_req)(struct pm8001_hba_info *pm8001_ha,	u8 phy_id);
+	int (*phy_stop_req)(struct pm8001_hba_info *pm8001_ha, u8 phy_id);
+	int (*reg_dev_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_device *pm8001_dev, u32 flag);
+	int (*dereg_dev_req)(struct pm8001_hba_info *pm8001_ha, u32 device_id);
+	int (*phy_ctl_req)(struct pm8001_hba_info *pm8001_ha,
+		u32 phy_id, u32 phy_op);
+	int (*task_abort)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_device *pm8001_dev, u8 flag, u32 task_tag,
+		u32 cmd_tag);
+	int (*ssp_tm_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_ccb_info *ccb, struct pm8001_tmf_task *tmf);
+	int (*get_nvmd_req)(struct pm8001_hba_info *pm8001_ha, void *payload);
+	int (*set_nvmd_req)(struct pm8001_hba_info *pm8001_ha, void *payload);
+	int (*fw_flash_update_req)(struct pm8001_hba_info *pm8001_ha,
+		void *payload);
+	int (*set_dev_state_req)(struct pm8001_hba_info *pm8001_ha,
+		struct pm8001_device *pm8001_dev, u32 state);
+	int (*sas_diag_start_end_req)(struct pm8001_hba_info *pm8001_ha,
+		u32 state);
+	int (*sas_diag_execute_req)(struct pm8001_hba_info *pm8001_ha,
+		u32 state);
+};
+
+struct pm8001_chip_info {
+	u32	n_phy;
+	const struct pm8001_dispatch	*dispatch;
+};
+#define PM8001_CHIP_DISP	(pm8001_ha->chip->dispatch)
+
+struct pm8001_port {
+	struct asd_sas_port	sas_port;
+};
+
+struct pm8001_phy {
+	struct pm8001_hba_info	*pm8001_ha;
+	struct pm8001_port	*port;
+	struct asd_sas_phy	sas_phy;
+	struct sas_identify	identify;
+	struct scsi_device	*sdev;
+	u64			dev_sas_addr;
+	u32			phy_type;
+	struct completion	*enable_completion;
+	u32			frame_rcvd_size;
+	u8			frame_rcvd[32];
+	u8			phy_attached;
+	u8			phy_state;
+	enum sas_linkrate	minimum_linkrate;
+	enum sas_linkrate	maximum_linkrate;
+};
+
+struct pm8001_device {
+	enum sas_dev_type	dev_type;
+	struct domain_device	*sas_device;
+	u32			attached_phy;
+	u32			id;
+	struct completion	*dcompletion;
+	struct completion	*setds_completion;
+	u32			device_id;
+	u32			running_req;
+};
+
+struct pm8001_prd_imt {
+	__le32			len;
+	__le32			e;
+};
+
+struct pm8001_prd {
+	__le64			addr;		/* 64-bit buffer address */
+	struct pm8001_prd_imt	im_len;		/* 64-bit length */
+} __attribute__ ((packed));
+/*
+ * CCB(Command Control Block)
+ */
+struct pm8001_ccb_info {
+	struct list_head	entry;
+	struct sas_task		*task;
+	u32			n_elem;
+	u32			ccb_tag;
+	dma_addr_t		ccb_dma_handle;
+	struct pm8001_device	*device;
+	struct pm8001_prd	buf_prd[PM8001_MAX_DMA_SG];
+	struct fw_control_ex	*fw_control_context;
+};
+
+struct mpi_mem {
+	void			*virt_ptr;
+	dma_addr_t		phys_addr;
+	u32			phys_addr_hi;
+	u32			phys_addr_lo;
+	u32			total_len;
+	u32			num_elements;
+	u32			element_size;
+	u32			alignment;
+};
+
+struct mpi_mem_req {
+	/* The number of element in the  mpiMemory array */
+	u32			count;
+	/* The array of structures that define memroy regions*/
+	struct mpi_mem		region[USI_MAX_MEMCNT];
+};
+
+struct main_cfg_table {
+	u32			signature;
+	u32			interface_rev;
+	u32			firmware_rev;
+	u32			max_out_io;
+	u32			max_sgl;
+	u32			ctrl_cap_flag;
+	u32			gst_offset;
+	u32			inbound_queue_offset;
+	u32			outbound_queue_offset;
+	u32			inbound_q_nppd_hppd;
+	u32			outbound_hw_event_pid0_3;
+	u32			outbound_hw_event_pid4_7;
+	u32			outbound_ncq_event_pid0_3;
+	u32			outbound_ncq_event_pid4_7;
+	u32			outbound_tgt_ITNexus_event_pid0_3;
+	u32			outbound_tgt_ITNexus_event_pid4_7;
+	u32			outbound_tgt_ssp_event_pid0_3;
+	u32			outbound_tgt_ssp_event_pid4_7;
+	u32			outbound_tgt_smp_event_pid0_3;
+	u32			outbound_tgt_smp_event_pid4_7;
+	u32			upper_event_log_addr;
+	u32			lower_event_log_addr;
+	u32			event_log_size;
+	u32			event_log_option;
+	u32			upper_iop_event_log_addr;
+	u32			lower_iop_event_log_addr;
+	u32			iop_event_log_size;
+	u32			iop_event_log_option;
+	u32			fatal_err_interrupt;
+	u32			fatal_err_dump_offset0;
+	u32			fatal_err_dump_length0;
+	u32			fatal_err_dump_offset1;
+	u32			fatal_err_dump_length1;
+	u32			hda_mode_flag;
+	u32			anolog_setup_table_offset;
+};
+struct general_status_table {
+	u32			gst_len_mpistate;
+	u32			iq_freeze_state0;
+	u32			iq_freeze_state1;
+	u32			msgu_tcnt;
+	u32			iop_tcnt;
+	u32			reserved;
+	u32			phy_state[8];
+	u32			reserved1;
+	u32			reserved2;
+	u32			reserved3;
+	u32			recover_err_info[8];
+};
+struct inbound_queue_table {
+	u32			element_pri_size_cnt;
+	u32			upper_base_addr;
+	u32			lower_base_addr;
+	u32			ci_upper_base_addr;
+	u32			ci_lower_base_addr;
+	u32			pi_pci_bar;
+	u32			pi_offset;
+	u32			total_length;
+	void			*base_virt;
+	void			*ci_virt;
+	u32			reserved;
+	__le32			consumer_index;
+	u32			producer_idx;
+};
+struct outbound_queue_table {
+	u32			element_size_cnt;
+	u32			upper_base_addr;
+	u32			lower_base_addr;
+	void			*base_virt;
+	u32			pi_upper_base_addr;
+	u32			pi_lower_base_addr;
+	u32			ci_pci_bar;
+	u32			ci_offset;
+	u32			total_length;
+	void			*pi_virt;
+	u32			interrup_vec_cnt_delay;
+	u32			dinterrup_to_pci_offset;
+	__le32			producer_index;
+	u32			consumer_idx;
+};
+struct pm8001_hba_memspace {
+	void __iomem  		*memvirtaddr;
+	u64			membase;
+	u32			memsize;
+};
+struct pm8001_hba_info {
+	char			name[PM8001_NAME_LENGTH];
+	struct list_head	list;
+	unsigned long		flags;
+	spinlock_t		lock;/* host-wide lock */
+	struct pci_dev		*pdev;/* our device */
+	struct device		*dev;
+	struct pm8001_hba_memspace io_mem[6];
+	struct mpi_mem_req	memoryMap;
+	void __iomem	*msg_unit_tbl_addr;/*Message Unit Table Addr*/
+	void __iomem	*main_cfg_tbl_addr;/*Main Config Table Addr*/
+	void __iomem	*general_stat_tbl_addr;/*General Status Table Addr*/
+	void __iomem	*inbnd_q_tbl_addr;/*Inbound Queue Config Table Addr*/
+	void __iomem	*outbnd_q_tbl_addr;/*Outbound Queue Config Table Addr*/
+	struct main_cfg_table	main_cfg_tbl;
+	struct general_status_table	gs_tbl;
+	struct inbound_queue_table	inbnd_q_tbl[PM8001_MAX_INB_NUM];
+	struct outbound_queue_table	outbnd_q_tbl[PM8001_MAX_OUTB_NUM];
+	u8			sas_addr[SAS_ADDR_SIZE];
+	struct sas_ha_struct	*sas;/* SCSI/SAS glue */
+	struct Scsi_Host	*shost;
+	u32			chip_id;
+	const struct pm8001_chip_info	*chip;
+	struct completion	*nvmd_completion;
+	int			tags_num;
+	unsigned long		*tags;
+	struct pm8001_phy	phy[PM8001_MAX_PHYS];
+	struct pm8001_port	port[PM8001_MAX_PHYS];
+	u32			id;
+	u32			irq;
+	struct pm8001_device	*devices;
+	struct pm8001_ccb_info	*ccb_info;
+#ifdef PM8001_USE_MSIX
+	struct msix_entry	msix_entries[16];/*for msi-x interrupt*/
+	int			number_of_intr;/*will be used in remove()*/
+#endif
+#ifdef PM8001_USE_TASKLET
+	struct tasklet_struct	tasklet;
+#endif
+	struct list_head 	wq_list;
+	u32			logging_level;
+	u32			fw_status;
+	const struct firmware 	*fw_image;
+};
+
+struct pm8001_wq {
+	struct delayed_work work_q;
+	struct pm8001_hba_info *pm8001_ha;
+	void *data;
+	int handler;
+	struct list_head entry;
+};
+
+struct pm8001_fw_image_header {
+	u8 vender_id[8];
+	u8 product_id;
+	u8 hardware_rev;
+	u8 dest_partition;
+	u8 reserved;
+	u8 fw_rev[4];
+	__be32  image_length;
+	__be32 image_crc;
+	__be32 startup_entry;
+} __attribute__((packed, aligned(4)));
+
+/* define task management IU */
+struct pm8001_tmf_task {
+	u8	tmf;
+	u32	tag_of_task_to_be_managed;
+};
+/**
+ * FW Flash Update status values
+ */
+#define FLASH_UPDATE_COMPLETE_PENDING_REBOOT	0x00
+#define FLASH_UPDATE_IN_PROGRESS		0x01
+#define FLASH_UPDATE_HDR_ERR			0x02
+#define FLASH_UPDATE_OFFSET_ERR			0x03
+#define FLASH_UPDATE_CRC_ERR			0x04
+#define FLASH_UPDATE_LENGTH_ERR			0x05
+#define FLASH_UPDATE_HW_ERR			0x06
+#define FLASH_UPDATE_DNLD_NOT_SUPPORTED		0x10
+#define FLASH_UPDATE_DISABLED			0x11
+
+/**
+ * brief param structure for firmware flash update.
+ */
+struct fw_flash_updata_info {
+	u32			cur_image_offset;
+	u32			cur_image_len;
+	u32			total_image_len;
+	struct pm8001_prd	sgl;
+};
+
+struct fw_control_info {
+	u32			retcode;/*ret code (status)*/
+	u32			phase;/*ret code phase*/
+	u32			phaseCmplt;/*percent complete for the current
+	update phase */
+	u32			version;/*Hex encoded firmware version number*/
+	u32			offset;/*Used for downloading firmware	*/
+	u32			len; /*len of buffer*/
+	u32			size;/* Used in OS VPD and Trace get size
+	operations.*/
+	u32			reserved;/* padding required for 64 bit
+	alignment */
+	u8			buffer[1];/* Start of buffer */
+};
+struct fw_control_ex {
+	struct fw_control_info *fw_control;
+	void			*buffer;/* keep buffer pointer to be
+	freed when the responce comes*/
+	void			*virtAddr;/* keep virtual address of the data */
+	void			*usrAddr;/* keep virtual address of the
+	user data */
+	dma_addr_t		phys_addr;
+	u32			len; /* len of buffer  */
+	void			*payload; /* pointer to IOCTL Payload */
+	u8			inProgress;/*if 1 - the IOCTL request is in
+	progress */
+	void			*param1;
+	void			*param2;
+	void			*param3;
+};
+
+/******************** function prototype *********************/
+int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out);
+void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha);
+u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag);
+void pm8001_ccb_free(struct pm8001_hba_info *pm8001_ha, u32 ccb_idx);
+void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha,
+	struct sas_task *task, struct pm8001_ccb_info *ccb, u32 ccb_idx);
+int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+	void *funcdata);
+int pm8001_slave_alloc(struct scsi_device *scsi_dev);
+int pm8001_slave_configure(struct scsi_device *sdev);
+void pm8001_scan_start(struct Scsi_Host *shost);
+int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time);
+int pm8001_queue_command(struct sas_task *task, const int num,
+	gfp_t gfp_flags);
+int pm8001_abort_task(struct sas_task *task);
+int pm8001_abort_task_set(struct domain_device *dev, u8 *lun);
+int pm8001_clear_aca(struct domain_device *dev, u8 *lun);
+int pm8001_clear_task_set(struct domain_device *dev, u8 *lun);
+int pm8001_dev_found(struct domain_device *dev);
+void pm8001_dev_gone(struct domain_device *dev);
+int pm8001_lu_reset(struct domain_device *dev, u8 *lun);
+int pm8001_I_T_nexus_reset(struct domain_device *dev);
+int pm8001_query_task(struct sas_task *task);
+int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr,
+	dma_addr_t *pphys_addr, u32 *pphys_addr_hi, u32 *pphys_addr_lo,
+	u32 mem_size, u32 align);
+
+
+/* ctl shared API */
+extern struct device_attribute *pm8001_host_attrs[];
+
+#endif
+
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b0f0f3851cd4..161fadb291d1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1586,6 +1586,8 @@
 #define PCI_VENDOR_ID_COMPEX		0x11f6
 #define PCI_DEVICE_ID_COMPEX_ENET100VG4	0x0112
 
+#define PCI_VENDOR_ID_PMC_Sierra	0x11f8
+
 #define PCI_VENDOR_ID_RP		0x11fe
 #define PCI_DEVICE_ID_RP32INTF		0x0001
 #define PCI_DEVICE_ID_RP8INTF		0x0002
-- 
cgit v1.2.3


From e881a172dac4d9ea3b2a1540041d872963c269bd Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 15 Oct 2009 17:46:39 -0700
Subject: [SCSI] modify change_queue_depth to take in reason why it is being
 called

This patch modifies scsi_host_template->change_queue_depth so that
it takes an argument indicating why it is being called. This will be
used so that if a LLD needs to do some extra processing when
handling queue fulls or later ramp ups, it can do so.

This is a simple port of the drivers setting a change_queue_depth
callback. In the patch I just have these LLDs adjust the queue depth
if the user was requesting it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>

[Vasu.Dev: v2
	Also converted pmcraid_change_queue_depth and then verified
all modules compile  using "make allmodconfig" for any new build
warnings on X86_64.

	Updated original description after combing two original
patches from Mike to make this patch git bisectable.]
Signed-off-by: Vasu Dev <vasu.dev@intel.com>
[jejb: fixed up 53c700]
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/ata/libata-scsi.c             |  7 ++++++-
 drivers/ata/sata_nv.c                 |  2 +-
 drivers/message/fusion/mptscsih.c     |  9 +++++++--
 drivers/message/fusion/mptscsih.h     |  3 ++-
 drivers/s390/scsi/zfcp_scsi.c         |  6 +++++-
 drivers/scsi/3w-9xxx.c                |  6 +++++-
 drivers/scsi/3w-xxxx.c                |  6 +++++-
 drivers/scsi/53c700.c                 |  7 +++++--
 drivers/scsi/aacraid/linit.c          |  6 +++++-
 drivers/scsi/arcmsr/arcmsr_hba.c      |  5 ++++-
 drivers/scsi/hptiop.c                 |  5 ++++-
 drivers/scsi/ibmvscsi/ibmvfc.c        |  7 ++++++-
 drivers/scsi/ibmvscsi/ibmvscsi.c      |  7 ++++++-
 drivers/scsi/ipr.c                    |  7 ++++++-
 drivers/scsi/libfc/fc_fcp.c           |  5 ++++-
 drivers/scsi/libiscsi.c               |  5 ++++-
 drivers/scsi/libsas/sas_scsi_host.c   |  6 +++++-
 drivers/scsi/megaraid/megaraid_mbox.c |  7 ++++++-
 drivers/scsi/mpt2sas/mpt2sas_scsih.c  | 10 +++++++---
 drivers/scsi/pmcraid.c                |  7 ++++++-
 drivers/scsi/qla2xxx/qla_os.c         |  7 +++++--
 drivers/scsi/scsi_sysfs.c             |  3 ++-
 include/linux/libata.h                |  2 +-
 include/scsi/libfc.h                  |  2 +-
 include/scsi/libiscsi.h               |  3 ++-
 include/scsi/libsas.h                 |  3 ++-
 include/scsi/scsi_host.h              |  8 +++++++-
 27 files changed, 119 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b4ee28dec521..5d52c2fcd076 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1208,6 +1208,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
  *	ata_scsi_change_queue_depth - SCSI callback for queue depth config
  *	@sdev: SCSI device to configure queue depth for
  *	@queue_depth: new queue depth
+ *	@reason: calling context
  *
  *	This is libata standard hostt->change_queue_depth callback.
  *	SCSI will call into this callback when user tries to set queue
@@ -1219,12 +1220,16 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
  *	RETURNS:
  *	Newly configured queue depth.
  */
-int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth,
+				int reason)
 {
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
 	struct ata_device *dev;
 	unsigned long flags;
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (queue_depth < 1 || queue_depth == sdev->queue_depth)
 		return sdev->queue_depth;
 
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 1eb4e020eb5c..0c82d335c55d 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1975,7 +1975,7 @@ static int nv_swncq_slave_config(struct scsi_device *sdev)
 	ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
 
 	if (strncmp(model_num, "Maxtor", 6) == 0) {
-		ata_scsi_change_queue_depth(sdev, 1);
+		ata_scsi_change_queue_depth(sdev, 1, SCSI_QDEPTH_DEFAULT);
 		ata_dev_printk(dev, KERN_NOTICE,
 			"Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth);
 	}
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index f68ec48a881e..57752751712b 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -2351,11 +2351,12 @@ mptscsih_slave_destroy(struct scsi_device *sdev)
  *	mptscsih_change_queue_depth - This function will set a devices queue depth
  *	@sdev: per scsi_device pointer
  *	@qdepth: requested queue depth
+ *	@reason: calling context
  *
  *	Adding support for new 'change_queue_depth' api.
 */
 int
-mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
+mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
 {
 	MPT_SCSI_HOST		*hd = shost_priv(sdev->host);
 	VirtTarget 		*vtarget;
@@ -2367,6 +2368,9 @@ mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 	starget = scsi_target(sdev);
 	vtarget = starget->hostdata;
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (ioc->bus_type == SPI) {
 		if (!(vtarget->tflags & MPT_TARGET_FLAGS_Q_YES))
 			max_depth = 1;
@@ -2433,7 +2437,8 @@ mptscsih_slave_configure(struct scsi_device *sdev)
 		    ioc->name, vtarget->negoFlags, vtarget->maxOffset,
 		    vtarget->minSyncFactor));
 
-	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH);
+	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH,
+				    SCSI_QDEPTH_DEFAULT);
 	dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"tagged %d, simple %d, ordered %d\n",
 		ioc->name,sdev->tagged_supported, sdev->simple_tags,
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index e0b33e04a33b..45a5ff3eff61 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -128,7 +128,8 @@ extern int mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_F
 extern int mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r);
 extern int mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply);
 extern int mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
-extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth);
+extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth,
+				       int reason);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern struct device_attribute *mptscsih_host_attrs[];
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 0e1a34627a2e..ad1154701729 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -29,8 +29,12 @@ char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *fcp_rsp_iu)
 	return fcp_sns_info_ptr;
 }
 
-static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth)
+static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth,
+					int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
 	return sdev->queue_depth;
 }
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 36c21b19e5d7..2d16d49fd3cd 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -186,8 +186,12 @@ static ssize_t twa_show_stats(struct device *dev,
 } /* End twa_show_stats() */
 
 /* This function will set a devices queue depth */
-static int twa_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+static int twa_change_queue_depth(struct scsi_device *sdev, int queue_depth,
+				  int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (queue_depth > TW_Q_LENGTH-2)
 		queue_depth = TW_Q_LENGTH-2;
 	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index faa0fcfed71e..d224294c38fb 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -521,8 +521,12 @@ static ssize_t tw_show_stats(struct device *dev, struct device_attribute *attr,
 } /* End tw_show_stats() */
 
 /* This function will set a devices queue depth */
-static int tw_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+static int tw_change_queue_depth(struct scsi_device *sdev, int queue_depth,
+				 int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (queue_depth > TW_Q_LENGTH-2)
 		queue_depth = TW_Q_LENGTH-2;
 	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index f5a9addb7050..6c60a8060c58 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -175,7 +175,7 @@ STATIC void NCR_700_chip_reset(struct Scsi_Host *host);
 STATIC int NCR_700_slave_alloc(struct scsi_device *SDpnt);
 STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt);
 STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt);
-static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth);
+static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth, int reason);
 static int NCR_700_change_queue_type(struct scsi_device *SDpnt, int depth);
 
 STATIC struct device_attribute *NCR_700_dev_attrs[];
@@ -2082,8 +2082,11 @@ NCR_700_slave_destroy(struct scsi_device *SDp)
 }
 
 static int
-NCR_700_change_queue_depth(struct scsi_device *SDp, int depth)
+NCR_700_change_queue_depth(struct scsi_device *SDp, int depth, int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (depth > NCR_700_MAX_TAGS)
 		depth = NCR_700_MAX_TAGS;
 
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 9b97c3e016fe..e9373a2d14fa 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -472,8 +472,12 @@ static int aac_slave_configure(struct scsi_device *sdev)
  *	total capacity and the queue depth supported by the target device.
  */
 
-static int aac_change_queue_depth(struct scsi_device *sdev, int depth)
+static int aac_change_queue_depth(struct scsi_device *sdev, int depth,
+				  int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (sdev->tagged_supported && (sdev->type == TYPE_DISK) &&
 	    (sdev_channel(sdev) == CONTAINER_CHANNEL)) {
 		struct scsi_device * dev;
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 80aac01b5a6f..47d5d19f8c92 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -98,8 +98,11 @@ static void arcmsr_flush_hbb_cache(struct AdapterControlBlock *acb);
 static const char *arcmsr_info(struct Scsi_Host *);
 static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
 static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev,
-								int queue_depth)
+					  int queue_depth, int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (queue_depth > ARCMSR_MAX_CMD_PERLUN)
 		queue_depth = ARCMSR_MAX_CMD_PERLUN;
 	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index a0e7e711ff9d..901a3daeb36b 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -861,10 +861,13 @@ static int hptiop_reset(struct scsi_cmnd *scp)
 }
 
 static int hptiop_adjust_disk_queue_depth(struct scsi_device *sdev,
-						int queue_depth)
+					  int queue_depth, int reason)
 {
 	struct hptiop_hba *hba = (struct hptiop_hba *)sdev->host->hostdata;
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (queue_depth > hba->max_requests)
 		queue_depth = hba->max_requests;
 	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index bc9beb8c587c..87b536a97cb4 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2764,12 +2764,17 @@ static int ibmvfc_slave_configure(struct scsi_device *sdev)
  * ibmvfc_change_queue_depth - Change the device's queue depth
  * @sdev:	scsi device struct
  * @qdepth:	depth to set
+ * @reason:	calling context
  *
  * Return value:
  * 	actual depth set
  **/
-static int ibmvfc_change_queue_depth(struct scsi_device *sdev, int qdepth)
+static int ibmvfc_change_queue_depth(struct scsi_device *sdev, int qdepth,
+				     int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (qdepth > IBMVFC_MAX_CMDS_PER_LUN)
 		qdepth = IBMVFC_MAX_CMDS_PER_LUN;
 
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index d9b0e9d31983..e475b7957c2d 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1637,12 +1637,17 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev)
  * ibmvscsi_change_queue_depth - Change the device's queue depth
  * @sdev:	scsi device struct
  * @qdepth:	depth to set
+ * @reason:	calling context
  *
  * Return value:
  * 	actual depth set
  **/
-static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
+static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth,
+				       int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN)
 		qdepth = IBMVSCSI_MAX_CMDS_PER_LUN;
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 5f045505a1f4..d40d5c79fff1 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3367,16 +3367,21 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) { return 0; };
  * ipr_change_queue_depth - Change the device's queue depth
  * @sdev:	scsi device struct
  * @qdepth:	depth to set
+ * @reason:	calling context
  *
  * Return value:
  * 	actual depth set
  **/
-static int ipr_change_queue_depth(struct scsi_device *sdev, int qdepth)
+static int ipr_change_queue_depth(struct scsi_device *sdev, int qdepth,
+				  int reason)
 {
 	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata;
 	struct ipr_resource_entry *res;
 	unsigned long lock_flags = 0;
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 	res = (struct ipr_resource_entry *)sdev->hostdata;
 
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index a67f53a5026c..beaab818d8de 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -2064,8 +2064,11 @@ int fc_slave_alloc(struct scsi_device *sdev)
 }
 EXPORT_SYMBOL(fc_slave_alloc);
 
-int fc_change_queue_depth(struct scsi_device *sdev, int qdepth)
+int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
 	return sdev->queue_depth;
 }
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index f1a4246f890c..67d0f3fc8ac0 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1643,8 +1643,11 @@ fault:
 }
 EXPORT_SYMBOL_GPL(iscsi_queuecommand);
 
-int iscsi_change_queue_depth(struct scsi_device *sdev, int depth)
+int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
 	return sdev->queue_depth;
 }
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 1c558d3bce18..14b13196b22d 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -820,10 +820,14 @@ void sas_slave_destroy(struct scsi_device *scsi_dev)
 		ata_port_disable(dev->sata_dev.ap);
 }
 
-int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth)
+int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth,
+			   int reason)
 {
 	int res = min(new_depth, SAS_MAX_QD);
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (scsi_dev->tagged_supported)
 		scsi_adjust_queue_depth(scsi_dev, scsi_get_tag_type(scsi_dev),
 					res);
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 234f0b7eb21c..fd181c2a8ae4 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -335,12 +335,17 @@ static struct device_attribute *megaraid_sdev_attrs[] = {
  * megaraid_change_queue_depth - Change the device's queue depth
  * @sdev:	scsi device struct
  * @qdepth:	depth to set
+ * @reason:	calling context
  *
  * Return value:
  * 	actual depth set
  */
-static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth)
+static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth,
+				       int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (qdepth > MBOX_MAX_SCSI_CMDS)
 		qdepth = MBOX_MAX_SCSI_CMDS;
 	scsi_adjust_queue_depth(sdev, 0, qdepth);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 8dc682f00fd2..55ee014a7e08 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -1099,11 +1099,12 @@ _scsih_build_scatter_gather(struct MPT2SAS_ADAPTER *ioc,
  * _scsih_change_queue_depth - setting device queue depth
  * @sdev: scsi device struct
  * @qdepth: requested queue depth
+ * @reason: calling context
  *
  * Returns queue depth.
  */
 static int
-_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
+_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
 {
 	struct Scsi_Host *shost = sdev->host;
 	int max_depth;
@@ -1114,6 +1115,9 @@ _scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 	struct _sas_device *sas_device;
 	unsigned long flags;
 
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	max_depth = shost->can_queue;
 
 	/* limit max device queue for SATA to 32 */
@@ -1569,7 +1573,7 @@ _scsih_slave_configure(struct scsi_device *sdev)
 		    r_level, raid_device->handle,
 		    (unsigned long long)raid_device->wwid,
 		    raid_device->num_pds, ds);
-		_scsih_change_queue_depth(sdev, qdepth);
+		_scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT);
 		return 0;
 	}
 
@@ -1615,7 +1619,7 @@ _scsih_slave_configure(struct scsi_device *sdev)
 			_scsih_display_sata_capabilities(ioc, sas_device, sdev);
 	}
 
-	_scsih_change_queue_depth(sdev, qdepth);
+	_scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT);
 
 	if (ssp_target)
 		sas_read_port_mode_page(sdev);
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index f7c70e2a8224..86d158ee3572 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -278,12 +278,17 @@ static void pmcraid_slave_destroy(struct scsi_device *scsi_dev)
  * pmcraid_change_queue_depth - Change the device's queue depth
  * @scsi_dev: scsi device struct
  * @depth: depth to set
+ * @reason: calling context
  *
  * Return value
  * 	actual depth set
  */
-static int pmcraid_change_queue_depth(struct scsi_device *scsi_dev, int depth)
+static int pmcraid_change_queue_depth(struct scsi_device *scsi_dev, int depth,
+				      int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	if (depth > PMCRAID_MAX_CMD_PER_LUN)
 		depth = PMCRAID_MAX_CMD_PER_LUN;
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index ecf2a40d70be..d69744a62fe4 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -138,7 +138,7 @@ static int qla2xxx_eh_target_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_bus_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_host_reset(struct scsi_cmnd *);
 
-static int qla2x00_change_queue_depth(struct scsi_device *, int);
+static int qla2x00_change_queue_depth(struct scsi_device *, int, int);
 static int qla2x00_change_queue_type(struct scsi_device *, int);
 
 struct scsi_host_template qla2xxx_driver_template = {
@@ -1235,8 +1235,11 @@ qla2xxx_slave_destroy(struct scsi_device *sdev)
 }
 
 static int
-qla2x00_change_queue_depth(struct scsi_device *sdev, int qdepth)
+qla2x00_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
 {
+	if (reason != SCSI_QDEPTH_DEFAULT)
+		return -EOPNOTSUPP;
+
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
 	return sdev->queue_depth;
 }
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 5c7eb63a19d1..a48782866b22 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -766,7 +766,8 @@ sdev_store_queue_depth_rw(struct device *dev, struct device_attribute *attr,
 	if (depth < 1)
 		return -EINVAL;
 
-	retval = sht->change_queue_depth(sdev, depth);
+	retval = sht->change_queue_depth(sdev, depth,
+					 SCSI_QDEPTH_DEFAULT);
 	if (retval < 0)
 		return retval;
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 87698640c091..85df383fd4bd 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1023,7 +1023,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev,
 extern int ata_scsi_slave_config(struct scsi_device *sdev);
 extern void ata_scsi_slave_destroy(struct scsi_device *sdev);
 extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
-				       int queue_depth);
+				       int queue_depth, int reason);
 extern struct ata_device *ata_dev_pair(struct ata_device *adev);
 extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
 
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 1662d73d85a7..9617f9365e45 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -919,7 +919,7 @@ int fc_slave_alloc(struct scsi_device *sdev);
 /*
  * Adjust the queue depth.
  */
-int fc_change_queue_depth(struct scsi_device *sdev, int qdepth);
+int fc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason);
 
 /*
  * Change the tag type.
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index a72edd4eceec..2db2bc26b1e9 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -333,7 +333,8 @@ struct iscsi_host {
 /*
  * scsi host template
  */
-extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth);
+extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth,
+				    int reason);
 extern int iscsi_eh_abort(struct scsi_cmnd *sc);
 extern int iscsi_eh_target_reset(struct scsi_cmnd *sc);
 extern int iscsi_eh_device_reset(struct scsi_cmnd *sc);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index e78d3b62d8ec..9eaa3f05f954 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -634,7 +634,8 @@ extern int sas_target_alloc(struct scsi_target *);
 extern int sas_slave_alloc(struct scsi_device *);
 extern int sas_slave_configure(struct scsi_device *);
 extern void sas_slave_destroy(struct scsi_device *);
-extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
+extern int sas_change_queue_depth(struct scsi_device *, int new_depth,
+				  int reason);
 extern int sas_change_queue_type(struct scsi_device *, int qt);
 extern int sas_bios_param(struct scsi_device *,
 			  struct block_device *,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 6e728b176904..603054d8f40c 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -43,6 +43,12 @@ struct blk_queue_tags;
 #define DISABLE_CLUSTERING 0
 #define ENABLE_CLUSTERING 1
 
+enum {
+	SCSI_QDEPTH_DEFAULT,	/* default requested change, e.g. from sysfs */
+	SCSI_QDEPTH_QFULL,	/* scsi-ml requested due to queue full */
+	SCSI_QDEPTH_RAMP_UP,	/* scsi-ml requested due to threshhold event */
+};
+
 struct scsi_host_template {
 	struct module *module;
 	const char *name;
@@ -294,7 +300,7 @@ struct scsi_host_template {
 	 *
 	 * Status: OPTIONAL
 	 */
-	int (* change_queue_depth)(struct scsi_device *, int);
+	int (* change_queue_depth)(struct scsi_device *, int, int);
 
 	/*
 	 * Fill in this function to allow the changing of tag types
-- 
cgit v1.2.3


From ea028ac92541ac30bf202ed94cb53eec2ea0c9d6 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 4 Dec 2009 15:55:38 -0500
Subject: nfs41: nfs41: fix state manager deadlock in session reset

If the session is reset during state recovery, the state manager thread can
sleep on the slot_tbl_waitq causing a deadlock.

Add a completion framework to the session.  Have the state manager thread set
a new session state (NFS4CLNT_SESSION_DRAINING) and wait for the session slot
table to drain.

Signal the state manager thread in nfs41_sequence_free_slot when the
NFS4CLNT_SESSION_DRAINING bit is set and the session is drained.

Reported-by: Trond Myklebust <trond@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |  1 +
 fs/nfs/nfs4proc.c         | 26 +++++++++++++++++---------
 fs/nfs/nfs4state.c        | 15 +++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 34 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e9ecd6bf9257..5c7740178a08 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -45,6 +45,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
 	NFS4CLNT_SESSION_RESET,
+	NFS4CLNT_SESSION_DRAINING,
 };
 
 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fb62919f7f2c..c1bc9cad5e85 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -361,6 +361,16 @@ void nfs41_sequence_free_slot(const struct nfs_client *clp,
 	}
 	nfs4_free_slot(tbl, res->sr_slotid);
 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+
+	/* Signal state manager thread if session is drained */
+	if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
+		spin_lock(&tbl->slot_tbl_lock);
+		if (tbl->highest_used_slotid == -1) {
+			dprintk("%s COMPLETE: Session Drained\n", __func__);
+			complete(&clp->cl_session->complete);
+		}
+		spin_unlock(&tbl->slot_tbl_lock);
+	}
 }
 
 static void nfs41_sequence_done(struct nfs_client *clp,
@@ -457,15 +467,11 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
 
 	spin_lock(&tbl->slot_tbl_lock);
 	if (test_bit(NFS4CLNT_SESSION_RESET, &session->clp->cl_state)) {
-		if (tbl->highest_used_slotid != -1) {
-			rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
-			spin_unlock(&tbl->slot_tbl_lock);
-			dprintk("<-- %s: Session reset: draining\n", __func__);
-			return -EAGAIN;
-		}
-
-		/* The slot table is empty; start the reset thread */
-		dprintk("%s Session Reset\n", __func__);
+		/*
+		 * The state manager will wait until the slot table is empty.
+		 * Schedule the reset thread
+		 */
+		dprintk("%s Schedule Session Reset\n", __func__);
 		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
 		nfs4_schedule_state_manager(session->clp);
 		spin_unlock(&tbl->slot_tbl_lock);
@@ -4506,6 +4512,7 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session)
 			1);
 	if (status)
 		return status;
+	init_completion(&session->complete);
 
 	status = nfs4_reset_slot_table(&session->bc_slot_table,
 			session->bc_attrs.max_reqs,
@@ -4608,6 +4615,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	 * nfs_client struct
 	 */
 	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+	init_completion(&session->complete);
 
 	tbl = &session->fc_slot_table;
 	spin_lock_init(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f56f6be5e314..3c1433598b60 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1181,8 +1181,23 @@ static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err)
 
 static int nfs4_reset_session(struct nfs_client *clp)
 {
+	struct nfs4_session *ses = clp->cl_session;
+	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
 	int status;
 
+	INIT_COMPLETION(ses->complete);
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->highest_used_slotid != -1) {
+		set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
+		spin_unlock(&tbl->slot_tbl_lock);
+		status = wait_for_completion_interruptible(&ses->complete);
+		clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
+		if (status) /* -ERESTARTSYS */
+			goto out;
+	} else {
+		spin_unlock(&tbl->slot_tbl_lock);
+	}
+
 	status = nfs4_proc_destroy_session(clp->cl_session);
 	if (status && status != -NFS4ERR_BADSESSION &&
 	    status != -NFS4ERR_DEADSESSION) {
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 320569eabe3b..34fc6be5bfcf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -209,6 +209,7 @@ struct nfs4_session {
 	unsigned long			session_state;
 	u32				hash_alg;
 	u32				ssv_len;
+	struct completion		complete;
 
 	/* The fore and back channel */
 	struct nfs4_channel_attrs	fc_attrs;
-- 
cgit v1.2.3


From 6070d81eb5f2d4943223c96e7609a53cdc984364 Mon Sep 17 00:00:00 2001
From: Adam Buchbinder <adam.buchbinder@gmail.com>
Date: Fri, 4 Dec 2009 15:47:01 -0500
Subject: tree-wide: fix misspelling of "definition" in comments

"Definition" is misspelled "defintion" in several comments; this
patch fixes them. No code changes.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/arm/mach-bcmring/include/csp/reg.h          |  2 +-
 arch/arm/mach-bcmring/include/mach/csp/mm_addr.h |  2 +-
 arch/arm/mach-u300/include/mach/u300-regs.h      |  2 +-
 arch/sh/include/mach-common/mach/titan.h         |  2 +-
 arch/x86/include/asm/sigcontext.h                |  4 ++--
 drivers/block/cciss_cmd.h                        |  2 +-
 drivers/gpu/drm/radeon/atombios.h                | 16 ++++++++--------
 drivers/media/video/cx18/cx18-av-core.h          |  2 +-
 drivers/media/video/cx18/cx18-mailbox.h          |  2 +-
 drivers/net/wireless/ath/ath5k/base.h            |  2 +-
 drivers/s390/net/qeth_core_mpc.h                 |  2 +-
 drivers/scsi/advansys.c                          |  2 +-
 drivers/scsi/aic94xx/aic94xx_reg_def.h           |  2 +-
 drivers/scsi/bfa/include/protocol/ct.h           |  4 ++--
 drivers/scsi/dmx3191d.c                          |  2 +-
 drivers/scsi/wd7000.c                            |  2 +-
 drivers/staging/otus/80211core/pub_zfi.h         |  2 +-
 drivers/staging/otus/zdcompat.h                  |  2 +-
 drivers/staging/rtl8192su/r8192S_phyreg.h        |  2 +-
 fs/cifs/cifspdu.h                                |  2 +-
 fs/compat_ioctl.c                                |  2 +-
 include/linux/cciss_ioctl.h                      |  2 +-
 include/linux/in6.h                              |  2 +-
 include/linux/usb/wusb.h                         |  2 +-
 24 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-bcmring/include/csp/reg.h b/arch/arm/mach-bcmring/include/csp/reg.h
index e5f60bf5a1f3..56654d23c3d7 100644
--- a/arch/arm/mach-bcmring/include/csp/reg.h
+++ b/arch/arm/mach-bcmring/include/csp/reg.h
@@ -16,7 +16,7 @@
 /**
 *  @file    reg.h
 *
-*  @brief   Generic register defintions used in CSP
+*  @brief   Generic register definitions used in CSP
 */
 /****************************************************************************/
 
diff --git a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h
index 86bb58d4f58c..ad58cf873377 100644
--- a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h
+++ b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h
@@ -16,7 +16,7 @@
 /**
 *  @file    mm_addr.h
 *
-*  @brief   Memory Map address defintions
+*  @brief   Memory Map address definitions
 *
 *  @note
 *     None
diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h
index 88333dfb19fc..56721a0cd2af 100644
--- a/arch/arm/mach-u300/include/mach/u300-regs.h
+++ b/arch/arm/mach-u300/include/mach/u300-regs.h
@@ -6,7 +6,7 @@
  * Copyright (C) 2006-2009 ST-Ericsson AB
  * License terms: GNU General Public License (GPL) version 2
  * Basic register address definitions in physical memory and
- * some block defintions for core devices like the timer.
+ * some block definitions for core devices like the timer.
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
 
diff --git a/arch/sh/include/mach-common/mach/titan.h b/arch/sh/include/mach-common/mach/titan.h
index 03f3583c8918..4a674d27cbb8 100644
--- a/arch/sh/include/mach-common/mach/titan.h
+++ b/arch/sh/include/mach-common/mach/titan.h
@@ -1,5 +1,5 @@
 /*
- * Platform defintions for Titan
+ * Platform definitions for Titan
  */
 #ifndef _ASM_SH_TITAN_H
 #define _ASM_SH_TITAN_H
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 72e5a4491661..04459d25e66e 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -124,7 +124,7 @@ struct sigcontext {
 	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
 	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
 	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
-	 * of extended memory layout. See comments at the defintion of
+	 * of extended memory layout. See comments at the definition of
 	 * (struct _fpx_sw_bytes)
 	 */
 	void __user *fpstate;		/* zero when no FPU/extended context */
@@ -219,7 +219,7 @@ struct sigcontext {
 	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
 	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
 	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
-	 * of extended memory layout. See comments at the defintion of
+	 * of extended memory layout. See comments at the definition of
 	 * (struct _fpx_sw_bytes)
 	 */
 	void __user *fpstate;		/* zero when no FPU/extended context */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index dbaed1ea0da3..8098fccdbec4 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -5,7 +5,7 @@
 //###########################################################################
 #define CISS_VERSION "1.00"
 
-//general boundary defintions
+//general boundary definitions
 #define SENSEINFOBYTES          32//note that this value may vary between host implementations
 #define MAXSGENTRIES            31
 #define MAXREPLYQS              256
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index 5d402086bc47..811178907413 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -1141,7 +1141,7 @@ typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS {
 /* ucTableFormatRevision=1,ucTableContentRevision=2 */
 typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2 {
 	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucMisc;		/*  see PANEL_ENCODER_MISC_xx defintions below */
+	UCHAR ucMisc;		/*  see PANEL_ENCODER_MISC_xx definitions below */
 	UCHAR ucAction;		/*  0: turn off encoder */
 	/*  1: setup and turn on encoder */
 	UCHAR ucTruncate;	/*  bit0=0: Disable truncate */
@@ -1424,7 +1424,7 @@ typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO {
 /*  Structures used in FirmwareInfoTable */
 /****************************************************************************/
 
-/*  usBIOSCapability Defintion: */
+/*  usBIOSCapability Definition: */
 /*  Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; */
 /*  Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; */
 /*  Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; */
@@ -2386,7 +2386,7 @@ typedef struct _ATOM_ANALOG_TV_INFO_V1_2 {
 } ATOM_ANALOG_TV_INFO_V1_2;
 
 /**************************************************************************/
-/*  VRAM usage and their defintions */
+/*  VRAM usage and their definitions */
 
 /*  One chunk of VRAM used by Bios are for HWICON surfaces,EDID data. */
 /*  Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below. */
@@ -3046,7 +3046,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_DC     2
 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_LITEAC 3
 
-/* Byte aligned defintion for BIOS usage */
+/* Byte aligned definition for BIOS usage */
 #define ATOM_S0_CRT1_MONOb0             0x01
 #define ATOM_S0_CRT1_COLORb0            0x02
 #define ATOM_S0_CRT1_MASKb0             (ATOM_S0_CRT1_MONOb0+ATOM_S0_CRT1_COLORb0)
@@ -3131,7 +3131,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
 #define ATOM_S2_DISPLAY_ROTATION_DEGREE_SHIFT 30
 #define ATOM_S2_DISPLAY_ROTATION_ANGLE_MASK   0xC0000000L
 
-/* Byte aligned defintion for BIOS usage */
+/* Byte aligned definition for BIOS usage */
 #define ATOM_S2_TV1_STANDARD_MASKb0     0x0F
 #define ATOM_S2_CURRENT_BL_LEVEL_MASKb1 0xFF
 #define ATOM_S2_CRT1_DPMS_STATEb2       0x01
@@ -3190,7 +3190,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
 #define ATOM_S3_ALLOW_FAST_PWR_SWITCH   0x40000000L
 #define ATOM_S3_RQST_GPU_USE_MIN_PWR    0x80000000L
 
-/* Byte aligned defintion for BIOS usage */
+/* Byte aligned definition for BIOS usage */
 #define ATOM_S3_CRT1_ACTIVEb0           0x01
 #define ATOM_S3_LCD1_ACTIVEb0           0x02
 #define ATOM_S3_TV1_ACTIVEb0            0x04
@@ -3230,7 +3230,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
 #define ATOM_S4_LCD1_REFRESH_MASK       0x0000FF00L
 #define ATOM_S4_LCD1_REFRESH_SHIFT      8
 
-/* Byte aligned defintion for BIOS usage */
+/* Byte aligned definition for BIOS usage */
 #define ATOM_S4_LCD1_PANEL_ID_MASKb0	  0x0FF
 #define ATOM_S4_LCD1_REFRESH_MASKb1		  ATOM_S4_LCD1_PANEL_ID_MASKb0
 #define ATOM_S4_VRAM_INFO_MASKb2        ATOM_S4_LCD1_PANEL_ID_MASKb0
@@ -3310,7 +3310,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
 #define ATOM_S6_VRI_BRIGHTNESS_CHANGE       0x40000000L
 #define ATOM_S6_CONFIG_DISPLAY_CHANGE_MASK  0x80000000L
 
-/* Byte aligned defintion for BIOS usage */
+/* Byte aligned definition for BIOS usage */
 #define ATOM_S6_DEVICE_CHANGEb0         0x01
 #define ATOM_S6_SCALER_CHANGEb0         0x02
 #define ATOM_S6_LID_CHANGEb0            0x04
diff --git a/drivers/media/video/cx18/cx18-av-core.h b/drivers/media/video/cx18/cx18-av-core.h
index 9b84a0c58e0e..cafb7e99b9a0 100644
--- a/drivers/media/video/cx18/cx18-av-core.h
+++ b/drivers/media/video/cx18/cx18-av-core.h
@@ -294,7 +294,7 @@ struct cx18_av_state {
 #define CXADEC_QAM_CONST_DEC       0x924
 #define CXADEC_QAM_ROTATOR_FREQ    0x948
 
-/* Bit defintions / settings used in Mako Audio */
+/* Bit definitions / settings used in Mako Audio */
 #define CXADEC_PREF_MODE_MONO_LANGA        0
 #define CXADEC_PREF_MODE_MONO_LANGB        1
 #define CXADEC_PREF_MODE_MONO_LANGC        2
diff --git a/drivers/media/video/cx18/cx18-mailbox.h b/drivers/media/video/cx18/cx18-mailbox.h
index e23aaac5b280..522ad534034c 100644
--- a/drivers/media/video/cx18/cx18-mailbox.h
+++ b/drivers/media/video/cx18/cx18-mailbox.h
@@ -41,7 +41,7 @@ struct cx18;
 /*
  * This structure is used by CPU to provide completed buffers information
  * Its structure is dictrated by the layout of the SCB, required by the
- * firmware, but its defintion needs to be here, instead of in cx18-scb.h,
+ * firmware, but its definition needs to be here, instead of in cx18-scb.h,
  * for mailbox work order scheduling
  */
 struct cx18_mdl_ack {
diff --git a/drivers/net/wireless/ath/ath5k/base.h b/drivers/net/wireless/ath/ath5k/base.h
index c79a65044b67..4da029410467 100644
--- a/drivers/net/wireless/ath/ath5k/base.h
+++ b/drivers/net/wireless/ath/ath5k/base.h
@@ -36,7 +36,7 @@
  */
 
 /*
- * Defintions for the Atheros Wireless LAN controller driver.
+ * Definitions for the Atheros Wireless LAN controller driver.
  */
 #ifndef _DEV_ATH_ATHVAR_H
 #define _DEV_ATH_ATHVAR_H
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index eecb2ee62e85..de7bccef49f0 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -507,7 +507,7 @@ extern unsigned char ULP_ENABLE[];
 		(PDU_ENCAPSULATION(buffer) + 0x17)
 #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \
 		(PDU_ENCAPSULATION(buffer) + 0x2b)
-/* Layer 2 defintions */
+/* Layer 2 definitions */
 #define QETH_PROT_LAYER2 0x08
 #define QETH_PROT_TCPIP  0x03
 #define QETH_PROT_OSN2   0x0a
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index b756041f0b26..22626abdb630 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -7969,7 +7969,7 @@ static int advansys_reset(struct scsi_cmnd *scp)
 		ASC_DBG(1, "before AscInitAsc1000Driver()\n");
 		status = AscInitAsc1000Driver(asc_dvc);
 
-		/* Refer to ASC_IERR_* defintions for meaning of 'err_code'. */
+		/* Refer to ASC_IERR_* definitions for meaning of 'err_code'. */
 		if (asc_dvc->err_code) {
 			scmd_printk(KERN_INFO, scp, "SCSI bus reset error: "
 				    "0x%x\n", asc_dvc->err_code);
diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h
index a43e8cdf4ee4..28aaf349c111 100644
--- a/drivers/scsi/aic94xx/aic94xx_reg_def.h
+++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h
@@ -1,5 +1,5 @@
 /*
- * Aic94xx SAS/SATA driver hardware registers defintions.
+ * Aic94xx SAS/SATA driver hardware registers definitions.
  *
  * Copyright (C) 2004 Adaptec, Inc.  All rights reserved.
  * Copyright (C) 2004 David Chaw <david_chaw@adaptec.com>
diff --git a/drivers/scsi/bfa/include/protocol/ct.h b/drivers/scsi/bfa/include/protocol/ct.h
index c59d6630b070..79d16a9ab281 100644
--- a/drivers/scsi/bfa/include/protocol/ct.h
+++ b/drivers/scsi/bfa/include/protocol/ct.h
@@ -82,7 +82,7 @@ enum {
 };
 
 /*
- * defintions for CT reason code
+ * definitions for CT reason code
  */
 enum {
 	CT_RSN_INV_CMD		= 0x01,
@@ -129,7 +129,7 @@ enum {
 };
 
 /*
- * defintions for the explanation code for all servers
+ * definitions for the explanation code for all servers
  */
 enum {
 	CT_EXP_AUTH_EXCEPTION			= 0xF1,
diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c
index fa738ec8692a..207352cc70cc 100644
--- a/drivers/scsi/dmx3191d.c
+++ b/drivers/scsi/dmx3191d.c
@@ -31,7 +31,7 @@
 #include <scsi/scsi_host.h>
 
 /*
- * Defintions for the generic 5380 driver.
+ * Definitions for the generic 5380 driver.
  */
 #define AUTOSENSE
 
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index 093610bcfcce..2f6e9d8eaf71 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -161,7 +161,7 @@
  *
  * 2003/02/12 - Christoph Hellwig <hch@infradead.org>
  *
- * Cleaned up host template defintion
+ * Cleaned up host template definition
  * Removed now obsolete wd7000.h
  */
 
diff --git a/drivers/staging/otus/80211core/pub_zfi.h b/drivers/staging/otus/80211core/pub_zfi.h
index a35bd5d41d2c..60b7d1c56dee 100644
--- a/drivers/staging/otus/80211core/pub_zfi.h
+++ b/drivers/staging/otus/80211core/pub_zfi.h
@@ -20,7 +20,7 @@
 #include "../oal_dt.h"
 
 /***** Section 1 : Tunable Parameters *****/
-/* The defintions in this section are tunabel parameters */
+/* The definitions in this section are tunabel parameters */
 
 /* Maximum number of BSS that could be scaned */
 #define ZM_MAX_BSS                          128
diff --git a/drivers/staging/otus/zdcompat.h b/drivers/staging/otus/zdcompat.h
index 84ac43356b77..cdcaef54afcd 100644
--- a/drivers/staging/otus/zdcompat.h
+++ b/drivers/staging/otus/zdcompat.h
@@ -17,7 +17,7 @@
 /*  Module Name : zdcompat.h                                            */
 /*                                                                      */
 /*  Abstract                                                            */
-/*     This module contains function defintion for compatibility.       */
+/*     This module contains function definition for compatibility.      */
 /*                                                                      */
 /*  NOTES                                                               */
 /*     Platform dependent.                                              */
diff --git a/drivers/staging/rtl8192su/r8192S_phyreg.h b/drivers/staging/rtl8192su/r8192S_phyreg.h
index 96c7cfa92542..acf644f430aa 100644
--- a/drivers/staging/rtl8192su/r8192S_phyreg.h
+++ b/drivers/staging/rtl8192su/r8192S_phyreg.h
@@ -38,7 +38,7 @@
 // 2. 0x800/0x900/0xA00/0xC00/0xD00/0xE00
 // 3. RF register 0x00-2E
 // 4. Bit Mask for BB/RF register
-// 5. Other defintion for BB/RF R/W
+// 5. Other definition for BB/RF R/W
 //
 
 
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 2d07f890a842..3877737f96a6 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1227,7 +1227,7 @@ typedef struct smb_com_setattr_rsp {
 /* empty wct response to setattr */
 
 /*******************************************************/
-/* NT Transact structure defintions follow             */
+/* NT Transact structure definitions follow            */
 /* Currently only ioctl, acl (get security descriptor) */
 /* and notify are implemented                          */
 /*******************************************************/
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f91fd51b32e3..5fc8e52ee306 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2655,7 +2655,7 @@ COMPATIBLE_IOCTL(TIOCSLTC)
 #endif
 #ifdef TIOCSTART
 /*
- * For these two we have defintions in ioctls.h and/or termios.h on
+ * For these two we have definitions in ioctls.h and/or termios.h on
  * some architectures but no actual implemention.  Some applications
  * like bash call them if they are defined in the headers, so we provide
  * entries here to avoid syslog message spew.
diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h
index cb57c30081a8..eb130b4d8e72 100644
--- a/include/linux/cciss_ioctl.h
+++ b/include/linux/cciss_ioctl.h
@@ -39,7 +39,7 @@ typedef __u32 DriverVer_type;
 #ifndef CCISS_CMD_H
 // This defines are duplicated in cciss_cmd.h in the driver directory 
 
-//general boundary defintions
+//general boundary definitions
 #define SENSEINFOBYTES          32//note that this value may vary between host implementations
 
 //Command Status value
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 718bf21c5754..010290dd79bb 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -113,7 +113,7 @@ struct in6_flowlabel_req
 #define IPV6_FLOWINFO_FLOWLABEL		0x000fffff
 #define IPV6_FLOWINFO_PRIORITY		0x0ff00000
 
-/* These defintions are obsolete */
+/* These definitions are obsolete */
 #define IPV6_PRIORITY_UNCHARACTERIZED	0x0000
 #define IPV6_PRIORITY_FILLER		0x0100
 #define IPV6_PRIORITY_UNATTENDED	0x0200
diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h
index 429c631d2aad..63ebdcc5dda6 100644
--- a/include/linux/usb/wusb.h
+++ b/include/linux/usb/wusb.h
@@ -74,7 +74,7 @@ enum {
  * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of
  * data. In order to avoid confusion and enforce types, we wrap it.
  *
- * Make it packed, as we use it in some hw defintions.
+ * Make it packed, as we use it in some hw definitions.
  */
 struct wusb_ckhdid {
 	u8 data[16];
-- 
cgit v1.2.3


From dc5351784eb36f1fec4efa88e01581be72c0b711 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 25 Nov 2009 21:04:00 +0900
Subject: PCI: portdrv: cleanup service irqs initialization

This patch cleans up the service irqs initialization as follows:

 - Remove 'irq_mode' field in pcie_port_data and related definitions,
   which is not needed because we can get the same information from
   'is_msix', 'is_msi' and 'pin' fields in struct pci_dev.

 - Change the name of 'vectors' argument of assign_interrupt_mode() to
   'irqs' because it holds irq numbers actually. People might confuse
   it with CPU vector or MSI/MSI-X vector.

 - Change function name assign_interrupt_mode() to init_service_irqs()
   becasuse we no longer have 'irq_mode' data structure, and new name
   is more straightforward (IMO).

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/portdrv_core.c | 71 +++++++++++++++++------------------------
 include/linux/pcieport_if.h     |  7 ----
 2 files changed, 29 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 52c4e6fd6fd4..d208dc2d62fd 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -177,37 +177,32 @@ static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
 }
 
 /**
- * assign_interrupt_mode - choose interrupt mode for PCI Express port services
- *                         (INTx, MSI-X, MSI) and set up vectors
+ * init_service_irqs - initialize irqs for PCI Express port services
  * @dev: PCI Express port to handle
- * @vectors: Array of interrupt vectors to populate
+ * @irqs: Array of irqs to populate
  * @mask: Bitmask of port capabilities returned by get_port_device_capability()
  *
  * Return value: Interrupt mode associated with the port
  */
-static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
+static int init_service_irqs(struct pci_dev *dev, int *irqs, int mask)
 {
-	int irq, interrupt_mode = PCIE_PORT_NO_IRQ;
-	int i;
+	int i, irq;
 
 	/* Try to use MSI-X if supported */
-	if (!pcie_port_enable_msix(dev, vectors, mask))
-		return PCIE_PORT_MSIX_MODE;
-
+	if (!pcie_port_enable_msix(dev, irqs, mask))
+		return 0;
 	/* We're not going to use MSI-X, so try MSI and fall back to INTx */
-	if (!pci_enable_msi(dev))
-		interrupt_mode = PCIE_PORT_MSI_MODE;
+	irq = -1;
+	if (!pci_enable_msi(dev) || dev->pin)
+		irq = dev->irq;
 
-	if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin)
-		interrupt_mode = PCIE_PORT_INTx_MODE;
-
-	irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1;
 	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
-		vectors[i] = irq;
-
-	vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
+		irqs[i] = irq;
+	irqs[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
 
-	return interrupt_mode;
+	if (irq < 0)
+		return -ENODEV;
+	return 0;
 }
 
 /**
@@ -294,8 +289,8 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
 int pcie_port_device_register(struct pci_dev *dev)
 {
 	struct pcie_port_data *port_data;
-	int status, capabilities, irq_mode, i, nr_serv;
-	int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
+	int status, capabilities, i, nr_serv;
+	int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
 
 	capabilities = get_port_device_capability(dev);
 	if (!capabilities)
@@ -304,23 +299,19 @@ int pcie_port_device_register(struct pci_dev *dev)
 	port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
 	if (!port_data)
 		return -ENOMEM;
-	pci_set_drvdata(dev, port_data);
-
 	port_data->port_type = dev->pcie_type;
+	pci_set_drvdata(dev, port_data);
 
-	irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
-	if (irq_mode == PCIE_PORT_NO_IRQ) {
-		/*
-		 * Don't use service devices that require interrupts if there is
-		 * no way to generate them.
-		 */
-		if (!(capabilities & PCIE_PORT_SERVICE_VC)) {
-			status = -ENODEV;
+	/*
+	 * Initialize service irqs. Don't use service devices that
+	 * require interrupts if there is no way to generate them.
+	 */
+	status = init_service_irqs(dev, irqs, capabilities);
+	if (status) {
+		capabilities &= PCIE_PORT_SERVICE_VC;
+		if (!capabilities)
 			goto Error;
-		}
-		capabilities = PCIE_PORT_SERVICE_VC;
 	}
-	port_data->port_irq_mode = irq_mode;
 
 	status = pci_enable_device(dev);
 	if (status)
@@ -334,7 +325,7 @@ int pcie_port_device_register(struct pci_dev *dev)
 		if (!(capabilities & service))
 			continue;
 
-		status = pcie_device_init(dev, service, vectors[i]);
+		status = pcie_device_init(dev, service, irqs[i]);
 		if (!status)
 			nr_serv++;
 	}
@@ -418,17 +409,13 @@ void pcie_port_device_remove(struct pci_dev *dev)
 	struct pcie_port_data *port_data = pci_get_drvdata(dev);
 
 	device_for_each_child(&dev->dev, NULL, remove_iter);
-	pci_disable_device(dev);
 
-	switch (port_data->port_irq_mode) {
-	case PCIE_PORT_MSIX_MODE:
+	if (dev->msix_enabled)
 		pci_disable_msix(dev);
-		break;
-	case PCIE_PORT_MSI_MODE:
+	else if (dev->msi_enabled)
 		pci_disable_msi(dev);
-		break;
-	}
 
+	pci_disable_device(dev);
 	kfree(port_data);
 }
 
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index b4c79545330b..ec2e1eb1bacc 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -25,15 +25,8 @@
 #define PCIE_PORT_SERVICE_VC_SHIFT	3	/* Virtual Channel */
 #define PCIE_PORT_SERVICE_VC		(1 << PCIE_PORT_SERVICE_VC_SHIFT)
 
-/* Root/Upstream/Downstream Port's Interrupt Mode */
-#define PCIE_PORT_NO_IRQ		(-1)
-#define PCIE_PORT_INTx_MODE		0
-#define PCIE_PORT_MSI_MODE		1
-#define PCIE_PORT_MSIX_MODE		2
-
 struct pcie_port_data {
 	int port_type;		/* Type of the port */
-	int port_irq_mode;	/* [0:INTx | 1:MSI | 2:MSI-X] */
 };
 
 struct pcie_device {
-- 
cgit v1.2.3


From 694f88ef7ada0d99e304f687ba92e268a594358b Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 25 Nov 2009 21:06:15 +0900
Subject: PCI: portdrv: remove unnecessary struct pcie_port_data

Remove 'port_type' field in struct pcie_port_data(), because we can
get port type information from struct pci_dev. With this change, this
patch also does followings:

 - Remove struct pcie_port_data because it no longer has any field.
 - Remove portdrv private definitions about port type (PCIE_RC_PORT,
   PCIE_SW_UPSTREAM_PORT and PCIE_SW_DOWNSTREAM_PORT), and use generic
   definitions instead.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aer/aerdrv.c      |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c | 11 +++++------
 drivers/pci/pcie/portdrv_bus.c     |  7 ++-----
 drivers/pci/pcie/portdrv_core.c    | 15 +--------------
 include/linux/pcieport_if.h        |  9 +--------
 5 files changed, 10 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 6d30e795a10d..97a345927b55 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -53,7 +53,7 @@ static struct pci_error_handlers aer_error_handlers = {
 
 static struct pcie_port_service_driver aerdriver = {
 	.name		= "aer",
-	.port_type	= PCIE_RC_PORT,
+	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
 	.service	= PCIE_PORT_SERVICE_AER,
 
 	.probe		= aer_probe,
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 5391a9b412e5..2fbbcee033a7 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -123,9 +123,9 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data)
 {
 	bool enable = *((bool *)data);
 
-	if (dev->pcie_type == PCIE_RC_PORT ||
-	    dev->pcie_type == PCIE_SW_UPSTREAM_PORT ||
-	    dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) {
+	if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
+	    (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
+	    (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) {
 		if (enable)
 			pci_enable_pcie_error_reporting(dev);
 		else
@@ -437,10 +437,9 @@ static int find_aer_service_iter(struct device *device, void *data)
 	result = (struct find_aer_service_data *) data;
 
 	if (device->bus == &pcie_port_bus_type) {
-		struct pcie_port_data *port_data;
+		struct pcie_device *pcie = to_pcie_device(device);
 
-		port_data = pci_get_drvdata(to_pcie_device(device)->port);
-		if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
+		if (pcie->port->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
 			result->is_downstream = 1;
 
 		driver = device->driver;
diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c
index ef3a4eeaebb4..18bf90f748f6 100644
--- a/drivers/pci/pcie/portdrv_bus.c
+++ b/drivers/pci/pcie/portdrv_bus.c
@@ -26,7 +26,6 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type);
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct pcie_device *pciedev;
-	struct pcie_port_data *port_data;
 	struct pcie_port_service_driver *driver;
 
 	if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
@@ -38,10 +37,8 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
 	if (driver->service != pciedev->service)
 		return 0;
 
-	port_data = pci_get_drvdata(pciedev->port);
-
-	if (driver->port_type != PCIE_ANY_PORT
-	     && driver->port_type != port_data->port_type)
+	if ((driver->port_type != PCIE_ANY_PORT) &&
+	    (driver->port_type != pciedev->port->pcie_type))
 		return 0;
 
 	return 1;
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 758e3d339287..9318b96bb255 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -296,7 +296,6 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
  */
 int pcie_port_device_register(struct pci_dev *dev)
 {
-	struct pcie_port_data *port_data;
 	int status, capabilities, i, nr_service;
 	int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
 
@@ -305,17 +304,10 @@ int pcie_port_device_register(struct pci_dev *dev)
 	if (!capabilities)
 		return -ENODEV;
 
-	/* Allocate driver data for port device */
-	port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
-	if (!port_data)
-		return -ENOMEM;
-	port_data->port_type = dev->pcie_type;
-	pci_set_drvdata(dev, port_data);
-
 	/* Enable PCI Express port device */
 	status = pci_enable_device(dev);
 	if (status)
-		goto error_kfree;
+		return status;
 	pci_set_master(dev);
 	/*
 	 * Initialize service irqs. Don't use service devices that
@@ -347,8 +339,6 @@ error_cleanup_irqs:
 	cleanup_service_irqs(dev);
 error_disable:
 	pci_disable_device(dev);
-error_kfree:
-	kfree(port_data);
 	return status;
 }
 
@@ -416,12 +406,9 @@ static int remove_iter(struct device *dev, void *data)
  */
 void pcie_port_device_remove(struct pci_dev *dev)
 {
-	struct pcie_port_data *port_data = pci_get_drvdata(dev);
-
 	device_for_each_child(&dev->dev, NULL, remove_iter);
 	cleanup_service_irqs(dev);
 	pci_disable_device(dev);
-	kfree(port_data);
 }
 
 /**
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index ec2e1eb1bacc..6775532b92a9 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -10,10 +10,7 @@
 #define _PCIEPORT_IF_H_
 
 /* Port Type */
-#define PCIE_RC_PORT			4	/* Root port of RC */
-#define PCIE_SW_UPSTREAM_PORT		5	/* Upstream port of Switch */
-#define PCIE_SW_DOWNSTREAM_PORT		6	/* Downstream port of Switch */
-#define PCIE_ANY_PORT			7
+#define PCIE_ANY_PORT			(~0)
 
 /* Service Type */
 #define PCIE_PORT_SERVICE_PME_SHIFT	0	/* Power Management Event */
@@ -25,10 +22,6 @@
 #define PCIE_PORT_SERVICE_VC_SHIFT	3	/* Virtual Channel */
 #define PCIE_PORT_SERVICE_VC		(1 << PCIE_PORT_SERVICE_VC_SHIFT)
 
-struct pcie_port_data {
-	int port_type;		/* Type of the port */
-};
-
 struct pcie_device {
 	int 		irq;	    /* Service IRQ/MSI/MSI-X Vector */
 	struct pci_dev *port;	    /* Root/Upstream/Downstream Port */
-- 
cgit v1.2.3


From 5d990b627537e59a3a2f039ff588a4750e9c1a6a Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Fri, 4 Dec 2009 12:15:21 -0800
Subject: PCI: add pci_request_acs

Commit ae21ee65e8bc228416bbcc8a1da01c56a847a60c "PCI: acs p2p upsteram
forwarding enabling" doesn't actually enable ACS.

Add a function to pci core to allow an IOMMU to request that ACS
be enabled.  The existing mechanism of using iommu_found() in the pci
core to know when ACS should be enabled doesn't actually work due to
initialization order;  iommu has only been detected not initialized.

Have Intel and AMD IOMMUs request ACS, and Xen does as well during early
init of dom0.

Cc: Allen Kay <allen.m.kay@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/kernel/amd_iommu_init.c |  2 ++
 arch/x86/xen/enlighten.c         |  5 +++++
 drivers/pci/dmar.c               |  5 ++++-
 drivers/pci/pci.c                | 13 +++++++++++++
 drivers/pci/probe.c              |  5 +----
 include/linux/pci.h              |  2 ++
 6 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index b4b61d462dcc..e60530a5f524 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1330,6 +1330,8 @@ void __init amd_iommu_detect(void)
 		gart_iommu_aperture_disabled = 1;
 		gart_iommu_aperture = 0;
 #endif
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
 	}
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5bccd706232c..e2511bccbc8d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -27,6 +27,7 @@
 #include <linux/page-flags.h>
 #include <linux/highmem.h>
 #include <linux/console.h>
+#include <linux/pci.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1170,7 +1171,11 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
+	} else {
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
 	}
+		
 
 	xen_raw_console_write("about to get started...\n");
 
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index e01ca4d6b3e6..0e98f6b6f515 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -614,8 +614,11 @@ void __init detect_intel_iommu(void)
 #endif
 #ifdef CONFIG_DMAR
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
-		    !dmar_disabled)
+		    !dmar_disabled) {
 			iommu_detected = 1;
+			/* Make sure ACS will be enabled */
+			pci_request_acs();
+		}
 #endif
 	}
 	early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6af212c509c5..cd9b375f49d5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1550,6 +1550,16 @@ void pci_enable_ari(struct pci_dev *dev)
 	bridge->ari_enabled = 1;
 }
 
+static int pci_acs_enable;
+
+/**
+ * pci_request_acs - ask for ACS to be enabled if supported
+ */
+void pci_request_acs(void)
+{
+	pci_acs_enable = 1;
+}
+
 /**
  * pci_enable_acs - enable ACS if hardware support it
  * @dev: the PCI device
@@ -1560,6 +1570,9 @@ void pci_enable_acs(struct pci_dev *dev)
 	u16 cap;
 	u16 ctrl;
 
+	if (!pci_acs_enable)
+		return;
+
 	if (!pci_is_pcie(dev))
 		return;
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2fdffc02a308..98ffb2de22e9 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -10,9 +10,7 @@
 #include <linux/module.h>
 #include <linux/cpumask.h>
 #include <linux/pci-aspm.h>
-#include <linux/iommu.h>
 #include <acpi/acpi_hest.h>
-#include <xen/xen.h>
 #include "pci.h"
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
@@ -1029,8 +1027,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	pci_iov_init(dev);
 
 	/* Enable ACS P2P upstream forwarding */
-	if (iommu_found() || xen_initial_domain())
-		pci_enable_acs(dev);
+	pci_enable_acs(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2891c3d3e51a..04771b9c3316 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1328,5 +1328,7 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
 	return !!pci_pcie_cap(dev);
 }
 
+void pci_request_acs(void);
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 4 Dec 2009 17:44:50 -0800
Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2)

Starting with version 4.5, GCC has a new built-in function
__builtin_unreachable() that can be used in places like the kernel's
BUG() where inline assembly is used to transfer control flow.  This
eliminated the need for an endless loop in these places.

The patch adds a new macro 'unreachable()' that will expand to either
__builtin_unreachable() or an endless loop depending on the compiler
version.

Change from v1: Simplify unreachable() for non-GCC 4.5 case.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 14 ++++++++++++++
 include/linux/compiler.h      |  5 +++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 450fa597c94d..ab3af40a53c6 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -36,4 +36,18 @@
    the kernel context */
 #define __cold			__attribute__((__cold__))
 
+
+#if __GNUC_MINOR__ >= 5
+/*
+ * Mark a position in code as unreachable.  This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ *
+ * Early snapshots of gcc 4.5 don't support this and we can't detect
+ * this in the preprocessor, but we can live with this because they're
+ * unreleased.  Really, we need to have autoconf for the kernel.
+ */
+#define unreachable() __builtin_unreachable()
+#endif
+
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..59f208926d13 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define barrier() __memory_barrier()
 #endif
 
+/* Unreachable code */
+#ifndef unreachable
+# define unreachable() do { } while (1)
+#endif
+
 #ifndef RELOC_HIDE
 # define RELOC_HIDE(ptr, off)					\
   ({ unsigned long __ptr;					\
-- 
cgit v1.2.3


From 0629e370dd5819efa5cf8d418a8e6729efe388ef Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Sat, 5 Dec 2009 13:46:14 -0500
Subject: nfs41: check SEQUENCE status flag

the server can indicate a number of error conditions by setting the
appropriate bits in the SEQUENCE operation. The client re-establishes
state with the server when it receives one of those, with the action
depending on the specific case.

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |  1 +
 fs/nfs/nfs4proc.c       |  2 ++
 fs/nfs/nfs4state.c      | 22 ++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        |  4 ++--
 include/linux/nfs4.h    |  2 ++
 include/linux/nfs_xdr.h |  1 +
 6 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0d25a82451d9..9a866368896a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -270,6 +270,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d897b9e34f12..71993f122214 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -405,6 +405,8 @@ static void nfs41_sequence_done(struct nfs_client *clp,
 		if (time_before(clp->cl_last_renewal, timestamp))
 			clp->cl_last_renewal = timestamp;
 		spin_unlock(&clp->cl_lock);
+		/* Check sequence flags */
+		nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
 		return;
 	}
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 46c69e2248e3..a86f3acf3212 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1173,6 +1173,28 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
 }
 
 #ifdef CONFIG_NFS_V4_1
+void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
+{
+	if (!flags)
+		return;
+	else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) {
+		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		nfs4_state_start_reclaim_reboot(clp);
+		nfs4_schedule_state_recovery(clp);
+	} else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
+			    SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
+			    SEQ4_STATUS_ADMIN_STATE_REVOKED |
+			    SEQ4_STATUS_RECALLABLE_STATE_REVOKED |
+			    SEQ4_STATUS_LEASE_MOVED)) {
+		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		nfs4_state_start_reclaim_nograce(clp);
+		nfs4_schedule_state_recovery(clp);
+	} else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+			    SEQ4_STATUS_BACKCHANNEL_FAULT |
+			    SEQ4_STATUS_CB_PATH_DOWN_SESSION))
+		nfs_expire_all_delegations(clp);
+}
+
 static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err)
 {
 	switch (err) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4ddd04a1113e..f740472370aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4614,8 +4614,8 @@ static int decode_sequence(struct xdr_stream *xdr,
 	dummy = be32_to_cpup(p++);
 	/* target highest slot id - currently not processed */
 	dummy = be32_to_cpup(p++);
-	/* result flags - currently not processed */
-	dummy = be32_to_cpup(p);
+	/* result flags */
+	res->sr_status_flags = be32_to_cpup(p);
 	status = 0;
 out_err:
 	res->sr_status = status;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c4c060208109..89404bfb72b6 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -128,6 +128,8 @@
 #define SEQ4_STATUS_RECALLABLE_STATE_REVOKED	0x00000040
 #define SEQ4_STATUS_LEASE_MOVED			0x00000080
 #define SEQ4_STATUS_RESTART_RECLAIM_NEEDED	0x00000100
+#define SEQ4_STATUS_CB_PATH_DOWN_SESSION	0x00000200
+#define SEQ4_STATUS_BACKCHANNEL_FAULT		0x00000400
 
 #define NFS4_MAX_UINT64	(~(u64)0)
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8c880372536e..ea32db30c289 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -172,6 +172,7 @@ struct nfs4_sequence_res {
 	u8			sr_slotid;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	unsigned long		sr_renewal_time;
+	u32			sr_status_flags;
 };
 
 struct nfs4_get_lease_time_args {
-- 
cgit v1.2.3


From 21f1b932dbcc5ed18444e6995aeb856e583804ae Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 23 Oct 2009 06:50:12 -0300
Subject: V4L/DVB (13183): gspca: add stv0680 subdriver

This patch adds a new subdriver to gspca supporting cams with the stv0680
bridge (replacing the old in kernel v4l1 driver).

Many thanks to Hans Verkuil for providing me with one of the 2 cams used in
testing this new sub driver.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig         |   6 +-
 drivers/media/video/gspca/Kconfig   |   9 +
 drivers/media/video/gspca/Makefile  |   2 +
 drivers/media/video/gspca/stv0680.c | 365 ++++++++++++++++++++++++++++++++++++
 include/linux/videodev2.h           |   1 +
 5 files changed, 382 insertions(+), 1 deletion(-)
 create mode 100644 drivers/media/video/gspca/stv0680.c

(limited to 'include/linux')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 7ecae636e6ad..82ae85c975f5 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -1016,9 +1016,13 @@ config USB_SE401
 source "drivers/media/video/sn9c102/Kconfig"
 
 config USB_STV680
-	tristate "USB STV680 (Pencam) Camera support"
+	tristate "USB STV680 (Pencam) Camera support (DEPRECATED)"
 	depends on VIDEO_V4L1
 	---help---
+	  This driver is DEPRECATED please use the gspca stv0680 module
+	  instead. Note that for the gspca stv0680 module you need
+	  atleast version 0.6.3 of libv4l.
+
 	  Say Y here if you want to connect this type of camera to your
 	  computer's USB port. This includes the Pencam line of cameras.
 	  See <file:Documentation/video4linux/stv680.txt> for more information
diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig
index cbc2367719f0..568edbbb8948 100644
--- a/drivers/media/video/gspca/Kconfig
+++ b/drivers/media/video/gspca/Kconfig
@@ -230,6 +230,15 @@ config USB_GSPCA_STK014
 	  To compile this driver as a module, choose M here: the
 	  module will be called gspca_stk014.
 
+config USB_GSPCA_STV0680
+	tristate "STV0680 USB Camera Driver"
+	depends on VIDEO_V4L2 && USB_GSPCA
+	help
+	  Say Y here if you want support for cameras based on the STV0680 chip.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gspca_stv0680.
+
 config USB_GSPCA_SUNPLUS
 	tristate "SUNPLUS USB Camera Driver"
 	depends on VIDEO_V4L2 && USB_GSPCA
diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile
index b7420818037e..770b01387e99 100644
--- a/drivers/media/video/gspca/Makefile
+++ b/drivers/media/video/gspca/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_USB_GSPCA_SQ905)    += gspca_sq905.o
 obj-$(CONFIG_USB_GSPCA_SQ905C)   += gspca_sq905c.o
 obj-$(CONFIG_USB_GSPCA_SUNPLUS)  += gspca_sunplus.o
 obj-$(CONFIG_USB_GSPCA_STK014)   += gspca_stk014.o
+obj-$(CONFIG_USB_GSPCA_STV0680)  += gspca_stv0680.o
 obj-$(CONFIG_USB_GSPCA_T613)     += gspca_t613.o
 obj-$(CONFIG_USB_GSPCA_TV8532)   += gspca_tv8532.o
 obj-$(CONFIG_USB_GSPCA_VC032X)   += gspca_vc032x.o
@@ -50,6 +51,7 @@ gspca_spca561-objs  := spca561.o
 gspca_sq905-objs    := sq905.o
 gspca_sq905c-objs   := sq905c.o
 gspca_stk014-objs   := stk014.o
+gspca_stv0680-objs  := stv0680.o
 gspca_sunplus-objs  := sunplus.o
 gspca_t613-objs     := t613.o
 gspca_tv8532-objs   := tv8532.o
diff --git a/drivers/media/video/gspca/stv0680.c b/drivers/media/video/gspca/stv0680.c
new file mode 100644
index 000000000000..0981ce14235d
--- /dev/null
+++ b/drivers/media/video/gspca/stv0680.c
@@ -0,0 +1,365 @@
+/*
+ * STV0680 USB Camera Driver
+ *
+ * Copyright (C) 2009 Hans de Goede <hdgoede@redhat.com>
+ *
+ * This module is adapted from the in kernel v4l1 stv680 driver:
+ *
+ *  STV0680 USB Camera Driver, by Kevin Sisson (kjsisson@bellsouth.net)
+ *
+ * Thanks to STMicroelectronics for information on the usb commands, and
+ * to Steve Miller at STM for his help and encouragement while I was
+ * writing this driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define MODULE_NAME "stv0680"
+
+#include "gspca.h"
+
+MODULE_AUTHOR("Hans de Goede <hdgoede@redhat.com>");
+MODULE_DESCRIPTION("STV0680 USB Camera Driver");
+MODULE_LICENSE("GPL");
+
+/* specific webcam descriptor */
+struct sd {
+	struct gspca_dev gspca_dev;		/* !! must be the first item */
+	struct v4l2_pix_format mode;
+	u8 orig_mode;
+	u8 video_mode;
+	u8 current_mode;
+};
+
+/* V4L2 controls supported by the driver */
+static struct ctrl sd_ctrls[] = {
+};
+
+static int stv_sndctrl(struct gspca_dev *gspca_dev, int set, u8 req, u16 val,
+		       int size)
+{
+	int ret = -1;
+	u8 req_type = 0;
+
+	switch (set) {
+	case 0: /*  0xc1  */
+		req_type = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_ENDPOINT;
+		break;
+	case 1: /*  0x41  */
+		req_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_ENDPOINT;
+		break;
+	case 2:	/*  0x80  */
+		req_type = USB_DIR_IN | USB_RECIP_DEVICE;
+		break;
+	case 3:	/*  0x40  */
+		req_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE;
+		break;
+	}
+
+	ret = usb_control_msg(gspca_dev->dev,
+			      usb_rcvctrlpipe(gspca_dev->dev, 0),
+			      req, req_type,
+			      val, 0, gspca_dev->usb_buf, size, 500);
+
+	if ((ret < 0) && (req != 0x0a))
+		PDEBUG(D_ERR,
+		       "usb_control_msg error %i, request = 0x%x, error = %i",
+		       set, req, ret);
+
+	return ret;
+}
+
+static int stv0680_handle_error(struct gspca_dev *gspca_dev, int ret)
+{
+	stv_sndctrl(gspca_dev, 0, 0x80, 0, 0x02); /* Get Last Error */
+	PDEBUG(D_ERR, "last error: %i,  command = 0x%x",
+	       gspca_dev->usb_buf[0], gspca_dev->usb_buf[1]);
+	return ret;
+}
+
+static int stv0680_get_video_mode(struct gspca_dev *gspca_dev)
+{
+	/* Note not sure if this init of usb_buf is really necessary */
+	memset(gspca_dev->usb_buf, 0, 8);
+	gspca_dev->usb_buf[0] = 0x0f;
+
+	if (stv_sndctrl(gspca_dev, 0, 0x87, 0, 0x08) != 0x08) {
+		PDEBUG(D_ERR, "Get_Camera_Mode failed");
+		return stv0680_handle_error(gspca_dev, -EIO);
+	}
+
+	return gspca_dev->usb_buf[0]; /* 01 = VGA, 03 = QVGA, 00 = CIF */
+}
+
+static int stv0680_set_video_mode(struct gspca_dev *gspca_dev, u8 mode)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	if (sd->current_mode == mode)
+		return 0;
+
+	memset(gspca_dev->usb_buf, 0, 8);
+	gspca_dev->usb_buf[0] = mode;
+
+	if (stv_sndctrl(gspca_dev, 3, 0x07, 0x0100, 0x08) != 0x08) {
+		PDEBUG(D_ERR, "Set_Camera_Mode failed");
+		return stv0680_handle_error(gspca_dev, -EIO);
+	}
+
+	/* Verify we got what we've asked for */
+	if (stv0680_get_video_mode(gspca_dev) != mode) {
+		PDEBUG(D_ERR, "Error setting camera video mode!");
+		return -EIO;
+	}
+
+	sd->current_mode = mode;
+
+	return 0;
+}
+
+/* this function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+			const struct usb_device_id *id)
+{
+	int ret;
+	struct sd *sd = (struct sd *) gspca_dev;
+	struct cam *cam = &gspca_dev->cam;
+
+	/* ping camera to be sure STV0680 is present */
+	if (stv_sndctrl(gspca_dev, 0, 0x88, 0x5678, 0x02) != 0x02 ||
+	    gspca_dev->usb_buf[0] != 0x56 || gspca_dev->usb_buf[1] != 0x78) {
+		PDEBUG(D_ERR, "STV(e): camera ping failed!!");
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+	}
+
+	/* get camera descriptor */
+	if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0200, 0x09) != 0x09)
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+
+	if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0200, 0x22) != 0x22 ||
+	    gspca_dev->usb_buf[7] != 0xa0 || gspca_dev->usb_buf[8] != 0x23) {
+		PDEBUG(D_ERR, "Could not get descriptor 0200.");
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+	}
+	if (stv_sndctrl(gspca_dev, 0, 0x8a, 0, 0x02) != 0x02)
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+	if (stv_sndctrl(gspca_dev, 0, 0x8b, 0, 0x24) != 0x24)
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+	if (stv_sndctrl(gspca_dev, 0, 0x85, 0, 0x10) != 0x10)
+		return stv0680_handle_error(gspca_dev, -ENODEV);
+
+	if (!(gspca_dev->usb_buf[7] & 0x09)) {
+		PDEBUG(D_ERR, "Camera supports neither CIF nor QVGA mode");
+		return -ENODEV;
+	}
+	if (gspca_dev->usb_buf[7] & 0x01)
+		PDEBUG(D_PROBE, "Camera supports CIF mode");
+	if (gspca_dev->usb_buf[7] & 0x02)
+		PDEBUG(D_PROBE, "Camera supports VGA mode");
+	if (gspca_dev->usb_buf[7] & 0x08)
+		PDEBUG(D_PROBE, "Camera supports QVGA mode");
+
+	if (gspca_dev->usb_buf[7] & 0x01)
+		sd->video_mode = 0x00; /* CIF */
+	else
+		sd->video_mode = 0x03; /* QVGA */
+
+	/* FW rev, ASIC rev, sensor ID  */
+	PDEBUG(D_PROBE, "Firmware rev is %i.%i",
+	       gspca_dev->usb_buf[0], gspca_dev->usb_buf[1]);
+	PDEBUG(D_PROBE, "ASIC rev is %i.%i",
+	       gspca_dev->usb_buf[2], gspca_dev->usb_buf[3]);
+	PDEBUG(D_PROBE, "Sensor ID is %i",
+	       (gspca_dev->usb_buf[4]*16) + (gspca_dev->usb_buf[5]>>4));
+
+
+	ret = stv0680_get_video_mode(gspca_dev);
+	if (ret < 0)
+		return ret;
+	sd->current_mode = sd->orig_mode = ret;
+
+	ret = stv0680_set_video_mode(gspca_dev, sd->video_mode);
+	if (ret < 0)
+		return ret;
+
+	/* Get mode details */
+	if (stv_sndctrl(gspca_dev, 0, 0x8f, 0, 0x10) != 0x10)
+		return stv0680_handle_error(gspca_dev, -EIO);
+
+	cam->bulk = 1;
+	cam->bulk_nurbs = 1; /* The cam cannot handle more */
+	cam->bulk_size = (gspca_dev->usb_buf[0] << 24) |
+			 (gspca_dev->usb_buf[1] << 16) |
+			 (gspca_dev->usb_buf[2] << 8) |
+			 (gspca_dev->usb_buf[3]);
+	sd->mode.width = (gspca_dev->usb_buf[4] << 8) |
+			 (gspca_dev->usb_buf[5]);  /* 322, 356, 644 */
+	sd->mode.height = (gspca_dev->usb_buf[6] << 8) |
+			  (gspca_dev->usb_buf[7]); /* 242, 292, 484 */
+	sd->mode.pixelformat = V4L2_PIX_FMT_STV0680;
+	sd->mode.field = V4L2_FIELD_NONE;
+	sd->mode.bytesperline = sd->mode.width;
+	sd->mode.sizeimage = cam->bulk_size;
+	sd->mode.colorspace = V4L2_COLORSPACE_SRGB;
+
+	/* origGain = gspca_dev->usb_buf[12]; */
+
+	cam->cam_mode = &sd->mode;
+	cam->nmodes = 1;
+
+
+	ret = stv0680_set_video_mode(gspca_dev, sd->orig_mode);
+	if (ret < 0)
+		return ret;
+
+	if (stv_sndctrl(gspca_dev, 2, 0x06, 0x0100, 0x12) != 0x12 ||
+	    gspca_dev->usb_buf[8] != 0x53 || gspca_dev->usb_buf[9] != 0x05) {
+		PDEBUG(D_ERR, "Could not get descriptor 0100.");
+		return stv0680_handle_error(gspca_dev, -EIO);
+	}
+
+	return 0;
+}
+
+/* this function is called at probe and resume time */
+static int sd_init(struct gspca_dev *gspca_dev)
+{
+	return 0;
+}
+
+/* -- start the camera -- */
+static int sd_start(struct gspca_dev *gspca_dev)
+{
+	int ret;
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	ret = stv0680_set_video_mode(gspca_dev, sd->video_mode);
+	if (ret < 0)
+		return ret;
+
+	if (stv_sndctrl(gspca_dev, 0, 0x85, 0, 0x10) != 0x10)
+		return stv0680_handle_error(gspca_dev, -EIO);
+
+	/* Start stream at:
+	   0x0000 = CIF (352x288)
+	   0x0100 = VGA (640x480)
+	   0x0300 = QVGA (320x240) */
+	if (stv_sndctrl(gspca_dev, 1, 0x09, sd->video_mode << 8, 0x0) != 0x0)
+		return stv0680_handle_error(gspca_dev, -EIO);
+
+	return 0;
+}
+
+static void sd_stopN(struct gspca_dev *gspca_dev)
+{
+	/* This is a high priority command; it stops all lower order cmds */
+	if (stv_sndctrl(gspca_dev, 1, 0x04, 0x0000, 0x0) != 0x0)
+		stv0680_handle_error(gspca_dev, -EIO);
+}
+
+static void sd_stop0(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	if (!sd->gspca_dev.present)
+		return;
+
+	stv0680_set_video_mode(gspca_dev, sd->orig_mode);
+}
+
+static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+			struct gspca_frame *frame,
+			__u8 *data,
+			int len)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	/* Every now and then the camera sends a 16 byte packet, no idea
+	   what it contains, but it is not image data, when this
+	   happens the frame received before this packet is corrupt,
+	   so discard it. */
+	if (len != sd->mode.sizeimage) {
+		gspca_dev->last_packet_type = DISCARD_PACKET;
+		return;
+	}
+
+	/* Finish the previous frame, we do this upon reception of the next
+	   packet, even though it is already complete so that the strange 16
+	   byte packets send after a corrupt frame can discard it. */
+	frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame, NULL, 0);
+
+	/* Store the just received frame */
+	gspca_frame_add(gspca_dev, FIRST_PACKET, frame, data, len);
+}
+
+/* sub-driver description */
+static const struct sd_desc sd_desc = {
+	.name = MODULE_NAME,
+	.ctrls = sd_ctrls,
+	.nctrls = ARRAY_SIZE(sd_ctrls),
+	.config = sd_config,
+	.init = sd_init,
+	.start = sd_start,
+	.stopN = sd_stopN,
+	.stop0 = sd_stop0,
+	.pkt_scan = sd_pkt_scan,
+};
+
+/* -- module initialisation -- */
+static const __devinitdata struct usb_device_id device_table[] = {
+	{USB_DEVICE(0x0553, 0x0202)},
+	{USB_DEVICE(0x041e, 0x4007)},
+	{}
+};
+MODULE_DEVICE_TABLE(usb, device_table);
+
+/* -- device connect -- */
+static int sd_probe(struct usb_interface *intf,
+			const struct usb_device_id *id)
+{
+	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
+				THIS_MODULE);
+}
+
+static struct usb_driver sd_driver = {
+	.name = MODULE_NAME,
+	.id_table = device_table,
+	.probe = sd_probe,
+	.disconnect = gspca_disconnect,
+#ifdef CONFIG_PM
+	.suspend = gspca_suspend,
+	.resume = gspca_resume,
+#endif
+};
+
+/* -- module insert / remove -- */
+static int __init sd_mod_init(void)
+{
+	int ret;
+	ret = usb_register(&sd_driver);
+	if (ret < 0)
+		return ret;
+	PDEBUG(D_PROBE, "registered");
+	return 0;
+}
+static void __exit sd_mod_exit(void)
+{
+	usb_deregister(&sd_driver);
+	PDEBUG(D_PROBE, "deregistered");
+}
+
+module_init(sd_mod_init);
+module_exit(sd_mod_exit);
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b59e78c57161..b9a799a35763 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -361,6 +361,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
 #define V4L2_PIX_FMT_OV511    v4l2_fourcc('O', '5', '1', '1') /* ov511 JPEG */
 #define V4L2_PIX_FMT_OV518    v4l2_fourcc('O', '5', '1', '8') /* ov518 JPEG */
+#define V4L2_PIX_FMT_STV0680  v4l2_fourcc('S', '6', '8', '0') /* stv0680 bayer */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
-- 
cgit v1.2.3


From 85213630731605503c8fd4df9bf06beefb2cc7c4 Mon Sep 17 00:00:00 2001
From: Vaibhav Hiremath <hvaibhav@ti.com>
Date: Tue, 10 Nov 2009 13:32:53 -0300
Subject: V4L/DVB (13467): V4L2: Added CID's V4L2_CID_ROTATE/BG_COLOR

Signed-off-by: Vaibhav Hiremath <hvaibhav@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/videodev2.h.xml | 4 +++-
 drivers/media/video/v4l2-common.c         | 9 +++++++++
 include/linux/videodev2.h                 | 4 +++-
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml
index bb6b07f9ea14..7162b9ffc2b3 100644
--- a/Documentation/DocBook/v4l/videodev2.h.xml
+++ b/Documentation/DocBook/v4l/videodev2.h.xml
@@ -915,8 +915,10 @@ enum <link linkend="v4l2-colorfx">v4l2_colorfx</link> {
 #define V4L2_CID_AUTOBRIGHTNESS                 (V4L2_CID_BASE+32)
 #define V4L2_CID_BAND_STOP_FILTER               (V4L2_CID_BASE+33)
 
+#define V4L2_CID_ROTATE                         (V4L2_CID_BASE+34)
+#define V4L2_CID_BG_COLOR                       (V4L2_CID_BASE+35)
 /* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+34)
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+36)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE                      (V4L2_CTRL_CLASS_MPEG | 0x900)
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index f5a93ae3cdf9..e8e5affbabce 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -431,6 +431,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_CHROMA_AGC:		return "Chroma AGC";
 	case V4L2_CID_COLOR_KILLER:		return "Color Killer";
 	case V4L2_CID_COLORFX:			return "Color Effects";
+	case V4L2_CID_ROTATE:			return "Rotate";
+	case V4L2_CID_BG_COLOR:			return "Background color";
 
 	/* MPEG controls */
 	case V4L2_CID_MPEG_CLASS: 		return "MPEG Encoder Controls";
@@ -587,6 +589,13 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, s32 min, s32 max, s32 ste
 		qctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 		min = max = step = def = 0;
 		break;
+	case V4L2_CID_BG_COLOR:
+		qctrl->type = V4L2_CTRL_TYPE_INTEGER;
+		step = 1;
+		min = 0;
+		/* Max is calculated as RGB888 that is 2^24 */
+		max = 0xFFFFFF;
+		break;
 	default:
 		qctrl->type = V4L2_CTRL_TYPE_INTEGER;
 		break;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b9a799a35763..54395460a414 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -913,8 +913,10 @@ enum v4l2_colorfx {
 #define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
 #define V4L2_CID_BAND_STOP_FILTER		(V4L2_CID_BASE+33)
 
+#define V4L2_CID_ROTATE				(V4L2_CID_BASE+34)
+#define V4L2_CID_BG_COLOR			(V4L2_CID_BASE+35)
 /* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+34)
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+36)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit v1.2.3


From d8008562379b927758ca08eded1508c68d9beb4e Mon Sep 17 00:00:00 2001
From: Vaibhav Hiremath <hvaibhav@ti.com>
Date: Tue, 10 Nov 2009 13:46:36 -0300
Subject: V4L/DVB (13470): V4L2: Add Capability and Flag field for Chroma Key

Signed-off-by: Vaibhav Hiremath <hvaibhav@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/videodev2.h.xml | 2 ++
 include/linux/videodev2.h                 | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml
index 7162b9ffc2b3..228277bf9f10 100644
--- a/Documentation/DocBook/v4l/videodev2.h.xml
+++ b/Documentation/DocBook/v4l/videodev2.h.xml
@@ -566,6 +566,7 @@ struct <link linkend="v4l2-framebuffer">v4l2_framebuffer</link> {
 #define V4L2_FBUF_CAP_LOCAL_ALPHA       0x0010
 #define V4L2_FBUF_CAP_GLOBAL_ALPHA      0x0020
 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA   0x0040
+#define V4L2_FBUF_CAP_SRC_CHROMAKEY     0x0080
 /*  Flags for the 'flags' field. */
 #define V4L2_FBUF_FLAG_PRIMARY          0x0001
 #define V4L2_FBUF_FLAG_OVERLAY          0x0002
@@ -573,6 +574,7 @@ struct <link linkend="v4l2-framebuffer">v4l2_framebuffer</link> {
 #define V4L2_FBUF_FLAG_LOCAL_ALPHA      0x0008
 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA     0x0010
 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA  0x0020
+#define V4L2_FBUF_FLAG_SRC_CHROMAKEY    0x0040
 
 struct <link linkend="v4l2-clip">v4l2_clip</link> {
 	struct <link linkend="v4l2-rect">v4l2_rect</link>        c;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 54395460a414..e4eb403a3b00 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -564,6 +564,7 @@ struct v4l2_framebuffer {
 #define V4L2_FBUF_CAP_LOCAL_ALPHA	0x0010
 #define V4L2_FBUF_CAP_GLOBAL_ALPHA	0x0020
 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA	0x0040
+#define V4L2_FBUF_CAP_SRC_CHROMAKEY	0x0080
 /*  Flags for the 'flags' field. */
 #define V4L2_FBUF_FLAG_PRIMARY		0x0001
 #define V4L2_FBUF_FLAG_OVERLAY		0x0002
@@ -571,6 +572,7 @@ struct v4l2_framebuffer {
 #define V4L2_FBUF_FLAG_LOCAL_ALPHA	0x0008
 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA	0x0010
 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA	0x0020
+#define V4L2_FBUF_FLAG_SRC_CHROMAKEY	0x0040
 
 struct v4l2_clip {
 	struct v4l2_rect        c;
-- 
cgit v1.2.3


From 180197536b15d5862b389ce90b46ec8d004056f6 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Sat, 5 Dec 2009 16:08:40 -0500
Subject: nfs41: RECLAIM_COMPLETE XDR functionality

XDR encoding and decoding for RECLAIM_COMPLETE.  Implements the necessary
encoding to indicate reclaim complete for the entire client.  In the future,
it can be extended to provide reclaim complete functionality for a single
file system after migration.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c        | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  1 +
 include/linux/nfs_xdr.h | 10 ++++++++
 3 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index bc8711d30296..e437fd6a819f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -301,6 +301,8 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
 #define decode_sequence_maxsz	(op_decode_hdr_maxsz + \
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+#define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+#define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -678,6 +680,12 @@ static int nfs4_stat_to_errno(int);
 					 decode_sequence_maxsz + \
 					 decode_putrootfh_maxsz + \
 					 decode_fsinfo_maxsz)
+#define NFS4_enc_reclaim_complete_sz	(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_reclaim_complete_maxsz)
+#define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_reclaim_complete_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1623,6 +1631,19 @@ static void encode_destroy_session(struct xdr_stream *xdr,
 	hdr->nops++;
 	hdr->replen += decode_destroy_session_maxsz;
 }
+
+static void encode_reclaim_complete(struct xdr_stream *xdr,
+				    struct nfs41_reclaim_complete_args *args,
+				    struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
+	*p++ = cpu_to_be32(args->one_fs);
+	hdr->nops++;
+	hdr->replen += decode_reclaim_complete_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void encode_sequence(struct xdr_stream *xdr,
@@ -2451,6 +2472,26 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
 	encode_nops(&hdr);
 	return 0;
 }
+
+/*
+ * a RECLAIM_COMPLETE request
+ */
+static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
+				     struct nfs41_reclaim_complete_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
+	encode_reclaim_complete(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4559,6 +4600,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
 {
 	return decode_op_hdr(xdr, OP_DESTROY_SESSION);
 }
+
+static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
+{
+	return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static int decode_sequence(struct xdr_stream *xdr,
@@ -5622,6 +5668,25 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
 		status = decode_fsinfo(&xdr, res->lr_fsinfo);
 	return status;
 }
+
+/*
+ * Decode RECLAIM_COMPLETE response
+ */
+static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
+					 struct nfs41_reclaim_complete_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (!status)
+		status = decode_reclaim_complete(&xdr, (void *)NULL);
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
@@ -5798,6 +5863,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(DESTROY_SESSION,	enc_destroy_session,	dec_destroy_session),
   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+  PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 89404bfb72b6..9b8299af3741 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -530,6 +530,7 @@ enum {
 	NFSPROC4_CLNT_DESTROY_SESSION,
 	NFSPROC4_CLNT_SEQUENCE,
 	NFSPROC4_CLNT_GET_LEASE_TIME,
+	NFSPROC4_CLNT_RECLAIM_COMPLETE,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ea32db30c289..51071b335751 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -939,6 +939,16 @@ struct nfs41_create_session_args {
 struct nfs41_create_session_res {
 	struct nfs_client	       *client;
 };
+
+struct nfs41_reclaim_complete_args {
+	/* In the future extend to include curr_fh for use with migration */
+	unsigned char			one_fs:1;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs41_reclaim_complete_res {
+	struct nfs4_sequence_res	seq_res;
+};
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
-- 
cgit v1.2.3


From ed54d0f98000ee03310150aa396e9ff8bcb394ce Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 3 Dec 2009 22:08:26 +0100
Subject: hw-breakpoints: Add two reserved fields for future extensions

Add two reserved fields for future extensions in the hardware
breakpoints interface. Further needs may arise.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 43adbd7f0010..a61e4de3448b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -220,6 +220,8 @@ struct perf_event_attr {
 			__u64		bp_addr;
 			__u32		bp_type;
 			__u32		bp_len;
+			__u64		__bp_reserved_1;
+			__u64		__bp_reserved_2;
 		};
 	};
 
-- 
cgit v1.2.3


From 189f202ed197dc25d627e8660de27ece325e9f68 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 3 Dec 2009 23:16:07 +0100
Subject: perf: Remove pointless union that wraps the hw breakpoint fields

It stands to anonymize a structure, but structures can already
anonymize by themselves.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a61e4de3448b..53230e99e9ea 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,14 +215,12 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	union {
-		struct { /* Hardware breakpoint info */
-			__u64		bp_addr;
-			__u32		bp_type;
-			__u32		bp_len;
-			__u64		__bp_reserved_1;
-			__u64		__bp_reserved_2;
-		};
+	struct { /* Hardware breakpoint info */
+		__u64		bp_addr;
+		__u32		bp_type;
+		__u32		bp_len;
+		__u64		__bp_reserved_1;
+		__u64		__bp_reserved_2;
 	};
 
 	__u32			__reserved_2;
-- 
cgit v1.2.3


From 9cef30815b0f5b76e94a58d7674fcbf824d95579 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 3 Dec 2009 23:59:31 +0100
Subject: perf: Remove unused struct perf_event::event_callback

This field might result from an older manual rebasing mistake.
We don't use it.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 53230e99e9ea..84bd28a0ffab 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -670,8 +670,6 @@ struct perf_event {
 
 	perf_callback_t			callback;
 
-	perf_callback_t			event_callback;
-
 #endif /* CONFIG_PERF_EVENTS */
 };
 
-- 
cgit v1.2.3


From 2f0993e0fb663c49e4d1e02654f6203246be4817 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 5 Dec 2009 07:06:10 +0100
Subject: hw-breakpoints: Drop callback and task parameters from modify helper

Drop the callback and task parameters from modify_user_hw_breakpoint().
For now we have no user that need to modify a breakpoint to the point
of changing its handler or its task context.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 arch/x86/kernel/ptrace.c      | 4 ++--
 include/linux/hw_breakpoint.h | 9 ++-------
 kernel/hw_breakpoint.c        | 7 +++----
 3 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 04d182a7cfdb..dbb395572ae2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -618,7 +618,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
 	attr.bp_type = gen_type;
 	attr.disabled = disabled;
 
-	return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+	return modify_user_hw_breakpoint(bp, &attr);
 }
 
 /*
@@ -740,7 +740,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
-		bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+		bp = modify_user_hw_breakpoint(bp, &attr);
 	}
 	/*
 	 * CHECKME: the previous code returned -EIO if the addr wasn't a
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a03daed08c59..d33096e0dbd4 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -57,10 +57,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 
 /* FIXME: only change from the attr, and don't unregister */
 extern struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk);
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 
 /*
  * Kernel breakpoints are not associated with any particular thread.
@@ -97,9 +94,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)	{ return NULL; }
+			  struct perf_event_attr *attr)	{ return NULL; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_callback_t triggered,
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..2d10b012828f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -312,9 +312,7 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
  */
 struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
 	/*
 	 * FIXME: do it without unregistering
@@ -323,7 +321,8 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
 	 */
 	unregister_hw_breakpoint(bp);
 
-	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+	return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid,
+						bp->callback);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
-- 
cgit v1.2.3


From b326e9560a28fc3e950637ef51847ed8f05c1335 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 5 Dec 2009 09:44:31 +0100
Subject: hw-breakpoints: Use overflow handler instead of the event callback

struct perf_event::event callback was called when a breakpoint
triggers. But this is a rather opaque callback, pretty
tied-only to the breakpoint API and not really integrated into perf
as it triggers even when we don't overflow.

We prefer to use overflow_handler() as it fits into the perf events
rules, being called only when we overflow.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 arch/x86/kernel/hw_breakpoint.c         |  5 ++---
 arch/x86/kernel/ptrace.c                |  9 ++++++---
 include/linux/hw_breakpoint.h           | 25 +++++++++++--------------
 include/linux/perf_event.h              | 13 +++++++------
 kernel/hw_breakpoint.c                  | 17 +++++------------
 kernel/perf_event.c                     | 24 +++++++++---------------
 kernel/trace/trace_ksym.c               |  5 +++--
 samples/hw_breakpoint/data_breakpoint.c |  7 +++++--
 8 files changed, 48 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d42f65ac4927..05d5fec64a94 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 		return ret;
 	}
 
-	if (bp->callback)
-		ret = arch_store_info(bp);
+	ret = arch_store_info(bp);
 
 	if (ret < 0)
 		return ret;
@@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 			break;
 		}
 
-		(bp->callback)(bp, args->regs);
+		perf_bp_event(bp, args->regs);
 
 		rcu_read_unlock();
 	}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index dbb395572ae2..b361d28061d0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-static void ptrace_triggered(struct perf_event *bp, void *data)
+static void ptrace_triggered(struct perf_event *bp, int nmi,
+			     struct perf_sample_data *data,
+			     struct pt_regs *regs)
 {
 	int i;
 	struct thread_struct *thread = &(current->thread);
@@ -599,7 +601,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
 {
 	int err;
 	int gen_len, gen_type;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
 	/*
 	 * We shoud have at least an inactive breakpoint at this
@@ -721,9 +723,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 {
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
+		hw_breakpoint_init(&attr);
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index d33096e0dbd4..4d14a384a01e 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -20,19 +20,16 @@ enum {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
-/* As it's for in-kernel or ptrace use, we want it to be pinned */
-#define DEFINE_BREAKPOINT_ATTR(name)	\
-struct perf_event_attr name = {		\
-	.type = PERF_TYPE_BREAKPOINT,	\
-	.size = sizeof(name),		\
-	.pinned = 1,			\
-};
-
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
 	attr->type = PERF_TYPE_BREAKPOINT;
 	attr->size = sizeof(*attr);
+	/*
+	 * As it's for in-kernel or ptrace use, we want it to be pinned
+	 * and to call its callback every hits.
+	 */
 	attr->pinned = 1;
+	attr->sample_period = 1;
 }
 
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
@@ -52,7 +49,7 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 
 extern struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
@@ -64,12 +61,12 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
  */
 extern struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
-				perf_callback_t triggered,
+				perf_overflow_handler_t	triggered,
 				int cpu);
 
 extern struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered);
+			    perf_overflow_handler_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -90,18 +87,18 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
 			  struct perf_event_attr *attr)	{ return NULL; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
-				perf_callback_t triggered,
+				perf_overflow_handler_t	 triggered,
 				int cpu)		{ return NULL; }
 static inline struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)	{ return NULL; }
+			    perf_overflow_handler_t triggered)	{ return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
 static inline int
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 84bd28a0ffab..d2f2667430da 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -565,10 +565,13 @@ struct perf_pending_entry {
 	void (*func)(struct perf_pending_entry *);
 };
 
-typedef void (*perf_callback_t)(struct perf_event *, void *);
-
 struct perf_sample_data;
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
+					struct perf_sample_data *,
+					struct pt_regs *regs);
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -660,9 +663,7 @@ struct perf_event {
 	struct pid_namespace		*ns;
 	u64				id;
 
-	void (*overflow_handler)(struct perf_event *event,
-			int nmi, struct perf_sample_data *data,
-			struct pt_regs *regs);
+	perf_overflow_handler_t		overflow_handler;
 
 #ifdef CONFIG_EVENT_PROFILE
 	struct event_filter		*filter;
@@ -779,7 +780,7 @@ extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
 				pid_t pid,
-				perf_callback_t callback);
+				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2d10b012828f..b600fc27f161 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -259,7 +259,7 @@ void release_bp_slot(struct perf_event *bp)
 }
 
 
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
 
@@ -276,19 +276,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
 	 * This is a quick hack that will be removed soon, once we remove
 	 * the tmp breakpoints from ptrace
 	 */
-	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+	if (!bp->attr.disabled || !bp->overflow_handler)
 		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	return ret;
 }
 
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
-	bp->callback = perf_bp_event;
-
-	return __register_perf_hw_breakpoint(bp);
-}
-
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
  * @attr: breakpoint attributes
@@ -297,7 +290,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
  */
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
 	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -322,7 +315,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 	unregister_hw_breakpoint(bp);
 
 	return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid,
-						bp->callback);
+						bp->overflow_handler);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -347,7 +340,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  */
 struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)
+			    perf_overflow_handler_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6b7ddba1dd64..fd43ff4ac860 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4286,15 +4286,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
 static const struct pmu *bp_perf_event_init(struct perf_event *bp)
 {
 	int err;
-	/*
-	 * The breakpoint is already filled if we haven't created the counter
-	 * through perf syscall
-	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
-	 */
-	if (!bp->callback)
-		err = register_perf_hw_breakpoint(bp);
-	else
-		err = __register_perf_hw_breakpoint(bp);
+
+	err = register_perf_hw_breakpoint(bp);
 	if (err)
 		return ERR_PTR(err);
 
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
-		   perf_callback_t callback,
+		   perf_overflow_handler_t overflow_handler,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
-	if (!callback && parent_event)
-		callback = parent_event->callback;
+	if (!overflow_handler && parent_event)
+		overflow_handler = parent_event->overflow_handler;
 	
-	event->callback	= callback;
+	event->overflow_handler	= overflow_handler;
 
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
@@ -4776,7 +4769,8 @@ err_put_context:
  */
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
-				 pid_t pid, perf_callback_t callback)
+				 pid_t pid,
+				 perf_overflow_handler_t overflow_handler)
 {
 	struct perf_event *event;
 	struct perf_event_context *ctx;
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	}
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
-				     NULL, callback, GFP_KERNEL);
+				 NULL, overflow_handler, GFP_KERNEL);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
 		goto err_put_context;
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..acb87d4a4ac1 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 }
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-void ksym_hbp_handler(struct perf_event *hbp, void *data)
+void ksym_hbp_handler(struct perf_event *hbp, int nmi,
+		      struct perf_sample_data *data,
+		      struct pt_regs *regs)
 {
 	struct ring_buffer_event *event;
 	struct ksym_trace_entry *entry;
-	struct pt_regs *regs = data;
 	struct ring_buffer *buffer;
 	int pc;
 
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index 29525500df00..c69cbe9b2426 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -41,7 +41,9 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
 MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
 			" write operations on the kernel symbol");
 
-static void sample_hbp_handler(struct perf_event *temp, void *data)
+static void sample_hbp_handler(struct perf_event *bp, int nmi,
+			       struct perf_sample_data *data,
+			       struct pt_regs *regs)
 {
 	printk(KERN_INFO "%s value is changed\n", ksym_name);
 	dump_stack();
@@ -51,8 +53,9 @@ static void sample_hbp_handler(struct perf_event *temp, void *data)
 static int __init hw_break_module_init(void)
 {
 	int ret;
-	DEFINE_BREAKPOINT_ATTR(attr);
+	struct perf_event_attr attr;
 
+	hw_breakpoint_init(&attr);
 	attr.bp_addr = kallsyms_lookup_name(ksym_name);
 	attr.bp_len = HW_BREAKPOINT_LEN_4;
 	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
-- 
cgit v1.2.3


From c0dfb2feb632537cf0a9d2ce3c29bcf5778fec59 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 5 Dec 2009 09:53:28 +0100
Subject: perf: Remove the "event" callback from perf events

As it is not used anymore and has been superseded by overflow_handler.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2f2667430da..89098e35a036 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -567,7 +567,6 @@ struct perf_pending_entry {
 
 struct perf_sample_data;
 
-typedef void (*perf_callback_t)(struct perf_event *, void *);
 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
 					struct perf_sample_data *,
 					struct pt_regs *regs);
@@ -669,8 +668,6 @@ struct perf_event {
 	struct event_filter		*filter;
 #endif
 
-	perf_callback_t			callback;
-
 #endif /* CONFIG_PERF_EVENTS */
 };
 
-- 
cgit v1.2.3


From 7a1a8eb58a2c6cd819d17332c5a2c369203635d5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 3 Dec 2009 21:19:18 +0100
Subject: PM: Add flag for devices capable of generating run-time wake-up
 events

Apparently, there are devices that can wake up the system from sleep
states and yet are incapable of generating wake-up events at run
time.  Thus, introduce a flag indicating if given device is capable
of generating run-time wake-up events.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/power/runtime_pm.txt |  7 +++++--
 include/linux/pm.h                 |  8 +++++---
 include/linux/pm_runtime.h         | 12 ++++++++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 6bb25cb24da9..4a3109b28847 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -71,9 +71,9 @@ what to do to handle the device).
     purpose).
 
 In particular, if the driver requires remote wakeup capability for proper
-functioning and device_may_wakeup() returns 'false' for the device, then
+functioning and device_run_wake() returns 'false' for the device, then
 ->runtime_suspend() should return -EBUSY.  On the other hand, if
-device_may_wakeup() returns 'true' for the device and the device is put
+device_run_wake() returns 'true' for the device and the device is put
 into a low power state during the execution of its bus type's
 ->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism
 allowing the device to request a change of its power state, such as PCI PME)
@@ -215,6 +215,9 @@ defined in include/linux/pm.h:
       being executed for that device and it is not practical to wait for the
       suspend to complete; means "start a resume as soon as you've suspended"
 
+  unsigned int run_wake;
+    - set if the device is capable of generating run-time wake-up events
+
   enum rpm_status runtime_status;
     - the run-time PM status of the device; this field's initial value is
       RPM_SUSPENDED, which means that each device is initially regarded by the
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3b7e04b95bd2..0d65934246af 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -178,9 +178,10 @@ typedef struct pm_message {
  *	This need not mean that the device should be put into a low power state.
  *	For example, if the device is behind a link which is about to be turned
  *	off, the device may remain at full power.  If the device does go to low
- *	power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a
- *	hardware mechanism allowing the device to request a change of its power
- *	state, such as PCI PME) should be enabled for it.
+ *	power and is capable of generating run-time wake-up events, remote
+ *	wake-up (i.e., a hardware mechanism allowing the device to request a
+ *	change of its power state via a wake-up event, such as PCI PME) should
+ *	be enabled for it.
  *
  * @runtime_resume: Put the device into the fully active state in response to a
  *	wake-up event generated by hardware or at the request of software.  If
@@ -428,6 +429,7 @@ struct dev_pm_info {
 	unsigned int		idle_notification:1;
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
+	unsigned int		run_wake:1;
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 44087044910f..370ce0a6fe4a 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -50,6 +50,16 @@ static inline void pm_runtime_put_noidle(struct device *dev)
 	atomic_add_unless(&dev->power.usage_count, -1, 0);
 }
 
+static inline bool device_run_wake(struct device *dev)
+{
+	return dev->power.run_wake;
+}
+
+static inline void device_set_run_wake(struct device *dev, bool enable)
+{
+	dev->power.run_wake = enable;
+}
+
 #else /* !CONFIG_PM_RUNTIME */
 
 static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; }
@@ -73,6 +83,8 @@ static inline bool pm_children_suspended(struct device *dev) { return false; }
 static inline void pm_suspend_ignore_children(struct device *dev, bool en) {}
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
+static inline bool device_run_wake(struct device *dev) { return false; }
+static inline void device_set_run_wake(struct device *dev, bool enable) {}
 
 #endif /* !CONFIG_PM_RUNTIME */
 
-- 
cgit v1.2.3


From 194684e596af4bdaebb424166d94a8aa528edfda Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@nokia.com>
Date: Sun, 6 Dec 2009 17:06:22 +0100
Subject: i2c: Prevent priority inversion on top of bus lock

Low priority thread holding the i2c bus mutex could block higher
priority threads to access the bus resulting in unacceptable
latencies. Change the mutex type to rt_mutex preventing priority
inversion.

Tested-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@nokia.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/Kconfig    |  1 +
 drivers/i2c/i2c-core.c | 12 ++++++------
 include/linux/i2c.h    |  7 +++----
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index d7ece131b4f4..8d8a00e5a30e 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -5,6 +5,7 @@
 menuconfig I2C
 	tristate "I2C support"
 	depends on HAS_IOMEM
+	select RT_MUTEXES
 	---help---
 	  I2C (pronounce: I-square-C) is a slow serial bus protocol used in
 	  many micro controller applications and developed by Philips.  SMBus,
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 296504355142..d664b4a97a31 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -584,7 +584,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 		goto out_list;
 	}
 
-	mutex_init(&adap->bus_lock);
+	rt_mutex_init(&adap->bus_lock);
 
 	/* Set default timeout to 1 second if not already set */
 	if (adap->timeout == 0)
@@ -1092,12 +1092,12 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 #endif
 
 		if (in_atomic() || irqs_disabled()) {
-			ret = mutex_trylock(&adap->bus_lock);
+			ret = rt_mutex_trylock(&adap->bus_lock);
 			if (!ret)
 				/* I2C activity is ongoing. */
 				return -EAGAIN;
 		} else {
-			mutex_lock_nested(&adap->bus_lock, adap->level);
+			rt_mutex_lock(&adap->bus_lock);
 		}
 
 		/* Retry automatically on arbitration loss */
@@ -1109,7 +1109,7 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 			if (time_after(jiffies, orig_jiffies + adap->timeout))
 				break;
 		}
-		mutex_unlock(&adap->bus_lock);
+		rt_mutex_unlock(&adap->bus_lock);
 
 		return ret;
 	} else {
@@ -1913,7 +1913,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC;
 
 	if (adapter->algo->smbus_xfer) {
-		mutex_lock(&adapter->bus_lock);
+		rt_mutex_lock(&adapter->bus_lock);
 
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
@@ -1927,7 +1927,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		mutex_unlock(&adapter->bus_lock);
+		rt_mutex_unlock(&adapter->bus_lock);
 	} else
 		res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write,
 					      command, protocol, data);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 7b40cda57a70..52317fb5917e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -338,8 +338,7 @@ struct i2c_adapter {
 	void *algo_data;
 
 	/* data fields that are valid for all devices	*/
-	u8 level; 			/* nesting level for lockdep */
-	struct mutex bus_lock;
+	struct rt_mutex bus_lock;
 
 	int timeout;			/* in jiffies */
 	int retries;
@@ -367,7 +366,7 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data)
  */
 static inline void i2c_lock_adapter(struct i2c_adapter *adapter)
 {
-	mutex_lock(&adapter->bus_lock);
+	rt_mutex_lock(&adapter->bus_lock);
 }
 
 /**
@@ -376,7 +375,7 @@ static inline void i2c_lock_adapter(struct i2c_adapter *adapter)
  */
 static inline void i2c_unlock_adapter(struct i2c_adapter *adapter)
 {
-	mutex_unlock(&adapter->bus_lock);
+	rt_mutex_unlock(&adapter->bus_lock);
 }
 
 /*flags for the client struct: */
-- 
cgit v1.2.3


From c7b25a9e96dc89954ae8d8f473f56fae62030f84 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 6 Dec 2009 17:06:24 +0100
Subject: i2c: Drop probe, ignore and force module parameters

The legacy probe and force module parameters are obsolete now, the
same can be achieved using the new_device sysfs interface, which is
both more flexible and cheaper (it is implemented by i2c-core rather
than replicated in every driver module.)

The legacy ignore module parameters can be dropped as well. Ignoring
can be done by instantiating a "dummy" device at the problematic
address.

This is the first step of a huge cleanup to i2c-core's i2c_detect
function, i2c.h's I2C_CLIENT_INSMOD* macros, and all drivers that made
use of them.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/old-module-parameters | 44 ++++++++++++++++
 drivers/i2c/i2c-core.c                  | 65 +----------------------
 include/linux/i2c.h                     | 91 +--------------------------------
 3 files changed, 46 insertions(+), 154 deletions(-)
 create mode 100644 Documentation/i2c/old-module-parameters

(limited to 'include/linux')

diff --git a/Documentation/i2c/old-module-parameters b/Documentation/i2c/old-module-parameters
new file mode 100644
index 000000000000..8e2b629d533c
--- /dev/null
+++ b/Documentation/i2c/old-module-parameters
@@ -0,0 +1,44 @@
+I2C device driver binding control from user-space
+=================================================
+
+Up to kernel 2.6.32, many i2c drivers used helper macros provided by
+<linux/i2c.h> which created standard module parameters to let the user
+control how the driver would probe i2c buses and attach to devices. These
+parameters were known as "probe" (to let the driver probe for an extra
+address), "force" (to forcibly attach the driver to a given device) and
+"ignore" (to prevent a driver from probing a given address).
+
+With the conversion of the i2c subsystem to the standard device driver
+binding model, it became clear that these per-module parameters were no
+longer needed, and that a centralized implementation was possible. The new,
+sysfs-based interface is described in the documentation file
+"instantiating-devices", section "Method 4: Instantiate from user-space".
+
+Below is a mapping from the old module parameters to the new interface.
+
+Attaching a driver to an I2C device
+-----------------------------------
+
+Old method (module parameters):
+# modprobe <driver> probe=1,0x2d
+# modprobe <driver> force=1,0x2d
+# modprobe <driver> force_<device>=1,0x2d
+
+New method (sysfs interface):
+# echo <device> 0x2d > /sys/bus/i2c/devices/i2c-1/new_device
+
+Preventing a driver from attaching to an I2C device
+---------------------------------------------------
+
+Old method (module parameters):
+# modprobe <driver> ignore=1,0x2f
+
+New method (sysfs interface):
+# echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device
+# modprobe <driver>
+
+Of course, it is important to instantiate the "dummy" device before loading
+the driver. The dummy device will be handled by i2c-core itself, preventing
+other drivers from binding to it later on. If there is a real device at the
+problematic address, and you want another driver to bind to it, then simply
+pass the name of the device in question instead of "dummy".
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index d664b4a97a31..fdfaebdf3bfe 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1259,40 +1259,13 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 		return -ENOMEM;
 	temp_client->adapter = adapter;
 
-	/* Force entries are done first, and are not affected by ignore
-	   entries */
-	if (address_data->forces) {
-		const unsigned short * const *forces = address_data->forces;
-		int kind;
-
-		for (kind = 0; forces[kind]; kind++) {
-			for (i = 0; forces[kind][i] != I2C_CLIENT_END;
-			     i += 2) {
-				if (forces[kind][i] == adap_id
-				 || forces[kind][i] == ANY_I2C_BUS) {
-					dev_dbg(&adapter->dev, "found force "
-						"parameter for adapter %d, "
-						"addr 0x%02x, kind %d\n",
-						adap_id, forces[kind][i + 1],
-						kind);
-					temp_client->addr = forces[kind][i + 1];
-					err = i2c_detect_address(temp_client,
-						kind, driver);
-					if (err)
-						goto exit_free;
-				}
-			}
-		}
-	}
-
 	/* Stop here if the classes do not match */
 	if (!(adapter->class & driver->class))
 		goto exit_free;
 
 	/* Stop here if we can't use SMBUS_QUICK */
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
-		if (address_data->probe[0] == I2C_CLIENT_END
-		 && address_data->normal_i2c[0] == I2C_CLIENT_END)
+		if (address_data->normal_i2c[0] == I2C_CLIENT_END)
 			goto exit_free;
 
 		dev_warn(&adapter->dev, "SMBus Quick command not supported, "
@@ -1301,43 +1274,7 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 		goto exit_free;
 	}
 
-	/* Probe entries are done second, and are not affected by ignore
-	   entries either */
-	for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) {
-		if (address_data->probe[i] == adap_id
-		 || address_data->probe[i] == ANY_I2C_BUS) {
-			dev_dbg(&adapter->dev, "found probe parameter for "
-				"adapter %d, addr 0x%02x\n", adap_id,
-				address_data->probe[i + 1]);
-			temp_client->addr = address_data->probe[i + 1];
-			err = i2c_detect_address(temp_client, -1, driver);
-			if (err)
-				goto exit_free;
-		}
-	}
-
-	/* Normal entries are done last, unless shadowed by an ignore entry */
 	for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
-		int j, ignore;
-
-		ignore = 0;
-		for (j = 0; address_data->ignore[j] != I2C_CLIENT_END;
-		     j += 2) {
-			if ((address_data->ignore[j] == adap_id ||
-			     address_data->ignore[j] == ANY_I2C_BUS)
-			 && address_data->ignore[j + 1]
-			    == address_data->normal_i2c[i]) {
-				dev_dbg(&adapter->dev, "found ignore "
-					"parameter for adapter %d, "
-					"addr 0x%02x\n", adap_id,
-					address_data->ignore[j + 1]);
-				ignore = 1;
-				break;
-			}
-		}
-		if (ignore)
-			continue;
-
 		dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
 			"addr 0x%02x\n", adap_id,
 			address_data->normal_i2c[i]);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 52317fb5917e..419ab546b266 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -110,7 +110,7 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
  * @driver: Device driver model driver
  * @id_table: List of I2C devices supported by this driver
  * @detect: Callback for device detection
- * @address_data: The I2C addresses to probe, ignore or force (for detect)
+ * @address_data: The I2C addresses to probe (for detect)
  * @clients: List of detected clients we created (for i2c-core use only)
  *
  * The driver.owner field should be set to the module owner of this driver.
@@ -397,9 +397,6 @@ static inline void i2c_unlock_adapter(struct i2c_adapter *adapter)
  */
 struct i2c_client_address_data {
 	const unsigned short *normal_i2c;
-	const unsigned short *probe;
-	const unsigned short *ignore;
-	const unsigned short * const *forces;
 };
 
 /* Internal numbers to terminate lists */
@@ -613,134 +610,48 @@ union i2c_smbus_data {
   module_param_array(var, short, &var##_num, 0); \
   MODULE_PARM_DESC(var, desc)
 
-#define I2C_CLIENT_MODULE_PARM_FORCE(name)				\
-I2C_CLIENT_MODULE_PARM(force_##name,					\
-		       "List of adapter,address pairs which are "	\
-		       "unquestionably assumed to contain a `"		\
-		       # name "' chip")
-
-
 #define I2C_CLIENT_INSMOD_COMMON					\
-I2C_CLIENT_MODULE_PARM(probe, "List of adapter,address pairs to scan "	\
-		       "additionally");					\
-I2C_CLIENT_MODULE_PARM(ignore, "List of adapter,address pairs not to "	\
-		       "scan");						\
 static const struct i2c_client_address_data addr_data = {		\
 	.normal_i2c	= normal_i2c,					\
-	.probe		= probe,					\
-	.ignore		= ignore,					\
-	.forces		= forces,					\
 }
 
-#define I2C_CLIENT_FORCE_TEXT \
-	"List of adapter,address pairs to boldly assume to be present"
-
 /* These are the ones you want to use in your own drivers. Pick the one
    which matches the number of devices the driver differenciates between. */
 #define I2C_CLIENT_INSMOD						\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-static const unsigned short * const forces[] = { force, NULL };		\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_1(chip1)					\
 enum chips { any_chip, chip1 };						\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-static const unsigned short * const forces[] =	{ force,		\
-	force_##chip1, NULL };						\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_2(chip1, chip2)				\
 enum chips { any_chip, chip1, chip2 };					\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-static const unsigned short * const forces[] =	{ force,		\
-	force_##chip1, force_##chip2, NULL };				\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_3(chip1, chip2, chip3)			\
 enum chips { any_chip, chip1, chip2, chip3 };				\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-static const unsigned short * const forces[] =	{ force,		\
-	force_##chip1, force_##chip2, force_##chip3, NULL };		\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_4(chip1, chip2, chip3, chip4)			\
 enum chips { any_chip, chip1, chip2, chip3, chip4 };			\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip4);					\
-static const unsigned short * const forces[] =	{ force,		\
-	force_##chip1, force_##chip2, force_##chip3,			\
-	force_##chip4, NULL};						\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_5(chip1, chip2, chip3, chip4, chip5)		\
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 };		\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip4);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip5);					\
-static const unsigned short * const forces[] = { force,			\
-	force_##chip1, force_##chip2, force_##chip3,			\
-	force_##chip4, force_##chip5, NULL };				\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_6(chip1, chip2, chip3, chip4, chip5, chip6)	\
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 };	\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip4);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip5);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip6);					\
-static const unsigned short * const forces[] = { force,			\
-	force_##chip1, force_##chip2, force_##chip3,			\
-	force_##chip4, force_##chip5, force_##chip6, NULL };		\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_7(chip1, chip2, chip3, chip4, chip5, chip6, chip7) \
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
 	     chip7 };							\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip4);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip5);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip6);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip7);					\
-static const unsigned short * const forces[] = { force,			\
-	force_##chip1, force_##chip2, force_##chip3,			\
-	force_##chip4, force_##chip5, force_##chip6,			\
-	force_##chip7, NULL };						\
 I2C_CLIENT_INSMOD_COMMON
 
 #define I2C_CLIENT_INSMOD_8(chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8) \
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
 	     chip7, chip8 };						\
-I2C_CLIENT_MODULE_PARM(force, I2C_CLIENT_FORCE_TEXT);			\
-I2C_CLIENT_MODULE_PARM_FORCE(chip1);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip2);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip3);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip4);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip5);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip6);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip7);					\
-I2C_CLIENT_MODULE_PARM_FORCE(chip8);					\
-static const unsigned short * const forces[] = { force,			\
-	force_##chip1, force_##chip2, force_##chip3,			\
-	force_##chip4, force_##chip5, force_##chip6,			\
-	force_##chip7, force_##chip8, NULL };				\
 I2C_CLIENT_INSMOD_COMMON
 #endif /* __KERNEL__ */
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 6ad4c18884e864cf4c77f9074d3d1816063f99cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Nov 2009 13:31:39 +0100
Subject: sched: Fix balance vs hotplug race

Since (e761b77: cpu hotplug, sched: Introduce cpu_active_map and redo
sched domain managment) we have cpu_active_mask which is suppose to rule
scheduler migration and load-balancing, except it never (fully) did.

The particular problem being solved here is a crash in try_to_wake_up()
where select_task_rq() ends up selecting an offline cpu because
select_task_rq_fair() trusts the sched_domain tree to reflect the
current state of affairs, similarly select_task_rq_rt() trusts the
root_domain.

However, the sched_domains are updated from CPU_DEAD, which is after the
cpu is taken offline and after stop_machine is done. Therefore it can
race perfectly well with code assuming the domains are right.

Cure this by building the domains from cpu_active_mask on
CPU_DOWN_PREPARE.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpumask.h |  2 ++
 kernel/cpu.c            | 18 +++++++++++++-----
 kernel/cpuset.c         | 16 +++++++++-------
 kernel/sched.c          | 32 +++++++++++++++++---------------
 4 files changed, 41 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 789cf5f920ce..d77b54733c5b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -84,6 +84,7 @@ extern const struct cpumask *const cpu_active_mask;
 #define num_online_cpus()	cpumask_weight(cpu_online_mask)
 #define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
 #define num_present_cpus()	cpumask_weight(cpu_present_mask)
+#define num_active_cpus()	cpumask_weight(cpu_active_mask)
 #define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
 #define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
 #define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
@@ -92,6 +93,7 @@ extern const struct cpumask *const cpu_active_mask;
 #define num_online_cpus()	1
 #define num_possible_cpus()	1
 #define num_present_cpus()	1
+#define num_active_cpus()	1
 #define cpu_online(cpu)		((cpu) == 0)
 #define cpu_possible(cpu)	((cpu) == 0)
 #define cpu_present(cpu)	((cpu) == 0)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ba0f1ecb212..b21688640377 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
 	if (err == NOTIFY_BAD) {
+		set_cpu_active(cpu, true);
+
 		nr_calls--;
 		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					  hcpu, nr_calls, NULL);
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	/* Ensure that we are not runnable on dying cpu */
 	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current,
-			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+	set_cpus_allowed_ptr(current, cpu_active_mask);
 
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
+		set_cpu_active(cpu, true);
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu)
 
 	err = _cpu_down(cpu, 0);
 
-	if (cpu_online(cpu))
-		set_cpu_active(cpu, true);
-
 out:
 	cpu_maps_update_done();
 	stop_machine_destroy();
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void)
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
+
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		set_cpu_active(cpu, false);
+	}
+
+	synchronize_sched();
+
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 43fb7e800028..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 						node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/sched.c b/kernel/sched.c
index aa31244caa9f..281da29d0801 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4134,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	unsigned long flags;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_copy(cpus, cpu_online_mask);
+	cpumask_copy(cpus, cpu_active_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4297,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	int all_pinned = 0;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_copy(cpus, cpu_online_mask);
+	cpumask_copy(cpus, cpu_active_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4694,7 +4694,7 @@ int select_nohz_load_balancer(int stop_tick)
 		cpumask_set_cpu(cpu, nohz.cpu_mask);
 
 		/* time for ilb owner also to sleep */
-		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
+		if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
 				atomic_set(&nohz.load_balancer, -1);
 			return 0;
@@ -7093,7 +7093,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -7115,7 +7115,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+	if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		struct task_struct *mt = rq->migration_thread;
 
@@ -7269,19 +7269,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
 again:
 	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
 		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 			goto move;
 
 	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
 	if (dest_cpu < nr_cpu_ids)
 		goto move;
 
 	/* No more Mr. Nice Guy. */
 	if (dest_cpu >= nr_cpu_ids) {
 		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
 
 		/*
 		 * Don't tell them about moving exiting tasks or
@@ -7310,7 +7310,7 @@ move:
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
+	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -7564,7 +7564,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 static struct ctl_table_header *sd_sysctl_header;
 static void register_sched_domain_sysctl(void)
 {
-	int i, cpu_num = num_online_cpus();
+	int i, cpu_num = num_possible_cpus();
 	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
 	char buf[32];
 
@@ -7574,7 +7574,7 @@ static void register_sched_domain_sysctl(void)
 	if (entry == NULL)
 		return;
 
-	for_each_online_cpu(i) {
+	for_each_possible_cpu(i) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
 		entry->mode = 0555;
@@ -9100,7 +9100,7 @@ match1:
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
-		cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
+		cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
@@ -9231,8 +9231,10 @@ static int update_sched_domains(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		partition_sched_domains(1, NULL, NULL);
 		return NOTIFY_OK;
 
@@ -9279,7 +9281,7 @@ void __init sched_init_smp(void)
 #endif
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
-	arch_init_sched_domains(cpu_online_mask);
+	arch_init_sched_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
-- 
cgit v1.2.3


From ed872d09effd54aa8ecb4ceedbc4dbab9592f337 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 7 Dec 2009 03:14:17 +0100
Subject: hw-breakpoints: Zeroe the breakpoint attrs on initialization

The perf attrs used to set up breakpoint parameters are often allocated
in the stack and not zeroed out before calling hw_breakpoint_init().
Handle it from this helper to avoid random attributes set by the stack.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/hw_breakpoint.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 4d14a384a01e..42da1ce19ec0 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -22,6 +22,8 @@ enum {
 
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
+	memset(attr, 0, sizeof(*attr));
+
 	attr->type = PERF_TYPE_BREAKPOINT;
 	attr->size = sizeof(*attr);
 	/*
-- 
cgit v1.2.3


From 36203c4f3d091b5f6c082663bd1f74273798043a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 4 Dec 2009 10:22:23 -0800
Subject: Input: add generic support for sparse keymaps

More and more devices choose to reimplement support for sparse keymaps
first introduced by wistron driver. Move it into a library module so it
can be easily used by interested parties.

Reviewed-by: Anisse Astier <anisse@astier.eu>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/DocBook/device-drivers.tmpl |   4 +
 drivers/input/Kconfig                     |  28 +++-
 drivers/input/Makefile                    |   1 +
 drivers/input/input-polldev.c             |  14 +-
 drivers/input/sparse-keymap.c             | 250 ++++++++++++++++++++++++++++++
 include/linux/input/sparse-keymap.h       |  62 ++++++++
 6 files changed, 347 insertions(+), 12 deletions(-)
 create mode 100644 drivers/input/sparse-keymap.c
 create mode 100644 include/linux/input/sparse-keymap.h

(limited to 'include/linux')

diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index e994d1d9fbe6..f9a6e2c75f12 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -306,6 +306,10 @@ X!Idrivers/video/console/fonts.c
      <sect1><title>Matrix keyboars/keypads</title>
 !Iinclude/linux/input/matrix_keypad.h
      </sect1>
+     <sect1><title>Sparse keymap support</title>
+!Iinclude/linux/input/sparse-keymap.h
+!Edrivers/input/sparse-keymap.c
+     </sect1>
   </chapter>
 
   <chapter id="spi">
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index cd50c00ab20f..50af91ebd075 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -8,7 +8,7 @@ menu "Input device support"
 config INPUT
 	tristate "Generic input layer (needed for keyboard, mouse, ...)" if EMBEDDED
 	default y
-	---help---
+	help
 	  Say Y here if you have any input device (mouse, keyboard, tablet,
 	  joystick, steering wheel ...) connected to your system and want
 	  it to be available to applications. This includes standard PS/2
@@ -27,8 +27,7 @@ if INPUT
 
 config INPUT_FF_MEMLESS
 	tristate "Support for memoryless force-feedback devices"
-	default n
-	---help---
+	help
 	  Say Y here if you have memoryless force-feedback input device
 	  such as Logitech WingMan Force 3D, ThrustMaster FireStorm Dual
 	  Power 2, or similar. You will also need to enable hardware-specific
@@ -52,12 +51,25 @@ config INPUT_POLLDEV
 	  To compile this driver as a module, choose M here: the
 	  module will be called input-polldev.
 
+config INPUT_SPARSEKMAP
+	tristate "Sparse keymap support library"
+	help
+	  Say Y here if you are using a driver for an input
+	  device that uses sparse keymap. This option is only
+	  useful for out-of-tree drivers since in-tree drivers
+	  select it automatically.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called sparse-keymap.
+
 comment "Userland interfaces"
 
 config INPUT_MOUSEDEV
 	tristate "Mouse interface" if EMBEDDED
 	default y
-	---help---
+	help
 	  Say Y here if you want your mouse to be accessible as char devices
 	  13:32+ - /dev/input/mouseX and 13:63 - /dev/input/mice as an
 	  emulated IntelliMouse Explorer PS/2 mouse. That way, all user space
@@ -73,7 +85,7 @@ config INPUT_MOUSEDEV_PSAUX
 	bool "Provide legacy /dev/psaux device"
 	default y
 	depends on INPUT_MOUSEDEV
-	---help---
+	help
 	  Say Y here if you want your mouse also be accessible as char device
 	  10:1 - /dev/psaux. The data available through /dev/psaux is exactly
 	  the same as the data from /dev/input/mice.
@@ -103,7 +115,7 @@ config INPUT_MOUSEDEV_SCREEN_Y
 
 config INPUT_JOYDEV
 	tristate "Joystick interface"
-	---help---
+	help
 	  Say Y here if you want your joystick or gamepad to be
 	  accessible as char device 13:0+ - /dev/input/jsX device.
 
@@ -125,7 +137,7 @@ config INPUT_EVDEV
 
 config INPUT_EVBUG
 	tristate "Event debugging"
-	---help---
+	help
 	  Say Y here if you have a problem with the input subsystem and
 	  want all events (keypresses, mouse movements), to be output to
 	  the system log. While this is useful for debugging, it's also
@@ -140,7 +152,7 @@ config INPUT_EVBUG
 config INPUT_APMPOWER
 	tristate "Input Power Event -> APM Bridge" if EMBEDDED
 	depends on INPUT && APM_EMULATION
-	---help---
+	help
 	  Say Y here if you want suspend key events to trigger a user
 	  requested suspend through APM. This is useful on embedded
 	  systems where such behaviour is desired without userspace
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 4c9c745a7020..7ad212d31f99 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -9,6 +9,7 @@ input-core-objs := input.o input-compat.o ff-core.o
 
 obj-$(CONFIG_INPUT_FF_MEMLESS)	+= ff-memless.o
 obj-$(CONFIG_INPUT_POLLDEV)	+= input-polldev.o
+obj-$(CONFIG_INPUT_SPARSEKMAP)	+= sparse-keymap.o
 
 obj-$(CONFIG_INPUT_MOUSEDEV)	+= mousedev.o
 obj-$(CONFIG_INPUT_JOYDEV)	+= joydev.o
diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 6a2eb399b988..aa6713b4a988 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -212,7 +212,7 @@ EXPORT_SYMBOL(input_allocate_polled_device);
  * @dev: device to free
  *
  * The function frees memory allocated for polling device and drops
- * reference to the associated input device (if present).
+ * reference to the associated input device.
  */
 void input_free_polled_device(struct input_polled_dev *dev)
 {
@@ -258,6 +258,15 @@ int input_register_polled_device(struct input_polled_dev *dev)
 		return error;
 	}
 
+	/*
+	 * Take extra reference to the underlying input device so
+	 * that it survives call to input_unregister_polled_device()
+	 * and is deleted only after input_free_polled_device()
+	 * has been invoked. This is needed to ease task of freeing
+	 * sparse keymaps.
+	 */
+	input_get_device(input);
+
 	return 0;
 }
 EXPORT_SYMBOL(input_register_polled_device);
@@ -269,8 +278,6 @@ EXPORT_SYMBOL(input_register_polled_device);
  * The function unregisters previously registered polled input
  * device from input layer. Polling is stopped and device is
  * ready to be freed with call to input_free_polled_device().
- * Callers should not attempt to access dev->input pointer
- * after calling this function.
  */
 void input_unregister_polled_device(struct input_polled_dev *dev)
 {
@@ -278,7 +285,6 @@ void input_unregister_polled_device(struct input_polled_dev *dev)
 			   &input_polldev_attribute_group);
 
 	input_unregister_device(dev->input);
-	dev->input = NULL;
 }
 EXPORT_SYMBOL(input_unregister_polled_device);
 
diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c
new file mode 100644
index 000000000000..fbd3987af57f
--- /dev/null
+++ b/drivers/input/sparse-keymap.c
@@ -0,0 +1,250 @@
+/*
+ * Generic support for sparse keymaps
+ *
+ * Copyright (c) 2009 Dmitry Torokhov
+ *
+ * Derived from wistron button driver:
+ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+
+MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>");
+MODULE_DESCRIPTION("Generic support for sparse keymaps");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION("0.1");
+
+/**
+ * sparse_keymap_entry_from_scancode - perform sparse keymap lookup
+ * @dev: Input device using sparse keymap
+ * @code: Scan code
+ *
+ * This function is used to perform &struct key_entry lookup in an
+ * input device using sparse keymap.
+ */
+struct key_entry *sparse_keymap_entry_from_scancode(struct input_dev *dev,
+						    unsigned int code)
+{
+	struct key_entry *key;
+
+	for (key = dev->keycode; key->type != KE_END; key++)
+		if (code == key->code)
+			return key;
+
+	return NULL;
+}
+EXPORT_SYMBOL(sparse_keymap_entry_from_scancode);
+
+/**
+ * sparse_keymap_entry_from_keycode - perform sparse keymap lookup
+ * @dev: Input device using sparse keymap
+ * @keycode: Key code
+ *
+ * This function is used to perform &struct key_entry lookup in an
+ * input device using sparse keymap.
+ */
+struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev,
+						   unsigned int keycode)
+{
+	struct key_entry *key;
+
+	for (key = dev->keycode; key->type != KE_END; key++)
+		if (key->type == KE_KEY && keycode == key->keycode)
+			return key;
+
+	return NULL;
+}
+EXPORT_SYMBOL(sparse_keymap_entry_from_keycode);
+
+static int sparse_keymap_getkeycode(struct input_dev *dev,
+				    int scancode, int *keycode)
+{
+	const struct key_entry *key =
+			sparse_keymap_entry_from_scancode(dev, scancode);
+
+	if (key && key->type == KE_KEY) {
+		*keycode = key->keycode;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int sparse_keymap_setkeycode(struct input_dev *dev,
+				    int scancode, int keycode)
+{
+	struct key_entry *key;
+	int old_keycode;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	key = sparse_keymap_entry_from_scancode(dev, scancode);
+	if (key && key->type == KE_KEY) {
+		old_keycode = key->keycode;
+		key->keycode = keycode;
+		set_bit(keycode, dev->keybit);
+		if (!sparse_keymap_entry_from_keycode(dev, old_keycode))
+			clear_bit(old_keycode, dev->keybit);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * sparse_keymap_setup - set up sparse keymap for an input device
+ * @dev: Input device
+ * @keymap: Keymap in form of array of &key_entry structures ending
+ *	with %KE_END type entry
+ * @setup: Function that can be used to adjust keymap entries
+ *	depending on device's deeds, may be %NULL
+ *
+ * The function calculates size and allocates copy of the original
+ * keymap after which sets up input device event bits appropriately.
+ * Before destroying input device allocated keymap should be freed
+ * with a call to sparse_keymap_free().
+ */
+int sparse_keymap_setup(struct input_dev *dev,
+			const struct key_entry *keymap,
+			int (*setup)(struct input_dev *, struct key_entry *))
+{
+	size_t map_size = 1; /* to account for the last KE_END entry */
+	const struct key_entry *e;
+	struct key_entry *map, *entry;
+	int i;
+	int error;
+
+	for (e = keymap; e->type != KE_END; e++)
+		map_size++;
+
+	map = kcalloc(map_size, sizeof (struct key_entry), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	memcpy(map, keymap, map_size * sizeof (struct key_entry));
+
+	for (i = 0; i < map_size; i++) {
+		entry = &map[i];
+
+		if (setup) {
+			error = setup(dev, entry);
+			if (error)
+				goto err_out;
+		}
+
+		switch (entry->type) {
+		case KE_KEY:
+			__set_bit(EV_KEY, dev->evbit);
+			__set_bit(entry->keycode, dev->keybit);
+			break;
+
+		case KE_SW:
+			__set_bit(EV_SW, dev->evbit);
+			__set_bit(entry->sw.code, dev->swbit);
+			break;
+		}
+	}
+
+	dev->keycode = map;
+	dev->keycodemax = map_size;
+	dev->getkeycode = sparse_keymap_getkeycode;
+	dev->setkeycode = sparse_keymap_setkeycode;
+
+	return 0;
+
+ err_out:
+	kfree(keymap);
+	return error;
+
+}
+EXPORT_SYMBOL(sparse_keymap_setup);
+
+/**
+ * sparse_keymap_free - free memory allocated for sparse keymap
+ * @dev: Input device using sparse keymap
+ *
+ * This function is used to free memory allocated by sparse keymap
+ * in an input device that was set up by sparse_keymap_setup().
+ */
+void sparse_keymap_free(struct input_dev *dev)
+{
+	kfree(dev->keycode);
+	dev->keycode = NULL;
+	dev->keycodemax = 0;
+	dev->getkeycode = NULL;
+	dev->setkeycode = NULL;
+}
+EXPORT_SYMBOL(sparse_keymap_free);
+
+/**
+ * sparse_keymap_report_entry - report event corresponding to given key entry
+ * @dev: Input device for which event should be reported
+ * @ke: key entry describing event
+ * @value: Value that should be reported (ignored by %KE_SW entries)
+ * @autorelease: Signals whether release event should be emitted for %KE_KEY
+ *	entries right after reporting press event, ignored by all other
+ *	entries
+ *
+ * This function is used to report input event described by given
+ * &struct key_entry.
+ */
+void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke,
+				unsigned int value, bool autorelease)
+{
+	switch (ke->type) {
+	case KE_KEY:
+		input_report_key(dev, ke->keycode, value);
+		input_sync(dev);
+		if (value && autorelease) {
+			input_report_key(dev, ke->keycode, 0);
+			input_sync(dev);
+		}
+		break;
+
+	case KE_SW:
+		value = ke->sw.value;
+		/* fall through */
+
+	case KE_VSW:
+		input_report_switch(dev, ke->sw.code, value);
+		break;
+	}
+}
+EXPORT_SYMBOL(sparse_keymap_report_entry);
+
+/**
+ * sparse_keymap_report_event - report event corresponding to given scancode
+ * @dev: Input device using sparse keymap
+ * @code: Scan code
+ * @value: Value that should be reported (ignored by %KE_SW entries)
+ * @autorelease: Signals whether release event should be emitted for %KE_KEY
+ *	entries right after reporting press event, ignored by all other
+ *	entries
+ *
+ * This function is used to perform lookup in an input device using sparse
+ * keymap and report corresponding event. Returns %true if lookup was
+ * successful and %false otherwise.
+ */
+bool sparse_keymap_report_event(struct input_dev *dev, unsigned int code,
+				unsigned int value, bool autorelease)
+{
+	const struct key_entry *ke =
+		sparse_keymap_entry_from_scancode(dev, code);
+
+	if (ke) {
+		sparse_keymap_report_entry(dev, ke, value, autorelease);
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(sparse_keymap_report_event);
+
diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h
new file mode 100644
index 000000000000..52db62064c6e
--- /dev/null
+++ b/include/linux/input/sparse-keymap.h
@@ -0,0 +1,62 @@
+#ifndef _SPARSE_KEYMAP_H
+#define _SPARSE_KEYMAP_H
+
+/*
+ * Copyright (c) 2009 Dmitry Torokhov
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#define KE_END		0	/* Indicates end of keymap */
+#define KE_KEY		1	/* Ordinary key/button */
+#define KE_SW		2	/* Switch (predetermined value) */
+#define KE_VSW		3	/* Switch (value supplied at runtime) */
+#define KE_IGNORE	4	/* Known entry that should be ignored */
+#define KE_LAST		KE_IGNORE
+
+/**
+ * struct key_entry - keymap entry for use in sparse keymap
+ * @type: Type of the key entry (KE_KEY, KE_SW, KE_VSW, KE_END);
+ *	drivers are allowed to extend the list with their own
+ *	private definitions.
+ * @code: Device-specific data identifying the button/switch
+ * @keycode: KEY_* code assigned to a key/button
+ * @sw.code: SW_* code assigned to a switch
+ * @sw.value: Value that should be sent in an input even when KE_SW
+ *	switch is toggled. KE_VSW switches ignore this field and
+ *	expect driver to supply value for the event.
+ *
+ * This structure defines an entry in a sparse keymap used by some
+ * input devices for which traditional table-based approach is not
+ * suitable.
+ */
+struct key_entry {
+	int type;		/* See KE_* above */
+	u32 code;
+	union {
+		u16 keycode;		/* For KE_KEY */
+		struct {		/* For KE_SW, KE_VSW */
+			u8 code;
+			u8 value;	/* For KE_SW, ignored by KE_VSW */
+		} sw;
+	};
+};
+
+struct key_entry *sparse_keymap_entry_from_scancode(struct input_dev *dev,
+						    unsigned int code);
+struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev,
+						   unsigned int code);
+int sparse_keymap_setup(struct input_dev *dev,
+			const struct key_entry *keymap,
+			int (*setup)(struct input_dev *, struct key_entry *));
+void sparse_keymap_free(struct input_dev *dev);
+
+void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke,
+				unsigned int value, bool autorelease);
+
+bool sparse_keymap_report_event(struct input_dev *dev, unsigned int code,
+				unsigned int value, bool autorelease);
+
+#endif /* _SPARSE_KEYMAP_H */
-- 
cgit v1.2.3


From 2ff6cfd70720780234fdfea636218c2a62b31287 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 7 Dec 2009 17:12:58 +0100
Subject: perf events: hw_breakpoints: Don't include asm/hw_breakpoint.h in
 user space

asm/hw_breakpoint.h is evidently a kernel internal file and
should not be included globally, not even under an #ifdef.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <200912071712.58650.arnd@arndb.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 89098e35a036..bf3329413e18 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,10 +18,6 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-#include <asm/hw_breakpoint.h>
-#endif
-
 /*
  * User-space ABI bits:
  */
@@ -451,6 +447,10 @@ enum perf_callchain_context {
 # include <asm/perf_event.h>
 #endif
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/rculist.h>
-- 
cgit v1.2.3


From e15a113700324f7fdcee95589875daed2b98a2fe Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 30 Nov 2009 03:02:02 +0000
Subject: powerpc/kvm: Sync guest visible MMU state

Currently userspace has no chance to find out which virtual address space we're
in and resolve addresses. While that is a big problem for migration, it's also
unpleasent when debugging, as gdb and the monitor don't work on virtual
addresses.

This patch exports enough of the MMU segment state to userspace to make
debugging work and thus also includes the groundwork for migration.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm.h        | 18 ++++++++++++-
 arch/powerpc/include/asm/kvm_asm.h    |  1 +
 arch/powerpc/include/asm/kvm_book3s.h |  3 +++
 arch/powerpc/kvm/book3s.c             | 49 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_64_emulate.c  | 38 ++++++++++++++++-----------
 arch/powerpc/kvm/book3s_64_mmu.c      |  2 ++
 arch/powerpc/kvm/powerpc.c            |  3 +++
 include/linux/kvm.h                   |  3 +++
 8 files changed, 101 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index c9ca97f43bc1..81f3b0b5601e 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -47,7 +47,23 @@ struct kvm_regs {
 
 struct kvm_sregs {
 	__u32 pvr;
-	char pad[1020];
+	union {
+		struct {
+			__u64 sdr1;
+			struct {
+				struct {
+					__u64 slbe;
+					__u64 slbv;
+				} slb[64];
+			} ppc64;
+			struct {
+				__u32 sr[16];
+				__u64 ibat[8]; 
+				__u64 dbat[8]; 
+			} ppc32;
+		} s;
+		__u8 pad[1020];
+	} u;
 };
 
 struct kvm_fpu {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 19ddb352fd0f..af2abe74f544 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -87,6 +87,7 @@
 #define BOOK3S_IRQPRIO_MAX			16
 
 #define BOOK3S_HFLAG_DCBZ32			0x1
+#define BOOK3S_HFLAG_SLB			0x2
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index c6011336371e..74b7369770d0 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -46,6 +46,7 @@ struct kvmppc_sr {
 };
 
 struct kvmppc_bat {
+	u64 raw;
 	u32 bepi;
 	u32 bepi_mask;
 	bool vs;
@@ -113,6 +114,8 @@ extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, boo
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
+			   bool upper, u32 val);
 
 extern u32 kvmppc_trampoline_lowmem;
 extern u32 kvmppc_trampoline_enter;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 42037d46a416..3e294bd9b8c6 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -281,6 +281,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 
 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 {
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
 	vcpu->arch.pvr = pvr;
 	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
 		kvmppc_mmu_book3s_64_init(vcpu);
@@ -762,14 +763,62 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int i;
+
 	sregs->pvr = vcpu->arch.pvr;
+
+	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		for (i = 0; i < 64; i++) {
+			sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
+			sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
+		}
+	} else {
+		for (i = 0; i < 16; i++) {
+			sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
+			sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
+		}
+		for (i = 0; i < 8; i++) {
+			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
+			sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+		}
+	}
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int i;
+
 	kvmppc_set_pvr(vcpu, sregs->pvr);
+
+	vcpu3s->sdr1 = sregs->u.s.sdr1;
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		for (i = 0; i < 64; i++) {
+			vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
+						    sregs->u.s.ppc64.slb[i].slbe);
+		}
+	} else {
+		for (i = 0; i < 16; i++) {
+			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
+		}
+		for (i = 0; i < 8; i++) {
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
+				       (u32)sregs->u.s.ppc32.ibat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
+				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
+				       (u32)sregs->u.s.ppc32.dbat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
+				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+		}
+	}
+
+	/* Flush the MMU after messing with the segments */
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c
index c343e67306e0..1027eac6d474 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -185,7 +185,27 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return emulated;
 }
 
-static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u64 val)
+void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
+                    u32 val)
+{
+	if (upper) {
+		/* Upper BAT */
+		u32 bl = (val >> 2) & 0x7ff;
+		bat->bepi_mask = (~bl << 17);
+		bat->bepi = val & 0xfffe0000;
+		bat->vs = (val & 2) ? 1 : 0;
+		bat->vp = (val & 1) ? 1 : 0;
+		bat->raw = (bat->raw & 0xffffffff00000000ULL) | val;
+	} else {
+		/* Lower BAT */
+		bat->brpn = val & 0xfffe0000;
+		bat->wimg = (val >> 3) & 0xf;
+		bat->pp = val & 3;
+		bat->raw = (bat->raw & 0x00000000ffffffffULL) | ((u64)val << 32);
+	}
+}
+
+static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 	struct kvmppc_bat *bat;
@@ -207,19 +227,7 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u64 val)
 		BUG();
 	}
 
-	if (!(sprn % 2)) {
-		/* Upper BAT */
-		u32 bl = (val >> 2) & 0x7ff;
-		bat->bepi_mask = (~bl << 17);
-		bat->bepi = val & 0xfffe0000;
-		bat->vs = (val & 2) ? 1 : 0;
-		bat->vp = (val & 1) ? 1 : 0;
-	} else {
-		/* Lower BAT */
-		bat->brpn = val & 0xfffe0000;
-		bat->wimg = (val >> 3) & 0xf;
-		bat->pp = val & 3;
-	}
+	kvmppc_set_bat(vcpu, bat, !(sprn % 2), val);
 }
 
 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
@@ -243,7 +251,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_IBAT4U ... SPRN_IBAT7L:
 	case SPRN_DBAT0U ... SPRN_DBAT3L:
 	case SPRN_DBAT4U ... SPRN_DBAT7L:
-		kvmppc_write_bat(vcpu, sprn, vcpu->arch.gpr[rs]);
+		kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
 		/* BAT writes happen so rarely that we're ok to flush
 		 * everything here */
 		kvmppc_mmu_pte_flush(vcpu, 0, 0);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index a31f9c677d23..5598f88f142e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -473,4 +473,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
 	mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
 	mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
 	mmu->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 692c3709011e..d82551efbfbf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -144,6 +144,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	int r;
 
 	switch (ext) {
+	case KVM_CAP_PPC_SEGSTATE:
+		r = 1;
+		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f8f8900fc5ec..caf6173bd2e8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -436,6 +436,9 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+/* KVM upstream has more features, but we synched this number.
+   Linux, please remove this comment on rebase. */
+#define KVM_CAP_PPC_SEGSTATE 43
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From d5af91a1faca68e9a8cc493b85aa7b194b6128aa Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@mocean-labs.com>
Date: Fri, 13 Nov 2009 12:28:39 +0100
Subject: xilinx_spi: Split into of driver and generic part.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch splits the xilinx_spi driver into a generic part and a
OF driver part.

The reason for this is to later add in a platform driver as well.

Tested-by: John Linn <John.Linn@xilinx.com>
Signed-off-by: Richard Röjfors <richard.rojfors@mocean-labs.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig            |   9 ++-
 drivers/spi/Makefile           |   1 +
 drivers/spi/xilinx_spi.c       | 159 ++++++++++-------------------------------
 drivers/spi/xilinx_spi.h       |  32 +++++++++
 drivers/spi/xilinx_spi_of.c    | 133 ++++++++++++++++++++++++++++++++++
 include/linux/spi/xilinx_spi.h |  16 +++++
 6 files changed, 229 insertions(+), 121 deletions(-)
 create mode 100644 drivers/spi/xilinx_spi.h
 create mode 100644 drivers/spi/xilinx_spi_of.c
 create mode 100644 include/linux/spi/xilinx_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 2a4ba1993083..f34a2d16d18f 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -244,14 +244,21 @@ config SPI_TXX9
 
 config SPI_XILINX
 	tristate "Xilinx SPI controller"
-	depends on (XILINX_VIRTEX || MICROBLAZE) && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	select SPI_BITBANG
+	select SPI_XILINX_OF if (XILINX_VIRTEX || MICROBLAZE)
 	help
 	  This exposes the SPI controller IP from the Xilinx EDK.
 
 	  See the "OPB Serial Peripheral Interface (SPI) (v1.00e)"
 	  Product Specification document (DS464) for hardware details.
 
+config SPI_XILINX_OF
+	tristate "Xilinx SPI controller OF device"
+	depends on SPI_XILINX && (XILINX_VIRTEX || MICROBLAZE)
+	help
+	  This is the OF driver for the SPI controller IP from the Xilinx EDK.
+
 #
 # Add new SPI master controllers in alphabetical order above this line
 #
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index e3f092a9afa5..01c409548044 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi_s3c24xx.o
 obj-$(CONFIG_SPI_TXX9)			+= spi_txx9.o
 obj-$(CONFIG_SPI_XILINX)		+= xilinx_spi.o
+obj-$(CONFIG_SPI_XILINX_OF)		+= xilinx_spi_of.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi_sh_sci.o
 obj-$(CONFIG_SPI_STMP3XXX)		+= spi_stmp.o
 # 	... add above this line ...
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 5a143b9f6361..69fa26d82ce4 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -14,16 +14,14 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/platform_device.h>
-
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
-#include <linux/of_spi.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/io.h>
 
+#include "xilinx_spi.h"
+#include <linux/spi/xilinx_spi.h>
+
 #define XILINX_SPI_NAME "xilinx_spi"
 
 /* Register definitions as per "OPB Serial Peripheral Interface (SPI) (v1.00e)
@@ -78,7 +76,7 @@ struct xilinx_spi {
 	/* bitbang has to be first */
 	struct spi_bitbang bitbang;
 	struct completion done;
-
+	struct resource mem; /* phys mem */
 	void __iomem	*regs;	/* virt. address of the control registers */
 
 	u32		irq;
@@ -284,40 +282,22 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int __init xilinx_spi_of_probe(struct of_device *ofdev,
-					const struct of_device_id *match)
+struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
+	u32 irq, s16 bus_num)
 {
 	struct spi_master *master;
 	struct xilinx_spi *xspi;
-	struct resource r_irq_struct;
-	struct resource r_mem_struct;
-
-	struct resource *r_irq = &r_irq_struct;
-	struct resource *r_mem = &r_mem_struct;
-	int rc = 0;
-	const u32 *prop;
-	int len;
-
-	/* Get resources(memory, IRQ) associated with the device */
-	master = spi_alloc_master(&ofdev->dev, sizeof(struct xilinx_spi));
+	struct xspi_platform_data *pdata = dev->platform_data;
+	int ret;
 
-	if (master == NULL) {
-		return -ENOMEM;
-	}
-
-	dev_set_drvdata(&ofdev->dev, master);
-
-	rc = of_address_to_resource(ofdev->node, 0, r_mem);
-	if (rc) {
-		dev_warn(&ofdev->dev, "invalid address\n");
-		goto put_master;
+	if (!pdata) {
+		dev_err(dev, "No platform data attached\n");
+		return NULL;
 	}
 
-	rc = of_irq_to_resource(ofdev->node, 0, r_irq);
-	if (rc == NO_IRQ) {
-		dev_warn(&ofdev->dev, "no IRQ found\n");
-		goto put_master;
-	}
+	master = spi_alloc_master(dev, sizeof(struct xilinx_spi));
+	if (!master)
+		return NULL;
 
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA;
@@ -330,128 +310,67 @@ static int __init xilinx_spi_of_probe(struct of_device *ofdev,
 	xspi->bitbang.master->setup = xilinx_spi_setup;
 	init_completion(&xspi->done);
 
-	xspi->irq = r_irq->start;
-
-	if (!request_mem_region(r_mem->start,
-			r_mem->end - r_mem->start + 1, XILINX_SPI_NAME)) {
-		rc = -ENXIO;
-		dev_warn(&ofdev->dev, "memory request failure\n");
+	if (!request_mem_region(mem->start, resource_size(mem),
+		XILINX_SPI_NAME))
 		goto put_master;
-	}
 
-	xspi->regs = ioremap(r_mem->start, r_mem->end - r_mem->start + 1);
+	xspi->regs = ioremap(mem->start, resource_size(mem));
 	if (xspi->regs == NULL) {
-		rc = -ENOMEM;
-		dev_warn(&ofdev->dev, "ioremap failure\n");
-		goto release_mem;
+		dev_warn(dev, "ioremap failure\n");
+		goto map_failed;
 	}
-	xspi->irq = r_irq->start;
 
-	/* dynamic bus assignment */
-	master->bus_num = -1;
+	master->bus_num = bus_num;
+	master->num_chipselect = pdata->num_chipselect;
 
-	/* number of slave select bits is required */
-	prop = of_get_property(ofdev->node, "xlnx,num-ss-bits", &len);
-	if (!prop || len < sizeof(*prop)) {
-		dev_warn(&ofdev->dev, "no 'xlnx,num-ss-bits' property\n");
-		goto unmap_io;
-	}
-	master->num_chipselect = *prop;
+	xspi->mem = *mem;
+	xspi->irq = irq;
 
 	/* SPI controller initializations */
 	xspi_init_hw(xspi->regs);
 
 	/* Register for SPI Interrupt */
-	rc = request_irq(xspi->irq, xilinx_spi_irq, 0, XILINX_SPI_NAME, xspi);
-	if (rc != 0) {
-		dev_warn(&ofdev->dev, "irq request failure: %d\n", xspi->irq);
+	ret = request_irq(xspi->irq, xilinx_spi_irq, 0, XILINX_SPI_NAME, xspi);
+	if (ret)
 		goto unmap_io;
-	}
 
-	rc = spi_bitbang_start(&xspi->bitbang);
-	if (rc != 0) {
-		dev_err(&ofdev->dev, "spi_bitbang_start FAILED\n");
+	ret = spi_bitbang_start(&xspi->bitbang);
+	if (ret) {
+		dev_err(dev, "spi_bitbang_start FAILED\n");
 		goto free_irq;
 	}
 
-	dev_info(&ofdev->dev, "at 0x%08X mapped to 0x%08X, irq=%d\n",
-			(unsigned int)r_mem->start, (u32)xspi->regs, xspi->irq);
-
-	/* Add any subnodes on the SPI bus */
-	of_register_spi_devices(master, ofdev->node);
-
-	return rc;
+	dev_info(dev, "at 0x%08X mapped to 0x%08X, irq=%d\n",
+		(u32)mem->start, (u32)xspi->regs, xspi->irq);
+	return master;
 
 free_irq:
 	free_irq(xspi->irq, xspi);
 unmap_io:
 	iounmap(xspi->regs);
-release_mem:
-	release_mem_region(r_mem->start, resource_size(r_mem));
+map_failed:
+	release_mem_region(mem->start, resource_size(mem));
 put_master:
 	spi_master_put(master);
-	return rc;
+	return NULL;
 }
+EXPORT_SYMBOL(xilinx_spi_init);
 
-static int __devexit xilinx_spi_remove(struct of_device *ofdev)
+void xilinx_spi_deinit(struct spi_master *master)
 {
 	struct xilinx_spi *xspi;
-	struct spi_master *master;
-	struct resource r_mem;
 
-	master = platform_get_drvdata(ofdev);
 	xspi = spi_master_get_devdata(master);
 
 	spi_bitbang_stop(&xspi->bitbang);
 	free_irq(xspi->irq, xspi);
 	iounmap(xspi->regs);
-	if (!of_address_to_resource(ofdev->node, 0, &r_mem))
-		release_mem_region(r_mem.start, resource_size(&r_mem));
-	dev_set_drvdata(&ofdev->dev, 0);
-	spi_master_put(xspi->bitbang.master);
-
-	return 0;
-}
-
-/* work with hotplug and coldplug */
-MODULE_ALIAS("platform:" XILINX_SPI_NAME);
-
-static int __exit xilinx_spi_of_remove(struct of_device *op)
-{
-	return xilinx_spi_remove(op);
-}
 
-static struct of_device_id xilinx_spi_of_match[] = {
-	{ .compatible = "xlnx,xps-spi-2.00.a", },
-	{ .compatible = "xlnx,xps-spi-2.00.b", },
-	{}
-};
-
-MODULE_DEVICE_TABLE(of, xilinx_spi_of_match);
-
-static struct of_platform_driver xilinx_spi_of_driver = {
-	.owner = THIS_MODULE,
-	.name = "xilinx-xps-spi",
-	.match_table = xilinx_spi_of_match,
-	.probe = xilinx_spi_of_probe,
-	.remove = __exit_p(xilinx_spi_of_remove),
-	.driver = {
-		.name = "xilinx-xps-spi",
-		.owner = THIS_MODULE,
-	},
-};
-
-static int __init xilinx_spi_init(void)
-{
-	return of_register_platform_driver(&xilinx_spi_of_driver);
+	release_mem_region(xspi->mem.start, resource_size(&xspi->mem));
+	spi_master_put(xspi->bitbang.master);
 }
-module_init(xilinx_spi_init);
+EXPORT_SYMBOL(xilinx_spi_deinit);
 
-static void __exit xilinx_spi_exit(void)
-{
-	of_unregister_platform_driver(&xilinx_spi_of_driver);
-}
-module_exit(xilinx_spi_exit);
 MODULE_AUTHOR("MontaVista Software, Inc. <source@mvista.com>");
 MODULE_DESCRIPTION("Xilinx SPI driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/spi/xilinx_spi.h b/drivers/spi/xilinx_spi.h
new file mode 100644
index 000000000000..d211accf68d2
--- /dev/null
+++ b/drivers/spi/xilinx_spi.h
@@ -0,0 +1,32 @@
+/*
+ * Xilinx SPI device driver API and platform data header file
+ *
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _XILINX_SPI_H_
+#define _XILINX_SPI_H_
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+
+#define XILINX_SPI_NAME "xilinx_spi"
+
+struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
+	u32 irq, s16 bus_num);
+
+void xilinx_spi_deinit(struct spi_master *master);
+#endif
diff --git a/drivers/spi/xilinx_spi_of.c b/drivers/spi/xilinx_spi_of.c
new file mode 100644
index 000000000000..151aa13494bd
--- /dev/null
+++ b/drivers/spi/xilinx_spi_of.c
@@ -0,0 +1,133 @@
+/*
+ * Xilinx SPI OF device driver
+ *
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Xilinx SPI devices as OF devices
+ *
+ * Inspired by xilinx_spi.c, 2002-2007 (c) MontaVista Software, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/of_spi.h>
+
+#include <linux/spi/xilinx_spi.h>
+#include "xilinx_spi.h"
+
+
+static int __devinit xilinx_spi_of_probe(struct of_device *ofdev,
+	const struct of_device_id *match)
+{
+	struct spi_master *master;
+	struct xspi_platform_data *pdata;
+	struct resource r_mem;
+	struct resource r_irq;
+	int rc = 0;
+	const u32 *prop;
+	int len;
+
+	rc = of_address_to_resource(ofdev->node, 0, &r_mem);
+	if (rc) {
+		dev_warn(&ofdev->dev, "invalid address\n");
+		return rc;
+	}
+
+	rc = of_irq_to_resource(ofdev->node, 0, &r_irq);
+	if (rc == NO_IRQ) {
+		dev_warn(&ofdev->dev, "no IRQ found\n");
+		return -ENODEV;
+	}
+
+	ofdev->dev.platform_data =
+		kzalloc(sizeof(struct xspi_platform_data), GFP_KERNEL);
+	pdata = ofdev->dev.platform_data;
+	if (!pdata)
+		return -ENOMEM;
+
+	/* number of slave select bits is required */
+	prop = of_get_property(ofdev->node, "xlnx,num-ss-bits", &len);
+	if (!prop || len < sizeof(*prop)) {
+		dev_warn(&ofdev->dev, "no 'xlnx,num-ss-bits' property\n");
+		return -EINVAL;
+	}
+	pdata->num_chipselect = *prop;
+	master = xilinx_spi_init(&ofdev->dev, &r_mem, r_irq.start, -1);
+	if (!master)
+		return -ENODEV;
+
+	dev_set_drvdata(&ofdev->dev, master);
+
+	/* Add any subnodes on the SPI bus */
+	of_register_spi_devices(master, ofdev->node);
+
+	return 0;
+}
+
+static int __devexit xilinx_spi_remove(struct of_device *ofdev)
+{
+	xilinx_spi_deinit(dev_get_drvdata(&ofdev->dev));
+	dev_set_drvdata(&ofdev->dev, 0);
+	kfree(ofdev->dev.platform_data);
+	ofdev->dev.platform_data = NULL;
+	return 0;
+}
+
+static int __exit xilinx_spi_of_remove(struct of_device *op)
+{
+	return xilinx_spi_remove(op);
+}
+
+static struct of_device_id xilinx_spi_of_match[] = {
+	{ .compatible = "xlnx,xps-spi-2.00.a", },
+	{ .compatible = "xlnx,xps-spi-2.00.b", },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, xilinx_spi_of_match);
+
+static struct of_platform_driver xilinx_spi_of_driver = {
+	.match_table = xilinx_spi_of_match,
+	.probe = xilinx_spi_of_probe,
+	.remove = __exit_p(xilinx_spi_of_remove),
+	.driver = {
+		.name = "xilinx-xps-spi",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init xilinx_spi_of_init(void)
+{
+	return of_register_platform_driver(&xilinx_spi_of_driver);
+}
+module_init(xilinx_spi_of_init);
+
+static void __exit xilinx_spi_of_exit(void)
+{
+	of_unregister_platform_driver(&xilinx_spi_of_driver);
+}
+module_exit(xilinx_spi_of_exit);
+
+MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
+MODULE_DESCRIPTION("Xilinx SPI platform driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/xilinx_spi.h b/include/linux/spi/xilinx_spi.h
new file mode 100644
index 000000000000..06df0aba3aee
--- /dev/null
+++ b/include/linux/spi/xilinx_spi.h
@@ -0,0 +1,16 @@
+#ifndef __LINUX_SPI_XILINX_SPI_H
+#define __LINUX_SPI_XILINX_SPI_H
+
+/**
+ * struct xspi_platform_data - Platform data of the Xilinx SPI driver
+ * @num_chipselect:	Number of chip select by the IP
+ * @devices:		Devices to add when the driver is probed.
+ * @num_devices:	Number of devices in the devices array.
+ */
+struct xspi_platform_data {
+	u16 num_chipselect;
+	struct spi_board_info *devices;
+	u8 num_devices;
+};
+
+#endif /* __LINUX_SPI_XILINX_SPI_H */
-- 
cgit v1.2.3


From 86fc593599c11b62a11c85b4d7b709089df15c29 Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@mocean-labs.com>
Date: Fri, 13 Nov 2009 12:28:49 +0100
Subject: xilinx_spi: Switch to iomem functions and support little endian.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch changes the out_(be)(8|16|32) and in_(be)(8|16|32) calls to 32 bits ioread/iowrite.

The read and write function are attached to the internal struct as callbacks, callback
is selected depending on endianess.

This will also build on platforms not supporting the in/out calls for instance x86.

Acked-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: John Linn <John.Linn@xilinx.com>
Signed-off-by: Richard Röjfors <richard.rojfors@mocean-labs.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig            |  2 +-
 drivers/spi/xilinx_spi.c       | 96 ++++++++++++++++++++++++------------------
 include/linux/spi/xilinx_spi.h |  2 +
 3 files changed, 57 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f34a2d16d18f..b528271c72ef 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -244,7 +244,7 @@ config SPI_TXX9
 
 config SPI_XILINX
 	tristate "Xilinx SPI controller"
-	depends on EXPERIMENTAL
+	depends on HAS_IOMEM && EXPERIMENTAL
 	select SPI_BITBANG
 	select SPI_XILINX_OF if (XILINX_VIRTEX || MICROBLAZE)
 	help
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 69fa26d82ce4..135a3a5931a4 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -27,7 +27,7 @@
 /* Register definitions as per "OPB Serial Peripheral Interface (SPI) (v1.00e)
  * Product Specification", DS464
  */
-#define XSPI_CR_OFFSET		0x62	/* 16-bit Control Register */
+#define XSPI_CR_OFFSET		0x60	/* 16-bit Control Register */
 
 #define XSPI_CR_ENABLE		0x02
 #define XSPI_CR_MASTER_MODE	0x04
@@ -39,7 +39,7 @@
 #define XSPI_CR_MANUAL_SSELECT	0x80
 #define XSPI_CR_TRANS_INHIBIT	0x100
 
-#define XSPI_SR_OFFSET		0x67	/* 8-bit Status Register */
+#define XSPI_SR_OFFSET		0x64	/* 8-bit Status Register */
 
 #define XSPI_SR_RX_EMPTY_MASK	0x01	/* Receive FIFO is empty */
 #define XSPI_SR_RX_FULL_MASK	0x02	/* Receive FIFO is full */
@@ -47,8 +47,8 @@
 #define XSPI_SR_TX_FULL_MASK	0x08	/* Transmit FIFO is full */
 #define XSPI_SR_MODE_FAULT_MASK	0x10	/* Mode fault error */
 
-#define XSPI_TXD_OFFSET		0x6b	/* 8-bit Data Transmit Register */
-#define XSPI_RXD_OFFSET		0x6f	/* 8-bit Data Receive Register */
+#define XSPI_TXD_OFFSET		0x68	/* 8-bit Data Transmit Register */
+#define XSPI_RXD_OFFSET		0x6c	/* 8-bit Data Receive Register */
 
 #define XSPI_SSR_OFFSET		0x70	/* 32-bit Slave Select Register */
 
@@ -86,25 +86,29 @@ struct xilinx_spi {
 	u8 *rx_ptr;		/* pointer in the Tx buffer */
 	const u8 *tx_ptr;	/* pointer in the Rx buffer */
 	int remaining_bytes;	/* the number of bytes left to transfer */
+	unsigned int (*read_fn) (void __iomem *);
+	void (*write_fn) (u32, void __iomem *);
 };
 
-static void xspi_init_hw(void __iomem *regs_base)
+static void xspi_init_hw(struct xilinx_spi *xspi)
 {
+	void __iomem *regs_base = xspi->regs;
+
 	/* Reset the SPI device */
-	out_be32(regs_base + XIPIF_V123B_RESETR_OFFSET,
-		 XIPIF_V123B_RESET_MASK);
+	xspi->write_fn(XIPIF_V123B_RESET_MASK,
+		regs_base + XIPIF_V123B_RESETR_OFFSET);
 	/* Disable all the interrupts just in case */
-	out_be32(regs_base + XIPIF_V123B_IIER_OFFSET, 0);
+	xspi->write_fn(0, regs_base + XIPIF_V123B_IIER_OFFSET);
 	/* Enable the global IPIF interrupt */
-	out_be32(regs_base + XIPIF_V123B_DGIER_OFFSET,
-		 XIPIF_V123B_GINTR_ENABLE);
+	xspi->write_fn(XIPIF_V123B_GINTR_ENABLE,
+		regs_base + XIPIF_V123B_DGIER_OFFSET);
 	/* Deselect the slave on the SPI bus */
-	out_be32(regs_base + XSPI_SSR_OFFSET, 0xffff);
+	xspi->write_fn(0xffff, regs_base + XSPI_SSR_OFFSET);
 	/* Disable the transmitter, enable Manual Slave Select Assertion,
 	 * put SPI controller into master mode, and enable it */
-	out_be16(regs_base + XSPI_CR_OFFSET,
-		 XSPI_CR_TRANS_INHIBIT | XSPI_CR_MANUAL_SSELECT
-		 | XSPI_CR_MASTER_MODE | XSPI_CR_ENABLE);
+	xspi->write_fn(XSPI_CR_TRANS_INHIBIT | XSPI_CR_MANUAL_SSELECT |
+		XSPI_CR_MASTER_MODE | XSPI_CR_ENABLE,
+		regs_base + XSPI_CR_OFFSET);
 }
 
 static void xilinx_spi_chipselect(struct spi_device *spi, int is_on)
@@ -113,16 +117,16 @@ static void xilinx_spi_chipselect(struct spi_device *spi, int is_on)
 
 	if (is_on == BITBANG_CS_INACTIVE) {
 		/* Deselect the slave on the SPI bus */
-		out_be32(xspi->regs + XSPI_SSR_OFFSET, 0xffff);
+		xspi->write_fn(0xffff, xspi->regs + XSPI_SSR_OFFSET);
 	} else if (is_on == BITBANG_CS_ACTIVE) {
 		/* Set the SPI clock phase and polarity */
-		u16 cr = in_be16(xspi->regs + XSPI_CR_OFFSET)
+		u16 cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET)
 			 & ~XSPI_CR_MODE_MASK;
 		if (spi->mode & SPI_CPHA)
 			cr |= XSPI_CR_CPHA;
 		if (spi->mode & SPI_CPOL)
 			cr |= XSPI_CR_CPOL;
-		out_be16(xspi->regs + XSPI_CR_OFFSET, cr);
+		xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET);
 
 		/* We do not check spi->max_speed_hz here as the SPI clock
 		 * frequency is not software programmable (the IP block design
@@ -130,8 +134,8 @@ static void xilinx_spi_chipselect(struct spi_device *spi, int is_on)
 		 */
 
 		/* Activate the chip select */
-		out_be32(xspi->regs + XSPI_SSR_OFFSET,
-			 ~(0x0001 << spi->chip_select));
+		xspi->write_fn(~(0x0001 << spi->chip_select),
+			xspi->regs + XSPI_SSR_OFFSET);
 	}
 }
 
@@ -178,15 +182,15 @@ static void xilinx_spi_fill_tx_fifo(struct xilinx_spi *xspi)
 	u8 sr;
 
 	/* Fill the Tx FIFO with as many bytes as possible */
-	sr = in_8(xspi->regs + XSPI_SR_OFFSET);
+	sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 	while ((sr & XSPI_SR_TX_FULL_MASK) == 0 && xspi->remaining_bytes > 0) {
-		if (xspi->tx_ptr) {
-			out_8(xspi->regs + XSPI_TXD_OFFSET, *xspi->tx_ptr++);
-		} else {
-			out_8(xspi->regs + XSPI_TXD_OFFSET, 0);
-		}
+		if (xspi->tx_ptr)
+			xspi->write_fn(*xspi->tx_ptr++,
+				xspi->regs + XSPI_TXD_OFFSET);
+		else
+			xspi->write_fn(0, xspi->regs + XSPI_TXD_OFFSET);
 		xspi->remaining_bytes--;
-		sr = in_8(xspi->regs + XSPI_SR_OFFSET);
+		sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 	}
 }
 
@@ -208,18 +212,19 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 	/* Enable the transmit empty interrupt, which we use to determine
 	 * progress on the transmission.
 	 */
-	ipif_ier = in_be32(xspi->regs + XIPIF_V123B_IIER_OFFSET);
-	out_be32(xspi->regs + XIPIF_V123B_IIER_OFFSET,
-		 ipif_ier | XSPI_INTR_TX_EMPTY);
+	ipif_ier = xspi->read_fn(xspi->regs + XIPIF_V123B_IIER_OFFSET);
+	xspi->write_fn(ipif_ier | XSPI_INTR_TX_EMPTY,
+		xspi->regs + XIPIF_V123B_IIER_OFFSET);
 
 	/* Start the transfer by not inhibiting the transmitter any longer */
-	cr = in_be16(xspi->regs + XSPI_CR_OFFSET) & ~XSPI_CR_TRANS_INHIBIT;
-	out_be16(xspi->regs + XSPI_CR_OFFSET, cr);
+	cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET) &
+		~XSPI_CR_TRANS_INHIBIT;
+	xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET);
 
 	wait_for_completion(&xspi->done);
 
 	/* Disable the transmit empty interrupt */
-	out_be32(xspi->regs + XIPIF_V123B_IIER_OFFSET, ipif_ier);
+	xspi->write_fn(ipif_ier, xspi->regs + XIPIF_V123B_IIER_OFFSET);
 
 	return t->len - xspi->remaining_bytes;
 }
@@ -236,8 +241,8 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
 	u32 ipif_isr;
 
 	/* Get the IPIF interrupts, and clear them immediately */
-	ipif_isr = in_be32(xspi->regs + XIPIF_V123B_IISR_OFFSET);
-	out_be32(xspi->regs + XIPIF_V123B_IISR_OFFSET, ipif_isr);
+	ipif_isr = xspi->read_fn(xspi->regs + XIPIF_V123B_IISR_OFFSET);
+	xspi->write_fn(ipif_isr, xspi->regs + XIPIF_V123B_IISR_OFFSET);
 
 	if (ipif_isr & XSPI_INTR_TX_EMPTY) {	/* Transmission completed */
 		u16 cr;
@@ -248,20 +253,20 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
 		 * transmitter while the Isr refills the transmit register/FIFO,
 		 * or make sure it is stopped if we're done.
 		 */
-		cr = in_be16(xspi->regs + XSPI_CR_OFFSET);
-		out_be16(xspi->regs + XSPI_CR_OFFSET,
-			 cr | XSPI_CR_TRANS_INHIBIT);
+		cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET);
+		xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT,
+			xspi->regs + XSPI_CR_OFFSET);
 
 		/* Read out all the data from the Rx FIFO */
-		sr = in_8(xspi->regs + XSPI_SR_OFFSET);
+		sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 		while ((sr & XSPI_SR_RX_EMPTY_MASK) == 0) {
 			u8 data;
 
-			data = in_8(xspi->regs + XSPI_RXD_OFFSET);
+			data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET);
 			if (xspi->rx_ptr) {
 				*xspi->rx_ptr++ = data;
 			}
-			sr = in_8(xspi->regs + XSPI_SR_OFFSET);
+			sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 		}
 
 		/* See if there is more data to send */
@@ -270,7 +275,7 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
 			/* Start the transfer by not inhibiting the
 			 * transmitter any longer
 			 */
-			out_be16(xspi->regs + XSPI_CR_OFFSET, cr);
+			xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET);
 		} else {
 			/* No more data to send.
 			 * Indicate the transfer is completed.
@@ -325,9 +330,16 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
 
 	xspi->mem = *mem;
 	xspi->irq = irq;
+	if (pdata->little_endian) {
+		xspi->read_fn = ioread32;
+		xspi->write_fn = iowrite32;
+	} else {
+		xspi->read_fn = ioread32be;
+		xspi->write_fn = iowrite32be;
+	}
 
 	/* SPI controller initializations */
-	xspi_init_hw(xspi->regs);
+	xspi_init_hw(xspi);
 
 	/* Register for SPI Interrupt */
 	ret = request_irq(xspi->irq, xilinx_spi_irq, 0, XILINX_SPI_NAME, xspi);
diff --git a/include/linux/spi/xilinx_spi.h b/include/linux/spi/xilinx_spi.h
index 06df0aba3aee..a705ad85eebe 100644
--- a/include/linux/spi/xilinx_spi.h
+++ b/include/linux/spi/xilinx_spi.h
@@ -4,11 +4,13 @@
 /**
  * struct xspi_platform_data - Platform data of the Xilinx SPI driver
  * @num_chipselect:	Number of chip select by the IP
+ * @little_endian	If registers should be accessed little endian or not
  * @devices:		Devices to add when the driver is probed.
  * @num_devices:	Number of devices in the devices array.
  */
 struct xspi_platform_data {
 	u16 num_chipselect;
+	bool little_endian;
 	struct spi_board_info *devices;
 	u8 num_devices;
 };
-- 
cgit v1.2.3


From c9da2e125588677d74324df5088149063d578e8f Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@mocean-labs.com>
Date: Fri, 13 Nov 2009 12:28:55 +0100
Subject: xilinx_spi: add support for the DS570 IP.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds in support for the DS570 IP.

It's register compatible with the DS464, but adds support for 8/16/32 SPI.

The 8/16/32 support is added by attaching callbacks reading/writing the
proper amount of data. To indicate to the driver which amount of bits
to use a new field is introduced in the platform data struct.

Acked-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: John Linn <John.Linn@xilinx.com>
Signed-off-by: Richard Röjfors <richard.rojfors@mocean-labs.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig            |   4 +-
 drivers/spi/xilinx_spi.c       | 118 ++++++++++++++++++++++++++++++-----------
 drivers/spi/xilinx_spi_of.c    |   1 +
 include/linux/spi/xilinx_spi.h |   6 ++-
 4 files changed, 95 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b528271c72ef..4dda097c239e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -243,7 +243,7 @@ config SPI_TXX9
 	  SPI driver for Toshiba TXx9 MIPS SoCs
 
 config SPI_XILINX
-	tristate "Xilinx SPI controller"
+	tristate "Xilinx SPI controller common module"
 	depends on HAS_IOMEM && EXPERIMENTAL
 	select SPI_BITBANG
 	select SPI_XILINX_OF if (XILINX_VIRTEX || MICROBLAZE)
@@ -253,6 +253,8 @@ config SPI_XILINX
 	  See the "OPB Serial Peripheral Interface (SPI) (v1.00e)"
 	  Product Specification document (DS464) for hardware details.
 
+	  Or for the DS570, see "XPS Serial Peripheral Interface (SPI) (v2.00b)"
+
 config SPI_XILINX_OF
 	tristate "Xilinx SPI controller OF device"
 	depends on SPI_XILINX && (XILINX_VIRTEX || MICROBLAZE)
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 135a3a5931a4..b927812822c1 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -27,7 +27,7 @@
 /* Register definitions as per "OPB Serial Peripheral Interface (SPI) (v1.00e)
  * Product Specification", DS464
  */
-#define XSPI_CR_OFFSET		0x60	/* 16-bit Control Register */
+#define XSPI_CR_OFFSET		0x60	/* Control Register */
 
 #define XSPI_CR_ENABLE		0x02
 #define XSPI_CR_MASTER_MODE	0x04
@@ -38,8 +38,9 @@
 #define XSPI_CR_RXFIFO_RESET	0x40
 #define XSPI_CR_MANUAL_SSELECT	0x80
 #define XSPI_CR_TRANS_INHIBIT	0x100
+#define XSPI_CR_LSB_FIRST	0x200
 
-#define XSPI_SR_OFFSET		0x64	/* 8-bit Status Register */
+#define XSPI_SR_OFFSET		0x64	/* Status Register */
 
 #define XSPI_SR_RX_EMPTY_MASK	0x01	/* Receive FIFO is empty */
 #define XSPI_SR_RX_FULL_MASK	0x02	/* Receive FIFO is full */
@@ -47,8 +48,8 @@
 #define XSPI_SR_TX_FULL_MASK	0x08	/* Transmit FIFO is full */
 #define XSPI_SR_MODE_FAULT_MASK	0x10	/* Mode fault error */
 
-#define XSPI_TXD_OFFSET		0x68	/* 8-bit Data Transmit Register */
-#define XSPI_RXD_OFFSET		0x6c	/* 8-bit Data Receive Register */
+#define XSPI_TXD_OFFSET		0x68	/* Data Transmit Register */
+#define XSPI_RXD_OFFSET		0x6c	/* Data Receive Register */
 
 #define XSPI_SSR_OFFSET		0x70	/* 32-bit Slave Select Register */
 
@@ -68,6 +69,7 @@
 #define XSPI_INTR_TX_UNDERRUN		0x08	/* TxFIFO was underrun */
 #define XSPI_INTR_RX_FULL		0x10	/* RxFIFO is full */
 #define XSPI_INTR_RX_OVERRUN		0x20	/* RxFIFO was overrun */
+#define XSPI_INTR_TX_HALF_EMPTY		0x40	/* TxFIFO is half empty */
 
 #define XIPIF_V123B_RESETR_OFFSET	0x40	/* IPIF reset register */
 #define XIPIF_V123B_RESET_MASK		0x0a	/* the value to write */
@@ -81,15 +83,61 @@ struct xilinx_spi {
 
 	u32		irq;
 
-	u32		speed_hz; /* SCK has a fixed frequency of speed_hz Hz */
-
 	u8 *rx_ptr;		/* pointer in the Tx buffer */
 	const u8 *tx_ptr;	/* pointer in the Rx buffer */
 	int remaining_bytes;	/* the number of bytes left to transfer */
+	u8 bits_per_word;
 	unsigned int (*read_fn) (void __iomem *);
 	void (*write_fn) (u32, void __iomem *);
+	void (*tx_fn) (struct xilinx_spi *);
+	void (*rx_fn) (struct xilinx_spi *);
 };
 
+static void xspi_tx8(struct xilinx_spi *xspi)
+{
+	xspi->write_fn(*xspi->tx_ptr, xspi->regs + XSPI_TXD_OFFSET);
+	xspi->tx_ptr++;
+}
+
+static void xspi_tx16(struct xilinx_spi *xspi)
+{
+	xspi->write_fn(*(u16 *)(xspi->tx_ptr), xspi->regs + XSPI_TXD_OFFSET);
+	xspi->tx_ptr += 2;
+}
+
+static void xspi_tx32(struct xilinx_spi *xspi)
+{
+	xspi->write_fn(*(u32 *)(xspi->tx_ptr), xspi->regs + XSPI_TXD_OFFSET);
+	xspi->tx_ptr += 4;
+}
+
+static void xspi_rx8(struct xilinx_spi *xspi)
+{
+	u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET);
+	if (xspi->rx_ptr) {
+		*xspi->rx_ptr = data & 0xff;
+		xspi->rx_ptr++;
+	}
+}
+
+static void xspi_rx16(struct xilinx_spi *xspi)
+{
+	u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET);
+	if (xspi->rx_ptr) {
+		*(u16 *)(xspi->rx_ptr) = data & 0xffff;
+		xspi->rx_ptr += 2;
+	}
+}
+
+static void xspi_rx32(struct xilinx_spi *xspi)
+{
+	u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET);
+	if (xspi->rx_ptr) {
+		*(u32 *)(xspi->rx_ptr) = data;
+		xspi->rx_ptr += 4;
+	}
+}
+
 static void xspi_init_hw(struct xilinx_spi *xspi)
 {
 	void __iomem *regs_base = xspi->regs;
@@ -107,8 +155,8 @@ static void xspi_init_hw(struct xilinx_spi *xspi)
 	/* Disable the transmitter, enable Manual Slave Select Assertion,
 	 * put SPI controller into master mode, and enable it */
 	xspi->write_fn(XSPI_CR_TRANS_INHIBIT | XSPI_CR_MANUAL_SSELECT |
-		XSPI_CR_MASTER_MODE | XSPI_CR_ENABLE,
-		regs_base + XSPI_CR_OFFSET);
+		XSPI_CR_MASTER_MODE | XSPI_CR_ENABLE | XSPI_CR_TXFIFO_RESET |
+		XSPI_CR_RXFIFO_RESET, regs_base + XSPI_CR_OFFSET);
 }
 
 static void xilinx_spi_chipselect(struct spi_device *spi, int is_on)
@@ -141,18 +189,20 @@ static void xilinx_spi_chipselect(struct spi_device *spi, int is_on)
 
 /* spi_bitbang requires custom setup_transfer() to be defined if there is a
  * custom txrx_bufs(). We have nothing to setup here as the SPI IP block
- * supports just 8 bits per word, and SPI clock can't be changed in software.
- * Check for 8 bits per word. Chip select delay calculations could be
+ * supports 8 or 16 bits per word which cannot be changed in software.
+ * SPI clock can't be changed in software either.
+ * Check for correct bits per word. Chip select delay calculations could be
  * added here as soon as bitbang_work() can be made aware of the delay value.
  */
 static int xilinx_spi_setup_transfer(struct spi_device *spi,
 		struct spi_transfer *t)
 {
+	struct xilinx_spi *xspi = spi_master_get_devdata(spi->master);
 	u8 bits_per_word;
 
 	bits_per_word = (t && t->bits_per_word)
 			 ? t->bits_per_word : spi->bits_per_word;
-	if (bits_per_word != 8) {
+	if (bits_per_word != xspi->bits_per_word) {
 		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
 			__func__, bits_per_word);
 		return -EINVAL;
@@ -163,17 +213,16 @@ static int xilinx_spi_setup_transfer(struct spi_device *spi,
 
 static int xilinx_spi_setup(struct spi_device *spi)
 {
-	struct spi_bitbang *bitbang;
-	struct xilinx_spi *xspi;
-	int retval;
-
-	xspi = spi_master_get_devdata(spi->master);
-	bitbang = &xspi->bitbang;
-
-	retval = xilinx_spi_setup_transfer(spi, NULL);
-	if (retval < 0)
-		return retval;
-
+	/* always return 0, we can not check the number of bits.
+	 * There are cases when SPI setup is called before any driver is
+	 * there, in that case the SPI core defaults to 8 bits, which we
+	 * do not support in some cases. But if we return an error, the
+	 * SPI device would not be registered and no driver can get hold of it
+	 * When the driver is there, it will call SPI setup again with the
+	 * correct number of bits per transfer.
+	 * If a driver setups with the wrong bit number, it will fail when
+	 * it tries to do a transfer
+	 */
 	return 0;
 }
 
@@ -185,11 +234,10 @@ static void xilinx_spi_fill_tx_fifo(struct xilinx_spi *xspi)
 	sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 	while ((sr & XSPI_SR_TX_FULL_MASK) == 0 && xspi->remaining_bytes > 0) {
 		if (xspi->tx_ptr)
-			xspi->write_fn(*xspi->tx_ptr++,
-				xspi->regs + XSPI_TXD_OFFSET);
+			xspi->tx_fn(xspi);
 		else
 			xspi->write_fn(0, xspi->regs + XSPI_TXD_OFFSET);
-		xspi->remaining_bytes--;
+		xspi->remaining_bytes -= xspi->bits_per_word / 8;
 		sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 	}
 }
@@ -260,12 +308,7 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
 		/* Read out all the data from the Rx FIFO */
 		sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 		while ((sr & XSPI_SR_RX_EMPTY_MASK) == 0) {
-			u8 data;
-
-			data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET);
-			if (xspi->rx_ptr) {
-				*xspi->rx_ptr++ = data;
-			}
+			xspi->rx_fn(xspi);
 			sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET);
 		}
 
@@ -337,6 +380,19 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
 		xspi->read_fn = ioread32be;
 		xspi->write_fn = iowrite32be;
 	}
+	xspi->bits_per_word = pdata->bits_per_word;
+	if (xspi->bits_per_word == 8) {
+		xspi->tx_fn = xspi_tx8;
+		xspi->rx_fn = xspi_rx8;
+	} else if (xspi->bits_per_word == 16) {
+		xspi->tx_fn = xspi_tx16;
+		xspi->rx_fn = xspi_rx16;
+	} else if (xspi->bits_per_word == 32) {
+		xspi->tx_fn = xspi_tx32;
+		xspi->rx_fn = xspi_rx32;
+	} else
+		goto unmap_io;
+
 
 	/* SPI controller initializations */
 	xspi_init_hw(xspi);
diff --git a/drivers/spi/xilinx_spi_of.c b/drivers/spi/xilinx_spi_of.c
index 151aa13494bd..71dc3adc0495 100644
--- a/drivers/spi/xilinx_spi_of.c
+++ b/drivers/spi/xilinx_spi_of.c
@@ -72,6 +72,7 @@ static int __devinit xilinx_spi_of_probe(struct of_device *ofdev,
 		return -EINVAL;
 	}
 	pdata->num_chipselect = *prop;
+	pdata->bits_per_word = 8;
 	master = xilinx_spi_init(&ofdev->dev, &r_mem, r_irq.start, -1);
 	if (!master)
 		return -ENODEV;
diff --git a/include/linux/spi/xilinx_spi.h b/include/linux/spi/xilinx_spi.h
index a705ad85eebe..6f17278810b0 100644
--- a/include/linux/spi/xilinx_spi.h
+++ b/include/linux/spi/xilinx_spi.h
@@ -3,14 +3,16 @@
 
 /**
  * struct xspi_platform_data - Platform data of the Xilinx SPI driver
- * @num_chipselect:	Number of chip select by the IP
- * @little_endian	If registers should be accessed little endian or not
+ * @num_chipselect:	Number of chip select by the IP.
+ * @little_endian:	If registers should be accessed little endian or not.
+ * @bits_per_word:	Number of bits per word.
  * @devices:		Devices to add when the driver is probed.
  * @num_devices:	Number of devices in the devices array.
  */
 struct xspi_platform_data {
 	u16 num_chipselect;
 	bool little_endian;
+	u8 bits_per_word;
 	struct spi_board_info *devices;
 	u8 num_devices;
 };
-- 
cgit v1.2.3


From 937041e21634ffecc92d05cf693423a2c95b7252 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 24 Nov 2009 17:18:31 -0700
Subject: spi/mpc52xx-spi: minor cleanups

- drop own, obsolete include-file
- drop IRQF_SAMPLE_RANDOM (deprecated feature)
- drop 'if' above kfree()
- typos, braces & whitespaces

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Luotao Fu <l.fu@pengutronix.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/mpc52xx_spi.c       | 26 +++++++++++---------------
 include/linux/spi/mpc52xx_spi.h | 10 ----------
 2 files changed, 11 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/spi/mpc52xx_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/mpc52xx_spi.c b/drivers/spi/mpc52xx_spi.c
index 97beba2895e7..45bfe6458173 100644
--- a/drivers/spi/mpc52xx_spi.c
+++ b/drivers/spi/mpc52xx_spi.c
@@ -18,7 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/mpc52xx_spi.h>
 #include <linux/of_spi.h>
 #include <linux/io.h>
 #include <linux/of_gpio.h>
@@ -54,7 +53,7 @@ MODULE_LICENSE("GPL");
 /* FSM state return values */
 #define FSM_STOP	0	/* Nothing more for the state machine to */
 				/* do.  If something interesting happens */
-				/* then and IRQ will be received */
+				/* then an IRQ will be received */
 #define FSM_POLL	1	/* need to poll for completion, an IRQ is */
 				/* not expected */
 #define FSM_CONTINUE	2	/* Keep iterating the state machine */
@@ -62,13 +61,12 @@ MODULE_LICENSE("GPL");
 /* Driver internal data */
 struct mpc52xx_spi {
 	struct spi_master *master;
-	u32 sysclk;
 	void __iomem *regs;
 	int irq0;	/* MODF irq */
 	int irq1;	/* SPIF irq */
-	int ipb_freq;
+	unsigned int ipb_freq;
 
-	/* Statistics */
+	/* Statistics; not used now, but will be reintroduced for debugfs */
 	int msg_count;
 	int wcol_count;
 	int wcol_ticks;
@@ -229,7 +227,7 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
 		ms->wcol_tx_timestamp = get_tbl();
 		data = 0;
 		if (ms->tx_buf)
-			data = *(ms->tx_buf-1);
+			data = *(ms->tx_buf - 1);
 		out_8(ms->regs + SPI_DATA, data); /* try again */
 		return FSM_CONTINUE;
 	} else if (status & SPI_STATUS_MODF) {
@@ -481,8 +479,9 @@ static int __devinit mpc52xx_spi_probe(struct of_device *op,
 			gpio_direction_output(gpio_cs, 1);
 			ms->gpio_cs[i] = gpio_cs;
 		}
-	} else
+	} else {
 		master->num_chipselect = 1;
+	}
 
 	spin_lock_init(&ms->lock);
 	INIT_LIST_HEAD(&ms->queue);
@@ -490,10 +489,10 @@ static int __devinit mpc52xx_spi_probe(struct of_device *op,
 
 	/* Decide if interrupts can be used */
 	if (ms->irq0 && ms->irq1) {
-		rc = request_irq(ms->irq0, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
+		rc = request_irq(ms->irq0, mpc52xx_spi_irq, 0,
 				  "mpc5200-spi-modf", ms);
-		rc |= request_irq(ms->irq1, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
-				  "mpc5200-spi-spiF", ms);
+		rc |= request_irq(ms->irq1, mpc52xx_spi_irq, 0,
+				  "mpc5200-spi-spif", ms);
 		if (rc) {
 			free_irq(ms->irq0, ms);
 			free_irq(ms->irq1, ms);
@@ -524,8 +523,7 @@ static int __devinit mpc52xx_spi_probe(struct of_device *op,
 	while (i-- > 0)
 		gpio_free(ms->gpio_cs[i]);
 
-	if (ms->gpio_cs != NULL)
-		kfree(ms->gpio_cs);
+	kfree(ms->gpio_cs);
  err_alloc:
  err_init:
 	iounmap(regs);
@@ -544,9 +542,7 @@ static int __devexit mpc52xx_spi_remove(struct of_device *op)
 	for (i = 0; i < ms->gpio_cs_count; i++)
 		gpio_free(ms->gpio_cs[i]);
 
-	if (ms->gpio_cs != NULL)
-		kfree(ms->gpio_cs);
-
+	kfree(ms->gpio_cs);
 	spi_unregister_master(master);
 	spi_master_put(master);
 	iounmap(ms->regs);
diff --git a/include/linux/spi/mpc52xx_spi.h b/include/linux/spi/mpc52xx_spi.h
deleted file mode 100644
index d1004cf09241..000000000000
--- a/include/linux/spi/mpc52xx_spi.h
+++ /dev/null
@@ -1,10 +0,0 @@
-
-#ifndef INCLUDE_MPC5200_SPI_H
-#define INCLUDE_MPC5200_SPI_H
-
-extern void mpc52xx_spi_set_premessage_hook(struct spi_master *master,
-					    void (*hook)(struct spi_message *m,
-							 void *context),
-					    void *hook_context);
-
-#endif
-- 
cgit v1.2.3


From 64f16603eae17e869d5fc8a60ae987394190e639 Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Sat, 5 Dec 2009 08:54:20 +0000
Subject: gigaset: documentation amendments

Various additions and improvements to the Gigaset driver's README
file, and added comments to its userspace visible include file.

Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/isdn/README.gigaset | 116 ++++++++++++++++++++++++++++++--------
 include/linux/gigaset_dev.h       |  22 +++++---
 2 files changed, 106 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 0fc9831d7ecb..794941fc9493 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -68,22 +68,38 @@ GigaSet 307x Device Driver
      for troubleshooting or to pass module parameters.
 
      The module ser_gigaset provides a serial line discipline N_GIGASET_M101
-     which drives the device through the regular serial line driver. It must
-     be attached to the serial line to which the M101 is connected with the
-     ldattach(8) command (requires util-linux-ng release 2.14 or later), for
-     example:
-	 ldattach GIGASET_M101 /dev/ttyS1
+     which uses the regular serial port driver to access the device, and must
+     therefore be attached to the serial device to which the M101 is connected.
+     The ldattach(8) command (included in util-linux-ng release 2.14 or later)
+     can be used for that purpose, for example:
+	ldattach GIGASET_M101 /dev/ttyS1
      This will open the device file, attach the line discipline to it, and
      then sleep in the background, keeping the device open so that the line
      discipline remains active. To deactivate it, kill the daemon, for example
      with
-	 killall ldattach
+	killall ldattach
      before disconnecting the device. To have this happen automatically at
      system startup/shutdown on an LSB compatible system, create and activate
      an appropriate LSB startup script /etc/init.d/gigaset. (The init name
      'gigaset' is officially assigned to this project by LANANA.)
      Alternatively, just add the 'ldattach' command line to /etc/rc.local.
 
+     The modules accept the following parameters:
+
+	Module	 	Parameter  Meaning
+
+	gigaset	 	debug	   debug level (see section 3.2.)
+
+			startmode  initial operation mode (see section 2.5.):
+	bas_gigaset )		   1=ISDN4linux/CAPI (default), 0=Unimodem
+	ser_gigaset )
+	usb_gigaset )	cidmode    initial Call-ID mode setting (see section
+				   2.5.): 1=on (default), 0=off
+
+     Depending on your distribution you may want to create a separate module
+     configuration file /etc/modprobe.d/gigaset for these, or add them to a
+     custom file like /etc/modprobe.conf.local.
+
 2.2. Device nodes for user space programs
      ------------------------------------
      The device can be accessed from user space (eg. by the user space tools
@@ -93,11 +109,48 @@ GigaSet 307x Device Driver
      - /dev/ttyGU0 for M105 (USB data boxes)
      - /dev/ttyGB0 for the base driver (direct USB connection)
 
-     You can also select a "default device" which is used by the frontends when
+     If you connect more than one device of a type, they will get consecutive
+     device nodes, eg. /dev/ttyGU1 for a second M105.
+
+     You can also set a "default device" for the user space tools to use when
      no device node is given as parameter, by creating a symlink /dev/ttyG to
      one of them, eg.:
 
-        ln -s /dev/ttyGB0 /dev/ttyG
+	ln -s /dev/ttyGB0 /dev/ttyG
+
+     The devices accept the following device specific ioctl calls
+     (defined in gigaset_dev.h):
+
+     ioctl(int fd, GIGASET_REDIR, int *cmd);
+     If cmd==1, the device is set to be controlled exclusively through the
+     character device node; access from the ISDN subsystem is blocked.
+     If cmd==0, the device is set to be used from the ISDN subsystem and does
+     not communicate through the character device node.
+
+     ioctl(int fd, GIGASET_CONFIG, int *cmd);
+     (ser_gigaset and usb_gigaset only)
+     If cmd==1, the device is set to adapter configuration mode where commands
+     are interpreted by the M10x DECT adapter itself instead of being
+     forwarded to the base station. In this mode, the device accepts the
+     commands described in Siemens document "AT-Kommando Alignment M10x Data"
+     for setting the operation mode, associating with a base station and
+     querying parameters like field strengh and signal quality.
+     Note that there is no ioctl command for leaving adapter configuration
+     mode and returning to regular operation. In order to leave adapter
+     configuration mode, write the command ATO to the device.
+
+     ioctl(int fd, GIGASET_BRKCHARS, unsigned char brkchars[6]);
+     (usb_gigaset only)
+     Set the break characters on an M105's internal serial adapter to the six
+     bytes stored in brkchars[]. Unused bytes should be set to zero.
+
+     ioctl(int fd, GIGASET_VERSION, unsigned version[4]);
+     Retrieve version information from the driver. version[0] must be set to
+     one of:
+     - GIGVER_DRIVER: retrieve driver version
+     - GIGVER_COMPAT: retrieve interface compatibility version
+     - GIGVER_FWBASE: retrieve the firmware version of the base
+     Upon return, version[] is filled with the requested version information.
 
 2.3. ISDN4linux
      ----------
@@ -113,15 +166,24 @@ GigaSet 307x Device Driver
          Connection State: 0, Response: -1
          gigaset_process_response: resp_code -1 in ConState 0 !
          Timeout occurred
-     you might need to use unimodem mode. (see section 2.5.)
+     you probably need to use unimodem mode. (see section 2.5.)
 
 2.4. CAPI
      ----
      If the driver is compiled with CAPI support (kernel configuration option
      GIGASET_CAPI, experimental) it can also be used with CAPI 2.0 kernel and
-     user space applications.  ISDN4Linux is supported in this configuration
+     user space applications. For user space access, the module capi.ko must
+     be loaded. The capiinit command (included in the capi4k-utils package)
+     does this for you.
+
+     The CAPI variant of the driver supports legacy ISDN4Linux applications
      via the capidrv compatibility driver. The kernel module capidrv.ko must
-     be loaded explicitly ("modprobe capidrv") if needed.
+     be loaded explicitly with the command
+        modprobe capidrv
+     if needed, and cannot be unloaded again without unloading the driver
+     first. (These are limitations of capidrv.)
+
+     The note about unimodem mode in the preceding section applies here, too.
 
 2.5. Unimodem mode
      -------------
@@ -134,9 +196,14 @@ GigaSet 307x Device Driver
      You can switch back using
          gigacontr --mode isdn
 
-     You can also load the driver using e.g.
-         modprobe usb_gigaset startmode=0
-     to prevent the driver from starting in "isdn4linux mode".
+     You can also put the driver directly into Unimodem mode when it's loaded,
+     by passing the module parameter startmode=0 to the hardware specific
+     module, e.g.
+	modprobe usb_gigaset startmode=0
+     or by adding a line like
+	options usb_gigaset startmode=0
+     to an appropriate module configuration file, like /etc/modprobe.d/gigaset
+     or /etc/modprobe.conf.local.
 
      In this mode the device works like a modem connected to a serial port
      (the /dev/ttyGU0, ... mentioned above) which understands the commands
@@ -164,9 +231,8 @@ GigaSet 307x Device Driver
 
         options ppp_async flag_time=0
 
-     to /etc/modprobe.conf. If your distribution has some local module
-     configuration file like /etc/modprobe.conf.local,
-     using that should be preferred.
+     to an appropriate module configuration file, like /etc/modprobe.d/gigaset
+     or /etc/modprobe.conf.local.
 
 2.6. Call-ID (CID) mode
      ------------------
@@ -189,12 +255,13 @@ GigaSet 307x Device Driver
        settings (CID mode).
      - If you have several DECT data devices (M10x) which you want to use
        in turn, select Unimodem mode by passing the parameter "cidmode=0" to
-       the driver ("modprobe usb_gigaset cidmode=0" or modprobe.conf).
+       the appropriate driver module (ser_gigaset or usb_gigaset).
 
      If you want both of these at once, you are out of luck.
 
-     You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode
-     setting (ttyGxy is ttyGU0 or ttyGB0).
+     You can also use the tty class parameter "cidmode" of the device to
+     change its CID mode while the driver is loaded, eg.
+        echo 0 > /sys/class/tty/ttyGU0/cidmode
 
 2.7. Unregistered Wireless Devices (M101/M105)
      -----------------------------------------
@@ -208,7 +275,7 @@ GigaSet 307x Device Driver
      driver. In that situation, a restricted set of functions is available
      which includes, in particular, those necessary for registering the device
      to a base or for switching it between Fixed Part and Portable Part
-     modes.
+     modes. See the gigacontr(8) manpage for details.
 
 3.   Troubleshooting
      ---------------
@@ -222,9 +289,7 @@ GigaSet 307x Device Driver
 
            options isdn dialtimeout=15
 
-        to /etc/modprobe.conf. If your distribution has some local module
-        configuration file like /etc/modprobe.conf.local,
-        using that should be preferred.
+        to /etc/modprobe.d/gigaset, /etc/modprobe.conf.local or a similar file.
 
      Problem:
         Your isdn script aborts with a message about isdnlog.
@@ -264,7 +329,8 @@ GigaSet 307x Device Driver
      The initial value can be set using the debug parameter when loading the
      module "gigaset", e.g. by adding a line
         options gigaset debug=0
-     to /etc/modprobe.conf, ...
+     to your module configuration file, eg. /etc/modprobe.d/gigaset or
+     /etc/modprobe.conf.local.
 
      Generated debugging information can be found
      - as output of the command
diff --git a/include/linux/gigaset_dev.h b/include/linux/gigaset_dev.h
index 5dc4a316ca37..258ba82937e7 100644
--- a/include/linux/gigaset_dev.h
+++ b/include/linux/gigaset_dev.h
@@ -16,15 +16,23 @@
 
 #include <linux/ioctl.h>
 
+/* The magic IOCTL value for this interface. */
 #define GIGASET_IOCTL 0x47
 
-#define GIGVER_DRIVER 0
-#define GIGVER_COMPAT 1
-#define GIGVER_FWBASE 2
+/* enable/disable device control via character device (lock out ISDN subsys) */
+#define GIGASET_REDIR    _IOWR(GIGASET_IOCTL, 0, int)
 
-#define GIGASET_REDIR    _IOWR (GIGASET_IOCTL, 0, int)
-#define GIGASET_CONFIG   _IOWR (GIGASET_IOCTL, 1, int)
-#define GIGASET_BRKCHARS _IOW  (GIGASET_IOCTL, 2, unsigned char[6]) //FIXME [6] okay?
-#define GIGASET_VERSION  _IOWR (GIGASET_IOCTL, 3, unsigned[4])
+/* enable adapter configuration mode (M10x only) */
+#define GIGASET_CONFIG   _IOWR(GIGASET_IOCTL, 1, int)
+
+/* set break characters (M105 only) */
+#define GIGASET_BRKCHARS _IOW(GIGASET_IOCTL, 2, unsigned char[6])
+
+/* get version information selected by arg[0] */
+#define GIGASET_VERSION  _IOWR(GIGASET_IOCTL, 3, unsigned[4])
+/* values for GIGASET_VERSION arg[0] */
+#define GIGVER_DRIVER 0		/* get driver version */
+#define GIGVER_COMPAT 1		/* get interface compatibility version */
+#define GIGVER_FWBASE 2		/* get base station firmware version */
 
 #endif
-- 
cgit v1.2.3


From b38310e99ed09163062902285edd6d7b3fc136d6 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 6 Dec 2009 10:35:30 +0000
Subject: include/linux/if_ether.h: Remove unused defines MAC_BUF_SIZE and
 DECLARE_MAC_BUF

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 005e1525ab86..299b4121f914 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -137,8 +137,6 @@ extern struct ctl_table ether_table[];
 extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
 
 #define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
-#define MAC_BUF_SIZE	18
-#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE]
 
 #endif
 
-- 
cgit v1.2.3


From 12633e803a2a556f6469e0933d08233d0844a2d9 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@austin.ibm.com>
Date: Wed, 25 Nov 2009 17:23:25 +0000
Subject: sysfs/cpu: Add probe/release files

Version 3 of this patch is updated with documentation added to
Documentation/ABI.  There are no changes to any of the C code from v2
of the patch.

In order to support kernel DLPAR of CPU resources we need to provide an
interface to add (probe) and remove (release) the resource from the system.
This patch Creates new generic probe and release sysfs files to facilitate
cpu probe/release.  The probe/release interface provides for allowing each
arch to supply their own routines for implementing the backend of adding
and removing cpus to/from the system.

This also creates the powerpc specific stubs to handle the arch callouts
from writes to the sysfs files.

The creation and use of these files is regulated by the
CONFIG_ARCH_CPU_PROBE_RELEASE option so that only architectures that need the
capability will have the files created.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu | 15 ++++++++++
 arch/powerpc/Kconfig                               |  4 +++
 arch/powerpc/include/asm/machdep.h                 |  5 ++++
 arch/powerpc/kernel/sysfs.c                        | 19 +++++++++++++
 drivers/base/cpu.c                                 | 32 ++++++++++++++++++++++
 include/linux/cpu.h                                |  2 ++
 6 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index a703b9e9aeb9..d868a11c94a5 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -62,6 +62,21 @@ Description:	CPU topology files that describe kernel limits related to
 		See Documentation/cputopology.txt for more information.
 
 
+What:		/sys/devices/system/cpu/probe
+		/sys/devices/system/cpu/release
+Date:		November 2009
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Dynamic addition and removal of CPU's.  This is not hotplug
+		removal, this is meant complete removal/addition of the CPU
+		from the system.
+
+		probe: writes to this file will dynamically add a CPU to the
+		system.  Information written to the file to add CPU's is
+		architecture specific.
+
+		release: writes to this file dynamically remove a CPU from
+		the system.  Information writtento the file to remove CPU's
+		is architecture specific.
 
 What:		/sys/devices/system/cpu/cpu#/node
 Date:		October 2009
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5dbd375a3f2a..0df57466e783 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -320,6 +320,10 @@ config HOTPLUG_CPU
 
 	  Say N if you are unsure.
 
+config ARCH_CPU_PROBE_RELEASE
+	def_bool y
+	depends on HOTPLUG_CPU
+
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 9efa2be78331..9f0fc9e6ce0d 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -266,6 +266,11 @@ struct machdep_calls {
 	void (*suspend_disable_irqs)(void);
 	void (*suspend_enable_irqs)(void);
 #endif
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ssize_t (*cpu_probe)(const char *, size_t);
+	ssize_t (*cpu_release)(const char *, size_t);
+#endif
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 956ab33fd73f..e235e52dc4fe 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -461,6 +461,25 @@ static void unregister_cpu_online(unsigned int cpu)
 
 	cacheinfo_cpu_offline(cpu);
 }
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+	if (ppc_md.cpu_probe)
+		return ppc_md.cpu_probe(buf, count);
+
+	return -EINVAL;
+}
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+	if (ppc_md.cpu_release)
+		return ppc_md.cpu_release(buf, count);
+
+	return -EINVAL;
+}
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index e62a4ccea54d..7c03af7b84a9 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -72,6 +72,38 @@ void unregister_cpu(struct cpu *cpu)
 	per_cpu(cpu_sys_devices, logical_cpu) = NULL;
 	return;
 }
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+			       size_t count)
+{
+	return arch_cpu_probe(buf, count);
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+				 size_t count)
+{
+	return arch_cpu_release(buf, count);
+}
+
+static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+
+int __init cpu_probe_release_init(void)
+{
+	int rc;
+
+	rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			       &class_attr_probe.attr);
+	if (!rc)
+		rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+				       &class_attr_release.attr);
+
+	return rc;
+}
+device_initcall(cpu_probe_release_init);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 #else /* ... !CONFIG_HOTPLUG_CPU */
 static inline void register_cpu_control(struct cpu *cpu)
 {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 47536197ffdd..c972f7ccb7d3 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -43,6 +43,8 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
 
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
+extern ssize_t arch_cpu_probe(const char *, size_t);
+extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
 
-- 
cgit v1.2.3


From 51badebdcf394cc5fd574a524b55b3f6085e5e9c Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Thu, 26 Nov 2009 09:59:05 +0000
Subject: powerpc/pseries: Serialize cpu hotplug operations during deactivate
 Vs deallocate

Currently the cpu-allocation/deallocation process comprises of two steps:
- Set the indicators and to update the device tree with DLPAR node
  information.

- Online/offline the allocated/deallocated CPU.

This is achieved by writing to the sysfs tunables "probe" during allocation
and "release" during deallocation.

At the sametime, the userspace can independently online/offline the CPUs of
the system using the sysfs tunable "online".

It is quite possible that when a userspace tool offlines a CPU
for the purpose of deallocation and is in the process of updating the device
tree, some other userspace tool could bring the CPU back online by writing to
the "online" sysfs tunable thereby causing the deallocate process to fail.

The solution to this is to serialize writes to the "probe/release" sysfs
tunable with the writes to the "online" sysfs tunable.

This patch employs a mutex to provide this serialization, which is a no-op on
all architectures except PPC_PSERIES

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/dlpar.c | 45 ++++++++++++++++++++++++++--------
 drivers/base/cpu.c                     |  2 ++
 include/linux/cpu.h                    | 13 ++++++++++
 3 files changed, 50 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 642e1b2e5c42..fd2f0afeb4de 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -436,6 +436,18 @@ int dlpar_release_drc(u32 drc_index)
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 
+static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+	mutex_lock(&pseries_cpu_hotplug_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+	mutex_unlock(&pseries_cpu_hotplug_mutex);
+}
+
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 {
 	struct device_node *dn;
@@ -443,13 +455,18 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	char *cpu_name;
 	int rc;
 
+	cpu_hotplug_driver_lock();
 	rc = strict_strtoul(buf, 0, &drc_index);
-	if (rc)
-		return -EINVAL;
+	if (rc) {
+		rc = -EINVAL;
+		goto out;
+	}
 
 	dn = dlpar_configure_connector(drc_index);
-	if (!dn)
-		return -EINVAL;
+	if (!dn) {
+		rc = -EINVAL;
+		goto out;
+	}
 
 	/* configure-connector reports cpus as living in the base
 	 * directory of the device tree.  CPUs actually live in the
@@ -459,7 +476,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 			   GFP_KERNEL);
 	if (!cpu_name) {
 		dlpar_free_cc_nodes(dn);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out;
 	}
 
 	sprintf(cpu_name, "/cpus%s", dn->full_name);
@@ -469,7 +487,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc) {
 		dlpar_free_cc_nodes(dn);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto out;
 	}
 
 	rc = dlpar_attach_node(dn);
@@ -479,6 +498,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	}
 
 	rc = online_node_cpus(dn);
+out:
+	cpu_hotplug_driver_unlock();
 
 	return rc ? rc : count;
 }
@@ -499,26 +520,30 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
+	cpu_hotplug_driver_lock();
 	rc = offline_node_cpus(dn);
 	if (rc) {
 		of_node_put(dn);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto out;
 	}
 
 	rc = dlpar_release_drc(*drc_index);
 	if (rc) {
 		of_node_put(dn);
-		return -EINVAL;
+		goto out;
 	}
 
 	rc = dlpar_detach_node(dn);
 	if (rc) {
 		dlpar_acquire_drc(*drc_index);
-		return rc;
+		goto out;
 	}
 
 	of_node_put(dn);
-	return count;
+out:
+	cpu_hotplug_driver_unlock();
+	return rc ? rc : count;
 }
 
 static int __init pseries_dlpar_init(void)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 7c03af7b84a9..27fd775375b0 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -35,6 +35,7 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
 	ssize_t ret;
 
+	cpu_hotplug_driver_lock();
 	switch (buf[0]) {
 	case '0':
 		ret = cpu_down(cpu->sysdev.id);
@@ -49,6 +50,7 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut
 	default:
 		ret = -EINVAL;
 	}
+	cpu_hotplug_driver_unlock();
 
 	if (ret >= 0)
 		ret = count;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c972f7ccb7d3..e287863ac053 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -117,6 +117,19 @@ extern void put_online_cpus(void);
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
 int cpu_down(unsigned int cpu);
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+extern void cpu_hotplug_driver_lock(void);
+extern void cpu_hotplug_driver_unlock(void);
+#else
+static inline void cpu_hotplug_driver_lock(void)
+{
+}
+
+static inline void cpu_hotplug_driver_unlock(void)
+{
+}
+#endif
+
 #else		/* CONFIG_HOTPLUG_CPU */
 
 #define get_online_cpus()	do { } while (0)
-- 
cgit v1.2.3


From 44234adcdce38f83c56e05f808ce656175b4beeb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 9 Dec 2009 09:25:48 +0100
Subject: hw-breakpoints: Modify breakpoints without unregistering them

Currently, when ptrace needs to modify a breakpoint, like disabling
it, changing its address, type or len, it calls
modify_user_hw_breakpoint(). This latter will perform the heavy and
racy task of unregistering the old breakpoint and registering a new
one.

This is racy as someone else might steal the reserved breakpoint
slot under us, which is undesired as the breakpoint is only
supposed to be modified, sometimes in the middle of a debugging
workflow. We don't want our slot to be stolen in the middle.

So instead of unregistering/registering the breakpoint, just
disable it while we modify its breakpoint fields and re-enable it
after if necessary.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1260347148-5519-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c      | 57 ++++++++++++++++++++-----------------------
 include/linux/hw_breakpoint.h |  4 +--
 include/linux/perf_event.h    |  5 +++-
 kernel/hw_breakpoint.c        | 42 +++++++++++++++++++++++--------
 kernel/perf_event.c           |  4 +--
 5 files changed, 66 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b361d28061d0..7079ddaf0731 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -595,7 +595,7 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 	return dr7;
 }
 
-static struct perf_event *
+static int
 ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
 			 struct task_struct *tsk, int disabled)
 {
@@ -609,11 +609,11 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
 	 * written the address register first
 	 */
 	if (!bp)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	attr = bp->attr;
 	attr.bp_len = gen_len;
@@ -658,28 +658,17 @@ restore:
 				if (!second_pass)
 					continue;
 
-				thread->ptrace_bps[i] = NULL;
-				bp = ptrace_modify_breakpoint(bp, len, type,
+				rc = ptrace_modify_breakpoint(bp, len, type,
 							      tsk, 1);
-				if (IS_ERR(bp)) {
-					rc = PTR_ERR(bp);
-					thread->ptrace_bps[i] = NULL;
+				if (rc)
 					break;
-				}
-				thread->ptrace_bps[i] = bp;
 			}
 			continue;
 		}
 
-		bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
-
-		/* Incorrect bp, or we have a bug in bp API */
-		if (IS_ERR(bp)) {
-			rc = PTR_ERR(bp);
-			thread->ptrace_bps[i] = NULL;
+		rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+		if (rc)
 			break;
-		}
-		thread->ptrace_bps[i] = bp;
 	}
 	/*
 	 * Make a second pass to free the remaining unused breakpoints
@@ -737,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		attr.disabled = 1;
 
 		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+
+		/*
+		 * CHECKME: the previous code returned -EIO if the addr wasn't
+		 * a valid task virtual addr. The new one will return -EINVAL in
+		 *  this case.
+		 * -EINVAL may be what we want for in-kernel breakpoints users,
+		 * but -EIO looks better for ptrace, since we refuse a register
+		 * writing for the user. And anyway this is the previous
+		 * behaviour.
+		 */
+		if (IS_ERR(bp))
+			return PTR_ERR(bp);
+
+		t->ptrace_bps[nr] = bp;
 	} else {
+		int err;
+
 		bp = t->ptrace_bps[nr];
-		t->ptrace_bps[nr] = NULL;
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
-		bp = modify_user_hw_breakpoint(bp, &attr);
+		err = modify_user_hw_breakpoint(bp, &attr);
+		if (err)
+			return err;
 	}
-	/*
-	 * CHECKME: the previous code returned -EIO if the addr wasn't a
-	 * valid task virtual addr. The new one will return -EINVAL in this
-	 * case.
-	 * -EINVAL may be what we want for in-kernel breakpoints users, but
-	 * -EIO looks better for ptrace, since we refuse a register writing
-	 * for the user. And anyway this is the previous behaviour.
-	 */
-	if (IS_ERR(bp))
-		return PTR_ERR(bp);
 
-	t->ptrace_bps[nr] = bp;
 
 	return 0;
 }
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 42da1ce19ec0..69f07a9f1277 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -55,7 +55,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
-extern struct perf_event *
+extern int
 modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 
 /*
@@ -91,7 +91,7 @@ static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)	{ return NULL; }
-static inline struct perf_event *
+static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
 			  struct perf_event_attr *attr)	{ return NULL; }
 static inline struct perf_event *
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index bf3329413e18..64a53f74c9a9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -872,6 +872,8 @@ extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
 extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
+extern void perf_event_enable(struct perf_event *event);
+extern void perf_event_disable(struct perf_event *event);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -902,7 +904,8 @@ static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
-
+static inline void perf_event_enable(struct perf_event *event)		{ }
+static inline void perf_event_disable(struct perf_event *event)		{ }
 #endif
 
 #define perf_output_put(handle, x) \
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03a0773ac2b2..366eedf949c0 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -320,18 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
  * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
  */
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	/*
-	 * FIXME: do it without unregistering
-	 * - We don't want to lose our slot
-	 * - If the new bp is incorrect, don't lose the older one
-	 */
-	unregister_hw_breakpoint(bp);
+	u64 old_addr = bp->attr.bp_addr;
+	int old_type = bp->attr.bp_type;
+	int old_len = bp->attr.bp_len;
+	int err = 0;
+
+	perf_event_disable(bp);
+
+	bp->attr.bp_addr = attr->bp_addr;
+	bp->attr.bp_type = attr->bp_type;
+	bp->attr.bp_len = attr->bp_len;
+
+	if (attr->disabled)
+		goto end;
 
-	return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid,
-						bp->overflow_handler);
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	if (!err)
+		perf_event_enable(bp);
+
+	if (err) {
+		bp->attr.bp_addr = old_addr;
+		bp->attr.bp_type = old_type;
+		bp->attr.bp_len = old_len;
+		if (!bp->attr.disabled)
+			perf_event_enable(bp);
+
+		return err;
+	}
+
+end:
+	bp->attr.disabled = attr->disabled;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fd43ff4ac860..3b0cf86eee84 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
-static void perf_event_disable(struct perf_event *event)
+void perf_event_disable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info)
  * perf_event_for_each_child or perf_event_for_each as described
  * for perf_event_disable.
  */
-static void perf_event_enable(struct perf_event *event)
+void perf_event_enable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
-- 
cgit v1.2.3


From dba091b9e3522b9d32fc9975e48d3b69633b45f0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 9 Dec 2009 09:32:03 +0100
Subject: sched: Protect sched_rr_get_param() access to task->sched_class

sched_rr_get_param calls
task->sched_class->get_rr_interval(task) without protection
against a concurrent sched_setscheduler() call which modifies
task->sched_class.

Serialize the access with task_rq_lock(task) and hand the rq
pointer into get_rr_interval() as it's needed at least in the
sched_fair implementation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <alpine.LFD.2.00.0912090930120.3089@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   | 3 ++-
 kernel/sched.c          | 6 +++++-
 kernel/sched_fair.c     | 6 +-----
 kernel/sched_idletask.c | 2 +-
 kernel/sched_rt.c       | 2 +-
 5 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89115ec7d43f..9b2402725088 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1111,7 +1111,8 @@ struct sched_class {
 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
 			     int oldprio, int running);
 
-	unsigned int (*get_rr_interval) (struct task_struct *task);
+	unsigned int (*get_rr_interval) (struct rq *rq,
+					 struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	void (*moved_group) (struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index c4635f74540c..68db5a2e6545 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6887,6 +6887,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 {
 	struct task_struct *p;
 	unsigned int time_slice;
+	unsigned long flags;
+	struct rq *rq;
 	int retval;
 	struct timespec t;
 
@@ -6903,7 +6905,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	if (retval)
 		goto out_unlock;
 
-	time_slice = p->sched_class->get_rr_interval(p);
+	rq = task_rq_lock(p, &flags);
+	time_slice = p->sched_class->get_rr_interval(rq, p);
+	task_rq_unlock(rq, &flags);
 
 	read_unlock(&tasklist_lock);
 	jiffies_to_timespec(time_slice, &t);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..613c1c749677 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2014,21 +2014,17 @@ static void moved_group_fair(struct task_struct *p)
 }
 #endif
 
-unsigned int get_rr_interval_fair(struct task_struct *task)
+unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
 	struct sched_entity *se = &task->se;
-	unsigned long flags;
-	struct rq *rq;
 	unsigned int rr_interval = 0;
 
 	/*
 	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
 	 * idle runqueue:
 	 */
-	rq = task_rq_lock(task, &flags);
 	if (rq->cfs.load.weight)
 		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
-	task_rq_unlock(rq, &flags);
 
 	return rr_interval;
 }
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index b133a28fcde3..33d5384a73a8 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
 		check_preempt_curr(rq, p, 0);
 }
 
-unsigned int get_rr_interval_idle(struct task_struct *task)
+unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
 {
 	return 0;
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5c5fef378415..aecbd9c6b20c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
 	dequeue_pushable_task(rq, p);
 }
 
-unsigned int get_rr_interval_rt(struct task_struct *task)
+unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
 	/*
 	 * Time slice is 0 for SCHED_FIFO tasks
-- 
cgit v1.2.3


From 6b314d0e11924c803bf8cd944e87fd58cdb5088c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 2 Dec 2009 18:58:05 +0100
Subject: sched: Remove sysctl.sched_features

Since we've had a much saner debugfs interface to this, remove the
sysctl one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
[ v2: build fix ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 kernel/sysctl.c       | 8 --------
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9b2402725088..ca72ed42ac34 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1905,7 +1905,6 @@ extern unsigned int sysctl_sched_shares_ratelimit;
 extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 #ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dbf93a52ee9..e5cc53514caa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -314,14 +314,6 @@ static struct ctl_table kern_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_features",
-		.data		= &sysctl_sched_features,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_migration_cost",
-- 
cgit v1.2.3


From cd29fe6f2637cc2ccbda5ac65f5332d6bf5fa3c6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 27 Nov 2009 17:32:46 +0100
Subject: sched: Sanitize fork() handling

Currently we try to do task placement in wake_up_new_task() after we do
the load-balance pass in sched_fork(). This yields complicated semantics
in that we have to deal with tasks on different RQs and the
set_task_cpu() calls in copy_process() and sched_fork()

Rename ->task_new() to ->task_fork() and call it from sched_fork()
before the balancing, this gives the policy a clear point to place the
task.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        | 47 ++++++++++++++++++-----------------------------
 kernel/sched_fair.c   | 28 +++++++++++++++-------------
 3 files changed, 34 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca72ed42ac34..31d9dec78675 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1102,7 +1102,7 @@ struct sched_class {
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
-	void (*task_new) (struct rq *rq, struct task_struct *p);
+	void (*task_fork) (struct task_struct *p);
 
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 			       int running);
diff --git a/kernel/sched.c b/kernel/sched.c
index c92670f8e097..33c903573132 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1811,6 +1811,20 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 
 static void calc_load_account_active(struct rq *this_rq);
 
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfuly executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+	task_thread_info(p)->cpu = cpu;
+#endif
+}
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -1967,20 +1981,6 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
-	set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
-	/*
-	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
-	 * successfuly executed on another CPU. We must ensure that updates of
-	 * per-task data have been completed by this moment.
-	 */
-	smp_wmb();
-	task_thread_info(p)->cpu = cpu;
-#endif
-}
-
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 				       const struct sched_class *prev_class,
 				       int oldprio, int running)
@@ -2552,7 +2552,6 @@ static void __sched_fork(struct task_struct *p)
 void sched_fork(struct task_struct *p, int clone_flags)
 {
 	int cpu = get_cpu();
-	unsigned long flags;
 
 	__sched_fork(p);
 
@@ -2586,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
+	if (p->sched_class->task_fork)
+		p->sched_class->task_fork(p);
+
 #ifdef CONFIG_SMP
 	cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
 #endif
-	local_irq_save(flags);
-	update_rq_clock(cpu_rq(cpu));
 	set_task_cpu(p, cpu);
-	local_irq_restore(flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2625,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
-
-	if (!p->sched_class->task_new || !current->se.on_rq) {
-		activate_task(rq, p, 0);
-	} else {
-		/*
-		 * Let the scheduling class do new task startup
-		 * management (if any):
-		 */
-		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
-	}
+	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 613c1c749677..44ec80ccfa85 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1922,28 +1922,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 }
 
 /*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * called on fork with the child task as argument from the parent's context
+ *  - child not yet on the tasklist
+ *  - preemption disabled
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_fork_fair(struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+	struct cfs_rq *cfs_rq = task_cfs_rq(current);
 	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
 	int this_cpu = smp_processor_id();
+	struct rq *rq = this_rq();
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
 
-	sched_info_queued(p);
+	if (unlikely(task_cpu(p) != this_cpu))
+		__set_task_cpu(p, this_cpu);
 
 	update_curr(cfs_rq);
+
 	if (curr)
 		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);
 
-	/* 'curr' will be NULL if the child belongs to a different group */
-	if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
-			curr && entity_before(curr, se)) {
+	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
@@ -1952,7 +1954,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
-	enqueue_task_fair(rq, p, 0);
+	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 /*
@@ -2052,7 +2054,7 @@ static const struct sched_class fair_sched_class = {
 
 	.set_curr_task          = set_curr_task_fair,
 	.task_tick		= task_tick_fair,
-	.task_new		= task_new_fair,
+	.task_fork		= task_fork_fair,
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
-- 
cgit v1.2.3


From 6cecd084d0fd27bb1e498e2829fd45846d806856 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Nov 2009 13:00:37 +0100
Subject: sched: Discard some old bits

WAKEUP_RUNNING was an experiment, not sure why that ever ended up being
merged...

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |  2 --
 kernel/sched.c          | 17 +++++++----------
 kernel/sched_debug.c    |  1 -
 kernel/sched_fair.c     |  3 ---
 kernel/sched_features.h |  5 -----
 5 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 31d9dec78675..4b1ebd3280c6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1152,8 +1152,6 @@ struct sched_entity {
 	u64			start_runtime;
 	u64			avg_wakeup;
 
-	u64			avg_running;
-
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
diff --git a/kernel/sched.c b/kernel/sched.c
index 33c903573132..0170735bdafc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2493,7 +2493,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_overlap		= 0;
 	p->se.start_runtime		= 0;
 	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
-	p->se.avg_running		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start			= 0;
@@ -5379,13 +5378,14 @@ static inline void schedule_debug(struct task_struct *prev)
 #endif
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *p)
+static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
+	if (prev->state == TASK_RUNNING) {
+		u64 runtime = prev->se.sum_exec_runtime;
 
-	update_avg(&p->se.avg_running, runtime);
+		runtime -= prev->se.prev_sum_exec_runtime;
+		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
 
-	if (p->state == TASK_RUNNING) {
 		/*
 		 * In order to avoid avg_overlap growing stale when we are
 		 * indeed overlapping and hence not getting put to sleep, grow
@@ -5395,12 +5395,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
 		 * correlates to the amount of cache footprint a task can
 		 * build up.
 		 */
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-		update_avg(&p->se.avg_overlap, runtime);
-	} else {
-		update_avg(&p->se.avg_running, 0);
+		update_avg(&prev->se.avg_overlap, runtime);
 	}
-	p->sched_class->put_prev_task(rq, p);
+	prev->sched_class->put_prev_task(rq, prev);
 }
 
 /*
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6988cf08f705..5fda66615fee 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -399,7 +399,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.sum_exec_runtime);
 	PN(se.avg_overlap);
 	PN(se.avg_wakeup);
-	PN(se.avg_running);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 76b5792c4198..e9f5daee12c7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1689,9 +1689,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 			pse->avg_overlap < sysctl_sched_migration_cost)
 		goto preempt;
 
-	if (sched_feat(WAKEUP_RUNNING) && pse->avg_running < se->avg_running)
-		goto preempt;
-
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 0d94083582c7..d5059fd761d9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -53,11 +53,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
  */
 SCHED_FEAT(WAKEUP_OVERLAP, 0)
 
-/*
- * Wakeup preemption towards tasks that run short
- */
-SCHED_FEAT(WAKEUP_RUNNING, 0)
-
 /*
  * Use the SYNC wakeup hint, pipes and the likes use this to indicate
  * the remote end is likely to consume the data we just wrote, and
-- 
cgit v1.2.3


From 1983a922a1bc843806b9a36cf3a370b242783140 Mon Sep 17 00:00:00 2001
From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Date: Mon, 30 Nov 2009 12:16:47 +0100
Subject: sched: Make tunable scaling style configurable

As scaling now takes place on all kind of cpu add/remove events a user
that configures values via proc should be able to configure if his set
values are still rescaled or kept whatever happens.

As the comments state that log2 was just a second guess that worked the
interface is not just designed for on/off, but to choose a scaling type.
Currently this allows none, log and linear, but more important it allwos
us to keep the interface even if someone has an even better idea how to
scale the values.

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1259579808-11357-3-git-send-email-ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 11 ++++++++++-
 kernel/sched.c        | 15 ++++++++++++++-
 kernel/sched_debug.c  | 10 ++++++++++
 kernel/sched_fair.c   | 13 +++++++++++++
 kernel/sysctl.c       | 14 ++++++++++++++
 5 files changed, 61 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b1ebd3280c6..ee9f200d12d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1902,13 +1902,22 @@ extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_shares_ratelimit;
 extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+	SCHED_TUNABLESCALING_NONE,
+	SCHED_TUNABLESCALING_LOG,
+	SCHED_TUNABLESCALING_LINEAR,
+	SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
-int sched_nr_latency_handler(struct ctl_table *table, int write,
+int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
diff --git a/kernel/sched.c b/kernel/sched.c
index b54ecf84b6be..116efed962c6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7033,7 +7033,20 @@ cpumask_var_t nohz_cpu_mask;
 static void update_sysctl(void)
 {
 	unsigned int cpus = min(num_online_cpus(), 8U);
-	unsigned int factor = 1 + ilog2(cpus);
+	unsigned int factor;
+
+	switch (sysctl_sched_tunable_scaling) {
+	case SCHED_TUNABLESCALING_NONE:
+		factor = 1;
+		break;
+	case SCHED_TUNABLESCALING_LINEAR:
+		factor = cpus;
+		break;
+	case SCHED_TUNABLESCALING_LOG:
+	default:
+		factor = 1 + ilog2(cpus);
+		break;
+	}
 
 #define SET_SYSCTL(name) \
 	(sysctl_##name = (factor) * normalized_sysctl_##name)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5fda66615fee..0fc5287fe80f 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu)
 	print_rq(m, rq, cpu);
 }
 
+static const char *sched_tunable_scaling_names[] = {
+	"none",
+	"logaritmic",
+	"linear"
+};
+
 static int sched_debug_show(struct seq_file *m, void *v)
 {
 	u64 now = ktime_to_ns(ktime_get());
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v)
 #undef PN
 #undef P
 
+	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
+		sysctl_sched_tunable_scaling,
+		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+
 	for_each_online_cpu(cpu)
 		print_cpu(m, cpu);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 71b3458245e5..455106d318a8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/latencytop.h>
+#include <linux/sched.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -37,6 +38,18 @@
 unsigned int sysctl_sched_latency = 5000000ULL;
 unsigned int normalized_sysctl_sched_latency = 5000000ULL;
 
+/*
+ * The initial- and re-scaling of tunables is configurable
+ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ *
+ * Options are:
+ * SCHED_TUNABLESCALING_NONE - unscaled, always *1
+ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
+ * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
+ */
+enum sched_tunable_scaling sysctl_sched_tunable_scaling
+	= SCHED_TUNABLESCALING_LOG;
+
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e5cc53514caa..d10406e5fdfe 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -251,6 +251,8 @@ static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
+static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 #endif
 
 static struct ctl_table kern_table[] = {
@@ -304,6 +306,18 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_tunable_scaling",
+		.data		= &sysctl_sched_tunable_scaling,
+		.maxlen		= sizeof(enum sched_tunable_scaling),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_sched_tunable_scaling,
+		.extra2		= &max_sched_tunable_scaling,
+	},
+
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_shares_thresh",
-- 
cgit v1.2.3


From 91773a00f8235e4b697217867529f73e298298df Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Date: Mon, 3 Aug 2009 15:06:36 +0300
Subject: OMAP: OMAPFB: split omapfb.h

Split arch/arm/plat-omap/include/mach/omapfb.h into two files:

include/linux/omapfb.h - ioctls etc for userspace and some kernel
                         stuff for board files
drivers/video/omap/omapfb.h - for omapfb internal use

This cleans up omapfb.h and also makes it easier for the upcoming new
DSS driver to co-exist with the old driver.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-nokia770.c     |   2 +-
 arch/arm/mach-omap2/io.c                 |   2 +-
 arch/arm/plat-omap/fb.c                  |   2 +-
 arch/arm/plat-omap/include/plat/omapfb.h | 398 -------------------------------
 drivers/video/omap/blizzard.c            |   2 +-
 drivers/video/omap/dispc.c               |   2 +-
 drivers/video/omap/hwa742.c              |   3 +-
 drivers/video/omap/lcd_2430sdp.c         |   3 +-
 drivers/video/omap/lcd_ams_delta.c       |   3 +-
 drivers/video/omap/lcd_apollon.c         |   3 +-
 drivers/video/omap/lcd_h3.c              |   2 +-
 drivers/video/omap/lcd_h4.c              |   2 +-
 drivers/video/omap/lcd_htcherald.c       |   2 +-
 drivers/video/omap/lcd_inn1510.c         |   2 +-
 drivers/video/omap/lcd_inn1610.c         |   2 +-
 drivers/video/omap/lcd_ldp.c             |   3 +-
 drivers/video/omap/lcd_mipid.c           |   3 +-
 drivers/video/omap/lcd_omap2evm.c        |   3 +-
 drivers/video/omap/lcd_omap3beagle.c     |   4 +-
 drivers/video/omap/lcd_omap3evm.c        |   3 +-
 drivers/video/omap/lcd_osk.c             |   2 +-
 drivers/video/omap/lcd_overo.c           |   3 +-
 drivers/video/omap/lcd_palmte.c          |   2 +-
 drivers/video/omap/lcd_palmtt.c          |   2 +-
 drivers/video/omap/lcd_palmz71.c         |   2 +-
 drivers/video/omap/lcdc.c                |   3 +-
 drivers/video/omap/omapfb.h              | 227 ++++++++++++++++++
 drivers/video/omap/omapfb_main.c         |   2 +-
 drivers/video/omap/rfbi.c                |   3 +-
 drivers/video/omap/sossi.c               |   3 +-
 include/linux/omapfb.h                   | 197 +++++++++++++++
 31 files changed, 465 insertions(+), 427 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/omapfb.h
 create mode 100644 drivers/video/omap/omapfb.h
 create mode 100644 include/linux/omapfb.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 5a275bab2dfe..71e1a3fad0ea 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/clk.h>
+#include <linux/omapfb.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
@@ -32,7 +33,6 @@
 #include <plat/keypad.h>
 #include <plat/common.h>
 #include <plat/dsp_common.h>
-#include <plat/omapfb.h>
 #include <plat/hwa742.h>
 #include <plat/lcd_mipid.h>
 #include <plat/mmc.h>
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 59d28b2fd8c5..9f22c201ef9d 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -22,13 +22,13 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/omapfb.h>
 
 #include <asm/tlb.h>
 
 #include <asm/mach/map.h>
 
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <plat/sram.h>
 #include <plat/sdrc.h>
 #include <plat/gpmc.h>
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 78a4ce538dbd..18cf1d4b7931 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -28,13 +28,13 @@
 #include <linux/platform_device.h>
 #include <linux/bootmem.h>
 #include <linux/io.h>
+#include <linux/omapfb.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/map.h>
 
 #include <plat/board.h>
 #include <plat/sram.h>
-#include <plat/omapfb.h>
 
 #if defined(CONFIG_FB_OMAP) || defined(CONFIG_FB_OMAP_MODULE)
 
diff --git a/arch/arm/plat-omap/include/plat/omapfb.h b/arch/arm/plat-omap/include/plat/omapfb.h
deleted file mode 100644
index bfef7ab95f17..000000000000
--- a/arch/arm/plat-omap/include/plat/omapfb.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * File: arch/arm/plat-omap/include/mach/omapfb.h
- *
- * Framebuffer driver for TI OMAP boards
- *
- * Copyright (C) 2004 Nokia Corporation
- * Author: Imre Deak <imre.deak@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-
-#ifndef __OMAPFB_H
-#define __OMAPFB_H
-
-#include <asm/ioctl.h>
-#include <asm/types.h>
-
-/* IOCTL commands. */
-
-#define OMAP_IOW(num, dtype)	_IOW('O', num, dtype)
-#define OMAP_IOR(num, dtype)	_IOR('O', num, dtype)
-#define OMAP_IOWR(num, dtype)	_IOWR('O', num, dtype)
-#define OMAP_IO(num)		_IO('O', num)
-
-#define OMAPFB_MIRROR		OMAP_IOW(31, int)
-#define OMAPFB_SYNC_GFX		OMAP_IO(37)
-#define OMAPFB_VSYNC		OMAP_IO(38)
-#define OMAPFB_SET_UPDATE_MODE	OMAP_IOW(40, int)
-#define OMAPFB_GET_CAPS		OMAP_IOR(42, struct omapfb_caps)
-#define OMAPFB_GET_UPDATE_MODE	OMAP_IOW(43, int)
-#define OMAPFB_LCD_TEST		OMAP_IOW(45, int)
-#define OMAPFB_CTRL_TEST	OMAP_IOW(46, int)
-#define OMAPFB_UPDATE_WINDOW_OLD OMAP_IOW(47, struct omapfb_update_window_old)
-#define OMAPFB_SET_COLOR_KEY	OMAP_IOW(50, struct omapfb_color_key)
-#define OMAPFB_GET_COLOR_KEY	OMAP_IOW(51, struct omapfb_color_key)
-#define OMAPFB_SETUP_PLANE	OMAP_IOW(52, struct omapfb_plane_info)
-#define OMAPFB_QUERY_PLANE	OMAP_IOW(53, struct omapfb_plane_info)
-#define OMAPFB_UPDATE_WINDOW	OMAP_IOW(54, struct omapfb_update_window)
-#define OMAPFB_SETUP_MEM	OMAP_IOW(55, struct omapfb_mem_info)
-#define OMAPFB_QUERY_MEM	OMAP_IOW(56, struct omapfb_mem_info)
-
-#define OMAPFB_CAPS_GENERIC_MASK	0x00000fff
-#define OMAPFB_CAPS_LCDC_MASK		0x00fff000
-#define OMAPFB_CAPS_PANEL_MASK		0xff000000
-
-#define OMAPFB_CAPS_MANUAL_UPDATE	0x00001000
-#define OMAPFB_CAPS_TEARSYNC		0x00002000
-#define OMAPFB_CAPS_PLANE_RELOCATE_MEM	0x00004000
-#define OMAPFB_CAPS_PLANE_SCALE		0x00008000
-#define OMAPFB_CAPS_WINDOW_PIXEL_DOUBLE	0x00010000
-#define OMAPFB_CAPS_WINDOW_SCALE	0x00020000
-#define OMAPFB_CAPS_WINDOW_OVERLAY	0x00040000
-#define OMAPFB_CAPS_WINDOW_ROTATE	0x00080000
-#define OMAPFB_CAPS_SET_BACKLIGHT	0x01000000
-
-/* Values from DSP must map to lower 16-bits */
-#define OMAPFB_FORMAT_MASK		0x00ff
-#define OMAPFB_FORMAT_FLAG_DOUBLE	0x0100
-#define OMAPFB_FORMAT_FLAG_TEARSYNC	0x0200
-#define OMAPFB_FORMAT_FLAG_FORCE_VSYNC	0x0400
-#define OMAPFB_FORMAT_FLAG_ENABLE_OVERLAY	0x0800
-#define OMAPFB_FORMAT_FLAG_DISABLE_OVERLAY	0x1000
-
-#define OMAPFB_EVENT_READY	1
-#define OMAPFB_EVENT_DISABLED	2
-
-#define OMAPFB_MEMTYPE_SDRAM		0
-#define OMAPFB_MEMTYPE_SRAM		1
-#define OMAPFB_MEMTYPE_MAX		1
-
-enum omapfb_color_format {
-	OMAPFB_COLOR_RGB565 = 0,
-	OMAPFB_COLOR_YUV422,
-	OMAPFB_COLOR_YUV420,
-	OMAPFB_COLOR_CLUT_8BPP,
-	OMAPFB_COLOR_CLUT_4BPP,
-	OMAPFB_COLOR_CLUT_2BPP,
-	OMAPFB_COLOR_CLUT_1BPP,
-	OMAPFB_COLOR_RGB444,
-	OMAPFB_COLOR_YUY422,
-};
-
-struct omapfb_update_window {
-	__u32 x, y;
-	__u32 width, height;
-	__u32 format;
-	__u32 out_x, out_y;
-	__u32 out_width, out_height;
-	__u32 reserved[8];
-};
-
-struct omapfb_update_window_old {
-	__u32 x, y;
-	__u32 width, height;
-	__u32 format;
-};
-
-enum omapfb_plane {
-	OMAPFB_PLANE_GFX = 0,
-	OMAPFB_PLANE_VID1,
-	OMAPFB_PLANE_VID2,
-};
-
-enum omapfb_channel_out {
-	OMAPFB_CHANNEL_OUT_LCD = 0,
-	OMAPFB_CHANNEL_OUT_DIGIT,
-};
-
-struct omapfb_plane_info {
-	__u32 pos_x;
-	__u32 pos_y;
-	__u8  enabled;
-	__u8  channel_out;
-	__u8  mirror;
-	__u8  reserved1;
-	__u32 out_width;
-	__u32 out_height;
-	__u32 reserved2[12];
-};
-
-struct omapfb_mem_info {
-	__u32 size;
-	__u8  type;
-	__u8  reserved[3];
-};
-
-struct omapfb_caps {
-	__u32 ctrl;
-	__u32 plane_color;
-	__u32 wnd_color;
-};
-
-enum omapfb_color_key_type {
-	OMAPFB_COLOR_KEY_DISABLED = 0,
-	OMAPFB_COLOR_KEY_GFX_DST,
-	OMAPFB_COLOR_KEY_VID_SRC,
-};
-
-struct omapfb_color_key {
-	__u8  channel_out;
-	__u32 background;
-	__u32 trans_key;
-	__u8  key_type;
-};
-
-enum omapfb_update_mode {
-	OMAPFB_UPDATE_DISABLED = 0,
-	OMAPFB_AUTO_UPDATE,
-	OMAPFB_MANUAL_UPDATE
-};
-
-#ifdef __KERNEL__
-
-#include <linux/completion.h>
-#include <linux/interrupt.h>
-#include <linux/fb.h>
-#include <linux/mutex.h>
-
-#include <plat/board.h>
-
-#define OMAP_LCDC_INV_VSYNC             0x0001
-#define OMAP_LCDC_INV_HSYNC             0x0002
-#define OMAP_LCDC_INV_PIX_CLOCK         0x0004
-#define OMAP_LCDC_INV_OUTPUT_EN         0x0008
-#define OMAP_LCDC_HSVS_RISING_EDGE      0x0010
-#define OMAP_LCDC_HSVS_OPPOSITE         0x0020
-
-#define OMAP_LCDC_SIGNAL_MASK		0x003f
-
-#define OMAP_LCDC_PANEL_TFT		0x0100
-
-#define OMAPFB_PLANE_XRES_MIN		8
-#define OMAPFB_PLANE_YRES_MIN		8
-
-#ifdef CONFIG_ARCH_OMAP1
-#define OMAPFB_PLANE_NUM		1
-#else
-#define OMAPFB_PLANE_NUM		3
-#endif
-
-struct omapfb_device;
-
-struct lcd_panel {
-	const char	*name;
-	int		config;		/* TFT/STN, signal inversion */
-	int		bpp;		/* Pixel format in fb mem */
-	int		data_lines;	/* Lines on LCD HW interface */
-
-	int		x_res, y_res;
-	int		pixel_clock;	/* In kHz */
-	int		hsw;		/* Horizontal synchronization
-					   pulse width */
-	int		hfp;		/* Horizontal front porch */
-	int		hbp;		/* Horizontal back porch */
-	int		vsw;		/* Vertical synchronization
-					   pulse width */
-	int		vfp;		/* Vertical front porch */
-	int		vbp;		/* Vertical back porch */
-	int		acb;		/* ac-bias pin frequency */
-	int		pcd;		/* pixel clock divider.
-					   Obsolete use pixel_clock instead */
-
-	int		(*init)		(struct lcd_panel *panel,
-					 struct omapfb_device *fbdev);
-	void		(*cleanup)	(struct lcd_panel *panel);
-	int		(*enable)	(struct lcd_panel *panel);
-	void		(*disable)	(struct lcd_panel *panel);
-	unsigned long	(*get_caps)	(struct lcd_panel *panel);
-	int		(*set_bklight_level)(struct lcd_panel *panel,
-					     unsigned int level);
-	unsigned int	(*get_bklight_level)(struct lcd_panel *panel);
-	unsigned int	(*get_bklight_max)  (struct lcd_panel *panel);
-	int		(*run_test)	(struct lcd_panel *panel, int test_num);
-};
-
-struct extif_timings {
-	int cs_on_time;
-	int cs_off_time;
-	int we_on_time;
-	int we_off_time;
-	int re_on_time;
-	int re_off_time;
-	int we_cycle_time;
-	int re_cycle_time;
-	int cs_pulse_width;
-	int access_time;
-
-	int clk_div;
-
-	u32 tim[5];		/* set by extif->convert_timings */
-
-	int converted;
-};
-
-struct lcd_ctrl_extif {
-	int  (*init)		(struct omapfb_device *fbdev);
-	void (*cleanup)		(void);
-	void (*get_clk_info)	(u32 *clk_period, u32 *max_clk_div);
-	unsigned long (*get_max_tx_rate)(void);
-	int  (*convert_timings)	(struct extif_timings *timings);
-	void (*set_timings)	(const struct extif_timings *timings);
-	void (*set_bits_per_cycle)(int bpc);
-	void (*write_command)	(const void *buf, unsigned int len);
-	void (*read_data)	(void *buf, unsigned int len);
-	void (*write_data)	(const void *buf, unsigned int len);
-	void (*transfer_area)	(int width, int height,
-				 void (callback)(void * data), void *data);
-	int  (*setup_tearsync)	(unsigned pin_cnt,
-				 unsigned hs_pulse_time, unsigned vs_pulse_time,
-				 int hs_pol_inv, int vs_pol_inv, int div);
-	int  (*enable_tearsync) (int enable, unsigned line);
-
-	unsigned long		max_transmit_size;
-};
-
-struct omapfb_notifier_block {
-	struct notifier_block	nb;
-	void			*data;
-	int			plane_idx;
-};
-
-typedef int (*omapfb_notifier_callback_t)(struct notifier_block *,
-					  unsigned long event,
-					  void *fbi);
-
-struct omapfb_mem_region {
-	u32		paddr;
-	void __iomem	*vaddr;
-	unsigned long	size;
-	u8		type;		/* OMAPFB_PLANE_MEM_* */
-	unsigned	alloc:1;	/* allocated by the driver */
-	unsigned	map:1;		/* kernel mapped by the driver */
-};
-
-struct omapfb_mem_desc {
-	int				region_cnt;
-	struct omapfb_mem_region	region[OMAPFB_PLANE_NUM];
-};
-
-struct lcd_ctrl {
-	const char	*name;
-	void		*data;
-
-	int		(*init)		  (struct omapfb_device *fbdev,
-					   int ext_mode,
-					   struct omapfb_mem_desc *req_md);
-	void		(*cleanup)	  (void);
-	void		(*bind_client)	  (struct omapfb_notifier_block *nb);
-	void		(*get_caps)	  (int plane, struct omapfb_caps *caps);
-	int		(*set_update_mode)(enum omapfb_update_mode mode);
-	enum omapfb_update_mode (*get_update_mode)(void);
-	int		(*setup_plane)	  (int plane, int channel_out,
-					   unsigned long offset,
-					   int screen_width,
-					   int pos_x, int pos_y, int width,
-					   int height, int color_mode);
-	int		(*set_rotate)	  (int angle);
-	int		(*setup_mem)	  (int plane, size_t size,
-					   int mem_type, unsigned long *paddr);
-	int		(*mmap)		  (struct fb_info *info,
-					   struct vm_area_struct *vma);
-	int		(*set_scale)	  (int plane,
-					   int orig_width, int orig_height,
-					   int out_width, int out_height);
-	int		(*enable_plane)	  (int plane, int enable);
-	int		(*update_window)  (struct fb_info *fbi,
-					   struct omapfb_update_window *win,
-					   void (*callback)(void *),
-					   void *callback_data);
-	void		(*sync)		  (void);
-	void		(*suspend)	  (void);
-	void		(*resume)	  (void);
-	int		(*run_test)	  (int test_num);
-	int		(*setcolreg)	  (u_int regno, u16 red, u16 green,
-					   u16 blue, u16 transp,
-					   int update_hw_mem);
-	int		(*set_color_key)  (struct omapfb_color_key *ck);
-	int		(*get_color_key)  (struct omapfb_color_key *ck);
-};
-
-enum omapfb_state {
-	OMAPFB_DISABLED	= 0,
-	OMAPFB_SUSPENDED= 99,
-	OMAPFB_ACTIVE	= 100
-};
-
-struct omapfb_plane_struct {
-	int				idx;
-	struct omapfb_plane_info	info;
-	enum omapfb_color_format	color_mode;
-	struct omapfb_device		*fbdev;
-};
-
-struct omapfb_device {
-	int			state;
-	int                     ext_lcdc;               /* Using external
-                                                           LCD controller */
-	struct mutex		rqueue_mutex;
-
-	int			palette_size;
-	u32			pseudo_palette[17];
-
-	struct lcd_panel	*panel;			/* LCD panel */
-	const struct lcd_ctrl	*ctrl;			/* LCD controller */
-	const struct lcd_ctrl	*int_ctrl;		/* internal LCD ctrl */
-	struct lcd_ctrl_extif	*ext_if;		/* LCD ctrl external
-							   interface */
-	struct device		*dev;
-	struct fb_var_screeninfo	new_var;	/* for mode changes */
-
-	struct omapfb_mem_desc		mem_desc;
-	struct fb_info			*fb_info[OMAPFB_PLANE_NUM];
-};
-
-struct omapfb_platform_data {
-	struct omap_lcd_config		lcd;
-	struct omapfb_mem_desc		mem_desc;
-	void				*ctrl_platform_data;
-};
-
-#ifdef CONFIG_ARCH_OMAP1
-extern struct lcd_ctrl omap1_lcd_ctrl;
-#else
-extern struct lcd_ctrl omap2_disp_ctrl;
-#endif
-
-extern void omapfb_reserve_sdram(void);
-extern void omapfb_register_panel(struct lcd_panel *panel);
-extern void omapfb_write_first_pixel(struct omapfb_device *fbdev, u16 pixval);
-extern void omapfb_notify_clients(struct omapfb_device *fbdev,
-				  unsigned long event);
-extern int  omapfb_register_client(struct omapfb_notifier_block *nb,
-				   omapfb_notifier_callback_t callback,
-				   void *callback_data);
-extern int  omapfb_unregister_client(struct omapfb_notifier_block *nb);
-extern int  omapfb_update_window_async(struct fb_info *fbi,
-				       struct omapfb_update_window *win,
-				       void (*callback)(void *),
-				       void *callback_data);
-
-/* in arch/arm/plat-omap/fb.c */
-extern void omapfb_set_ctrl_platform_data(void *pdata);
-
-#endif /* __KERNEL__ */
-
-#endif /* __OMAPFB_H */
diff --git a/drivers/video/omap/blizzard.c b/drivers/video/omap/blizzard.c
index f5d75f22cef9..2ffb34af4c59 100644
--- a/drivers/video/omap/blizzard.c
+++ b/drivers/video/omap/blizzard.c
@@ -27,9 +27,9 @@
 #include <linux/clk.h>
 
 #include <plat/dma.h>
-#include <plat/omapfb.h>
 #include <plat/blizzard.h>
 
+#include "omapfb.h"
 #include "dispc.h"
 
 #define MODULE_NAME				"blizzard"
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index 7c833db4f9b7..b8f75a7936a2 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -26,9 +26,9 @@
 #include <linux/io.h>
 
 #include <plat/sram.h>
-#include <plat/omapfb.h>
 #include <plat/board.h>
 
+#include "omapfb.h"
 #include "dispc.h"
 
 #define MODULE_NAME			"dispc"
diff --git a/drivers/video/omap/hwa742.c b/drivers/video/omap/hwa742.c
index 17a975e4c9c9..0016f77cd13f 100644
--- a/drivers/video/omap/hwa742.c
+++ b/drivers/video/omap/hwa742.c
@@ -25,10 +25,11 @@
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
+#include <linux/interrupt.h>
 
 #include <plat/dma.h>
-#include <plat/omapfb.h>
 #include <plat/hwa742.h>
+#include "omapfb.h"
 
 #define HWA742_REV_CODE_REG       0x0
 #define HWA742_CONFIG_REG         0x2
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
index fea7feee0b77..760645d9dbb6 100644
--- a/drivers/video/omap/lcd_2430sdp.c
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -28,9 +28,10 @@
 #include <linux/i2c/twl4030.h>
 
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define SDP2430_LCD_PANEL_BACKLIGHT_GPIO	91
 #define SDP2430_LCD_PANEL_ENABLE_GPIO		154
 #define SDP3430_LCD_PANEL_BACKLIGHT_GPIO	24
diff --git a/drivers/video/omap/lcd_ams_delta.c b/drivers/video/omap/lcd_ams_delta.c
index 3d5277252ca3..9340ca3c1c81 100644
--- a/drivers/video/omap/lcd_ams_delta.c
+++ b/drivers/video/omap/lcd_ams_delta.c
@@ -27,7 +27,8 @@
 
 #include <plat/board-ams-delta.h>
 #include <mach/hardware.h>
-#include <plat/omapfb.h>
+
+#include "omapfb.h"
 
 #define AMS_DELTA_DEFAULT_CONTRAST	112
 
diff --git a/drivers/video/omap/lcd_apollon.c b/drivers/video/omap/lcd_apollon.c
index 4c5cefc5153b..2be94eb3bbf5 100644
--- a/drivers/video/omap/lcd_apollon.c
+++ b/drivers/video/omap/lcd_apollon.c
@@ -26,7 +26,8 @@
 
 #include <mach/gpio.h>
 #include <plat/mux.h>
-#include <plat/omapfb.h>
+
+#include "omapfb.h"
 
 /* #define USE_35INCH_LCD 1 */
 
diff --git a/drivers/video/omap/lcd_h3.c b/drivers/video/omap/lcd_h3.c
index 240b4fb10741..8df688748b5a 100644
--- a/drivers/video/omap/lcd_h3.c
+++ b/drivers/video/omap/lcd_h3.c
@@ -24,7 +24,7 @@
 #include <linux/i2c/tps65010.h>
 
 #include <mach/gpio.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 #define MODULE_NAME	"omapfb-lcd_h3"
 
diff --git a/drivers/video/omap/lcd_h4.c b/drivers/video/omap/lcd_h4.c
index 720625da1f4e..03a06a982750 100644
--- a/drivers/video/omap/lcd_h4.c
+++ b/drivers/video/omap/lcd_h4.c
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int h4_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
 {
diff --git a/drivers/video/omap/lcd_htcherald.c b/drivers/video/omap/lcd_htcherald.c
index 2e0c81ea7483..a9007c5d1fad 100644
--- a/drivers/video/omap/lcd_htcherald.c
+++ b/drivers/video/omap/lcd_htcherald.c
@@ -29,7 +29,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int htcherald_panel_init(struct lcd_panel *panel,
 					struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_inn1510.c b/drivers/video/omap/lcd_inn1510.c
index aafe9b497e2d..3271f1643b26 100644
--- a/drivers/video/omap/lcd_inn1510.c
+++ b/drivers/video/omap/lcd_inn1510.c
@@ -24,7 +24,7 @@
 #include <linux/io.h>
 
 #include <plat/fpga.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int innovator1510_panel_init(struct lcd_panel *panel,
 				    struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_inn1610.c b/drivers/video/omap/lcd_inn1610.c
index 0de338264a8a..9fff86f67bde 100644
--- a/drivers/video/omap/lcd_inn1610.c
+++ b/drivers/video/omap/lcd_inn1610.c
@@ -23,7 +23,7 @@
 #include <linux/platform_device.h>
 
 #include <mach/gpio.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 #define MODULE_NAME	"omapfb-lcd_h3"
 
diff --git a/drivers/video/omap/lcd_ldp.c b/drivers/video/omap/lcd_ldp.c
index 6a260dfdadc5..5bb7f6f14601 100644
--- a/drivers/video/omap/lcd_ldp.c
+++ b/drivers/video/omap/lcd_ldp.c
@@ -28,9 +28,10 @@
 
 #include <mach/gpio.h>
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define LCD_PANEL_BACKLIGHT_GPIO	(15 + OMAP_MAX_GPIO_LINES)
 #define LCD_PANEL_ENABLE_GPIO		(7 + OMAP_MAX_GPIO_LINES)
 
diff --git a/drivers/video/omap/lcd_mipid.c b/drivers/video/omap/lcd_mipid.c
index 2162eb09e0fe..46ca90d1d177 100644
--- a/drivers/video/omap/lcd_mipid.c
+++ b/drivers/video/omap/lcd_mipid.c
@@ -23,9 +23,10 @@
 #include <linux/workqueue.h>
 #include <linux/spi/spi.h>
 
-#include <plat/omapfb.h>
 #include <plat/lcd_mipid.h>
 
+#include "omapfb.h"
+
 #define MIPID_MODULE_NAME		"lcd_mipid"
 
 #define MIPID_CMD_READ_DISP_ID		0x04
diff --git a/drivers/video/omap/lcd_omap2evm.c b/drivers/video/omap/lcd_omap2evm.c
index e1a38abca3e7..006c2fe7360e 100644
--- a/drivers/video/omap/lcd_omap2evm.c
+++ b/drivers/video/omap/lcd_omap2evm.c
@@ -27,9 +27,10 @@
 #include <linux/i2c/twl4030.h>
 
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define LCD_PANEL_ENABLE_GPIO	154
 #define LCD_PANEL_LR		128
 #define LCD_PANEL_UD		129
diff --git a/drivers/video/omap/lcd_omap3beagle.c b/drivers/video/omap/lcd_omap3beagle.c
index ccec084ed647..fc503d8f3c24 100644
--- a/drivers/video/omap/lcd_omap3beagle.c
+++ b/drivers/video/omap/lcd_omap3beagle.c
@@ -26,9 +26,11 @@
 #include <linux/i2c/twl4030.h>
 
 #include <plat/mux.h>
-#include <plat/omapfb.h>
+#include <plat/mux.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define LCD_PANEL_ENABLE_GPIO       170
 
 static int omap3beagle_panel_init(struct lcd_panel *panel,
diff --git a/drivers/video/omap/lcd_omap3evm.c b/drivers/video/omap/lcd_omap3evm.c
index 556eb31db24c..ae2edc4081a8 100644
--- a/drivers/video/omap/lcd_omap3evm.c
+++ b/drivers/video/omap/lcd_omap3evm.c
@@ -26,9 +26,10 @@
 #include <linux/i2c/twl4030.h>
 
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define LCD_PANEL_ENABLE_GPIO       153
 #define LCD_PANEL_LR                2
 #define LCD_PANEL_UD                3
diff --git a/drivers/video/omap/lcd_osk.c b/drivers/video/omap/lcd_osk.c
index bb21d7dca39e..b87e8b83f29c 100644
--- a/drivers/video/omap/lcd_osk.c
+++ b/drivers/video/omap/lcd_osk.c
@@ -25,7 +25,7 @@
 
 #include <mach/gpio.h>
 #include <plat/mux.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int osk_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
 {
diff --git a/drivers/video/omap/lcd_overo.c b/drivers/video/omap/lcd_overo.c
index b0f86e514cde..56ee192e9ee2 100644
--- a/drivers/video/omap/lcd_overo.c
+++ b/drivers/video/omap/lcd_overo.c
@@ -25,9 +25,10 @@
 
 #include <mach/gpio.h>
 #include <plat/mux.h>
-#include <plat/omapfb.h>
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #define LCD_ENABLE       144
 
 static int overo_panel_init(struct lcd_panel *panel,
diff --git a/drivers/video/omap/lcd_palmte.c b/drivers/video/omap/lcd_palmte.c
index d30289603ce8..4cb301750d02 100644
--- a/drivers/video/omap/lcd_palmte.c
+++ b/drivers/video/omap/lcd_palmte.c
@@ -24,7 +24,7 @@
 #include <linux/io.h>
 
 #include <plat/fpga.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int palmte_panel_init(struct lcd_panel *panel,
 				struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_palmtt.c b/drivers/video/omap/lcd_palmtt.c
index 557424fb6df1..ff0e6d7ab3a2 100644
--- a/drivers/video/omap/lcd_palmtt.c
+++ b/drivers/video/omap/lcd_palmtt.c
@@ -30,7 +30,7 @@ GPIO13 - screen blanking
 #include <linux/io.h>
 
 #include <mach/gpio.h>
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int palmtt_panel_init(struct lcd_panel *panel,
 	struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_palmz71.c b/drivers/video/omap/lcd_palmz71.c
index 5f4b5b2c1f41..2334e56536bc 100644
--- a/drivers/video/omap/lcd_palmz71.c
+++ b/drivers/video/omap/lcd_palmz71.c
@@ -24,7 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
-#include <plat/omapfb.h>
+#include "omapfb.h"
 
 static int palmz71_panel_init(struct lcd_panel *panel,
 			      struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcdc.c b/drivers/video/omap/lcdc.c
index 5f32cafbf74c..b831e1df629e 100644
--- a/drivers/video/omap/lcdc.c
+++ b/drivers/video/omap/lcdc.c
@@ -30,10 +30,11 @@
 #include <linux/clk.h>
 
 #include <plat/dma.h>
-#include <plat/omapfb.h>
 
 #include <asm/mach-types.h>
 
+#include "omapfb.h"
+
 #include "lcdc.h"
 
 #define MODULE_NAME			"lcdc"
diff --git a/drivers/video/omap/omapfb.h b/drivers/video/omap/omapfb.h
new file mode 100644
index 000000000000..46e4714014e8
--- /dev/null
+++ b/drivers/video/omap/omapfb.h
@@ -0,0 +1,227 @@
+/*
+ * File: drivers/video/omap/omapfb.h
+ *
+ * Framebuffer driver for TI OMAP boards
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Author: Imre Deak <imre.deak@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef __OMAPFB_H
+#define __OMAPFB_H
+
+#include <linux/fb.h>
+#include <linux/mutex.h>
+#include <linux/omapfb.h>
+
+#define OMAPFB_EVENT_READY	1
+#define OMAPFB_EVENT_DISABLED	2
+
+#define OMAP_LCDC_INV_VSYNC             0x0001
+#define OMAP_LCDC_INV_HSYNC             0x0002
+#define OMAP_LCDC_INV_PIX_CLOCK         0x0004
+#define OMAP_LCDC_INV_OUTPUT_EN         0x0008
+#define OMAP_LCDC_HSVS_RISING_EDGE      0x0010
+#define OMAP_LCDC_HSVS_OPPOSITE         0x0020
+
+#define OMAP_LCDC_SIGNAL_MASK		0x003f
+
+#define OMAP_LCDC_PANEL_TFT		0x0100
+
+#define OMAPFB_PLANE_XRES_MIN		8
+#define OMAPFB_PLANE_YRES_MIN		8
+
+struct omapfb_device;
+
+struct lcd_panel {
+	const char	*name;
+	int		config;		/* TFT/STN, signal inversion */
+	int		bpp;		/* Pixel format in fb mem */
+	int		data_lines;	/* Lines on LCD HW interface */
+
+	int		x_res, y_res;
+	int		pixel_clock;	/* In kHz */
+	int		hsw;		/* Horizontal synchronization
+					   pulse width */
+	int		hfp;		/* Horizontal front porch */
+	int		hbp;		/* Horizontal back porch */
+	int		vsw;		/* Vertical synchronization
+					   pulse width */
+	int		vfp;		/* Vertical front porch */
+	int		vbp;		/* Vertical back porch */
+	int		acb;		/* ac-bias pin frequency */
+	int		pcd;		/* pixel clock divider.
+					   Obsolete use pixel_clock instead */
+
+	int		(*init)		(struct lcd_panel *panel,
+					 struct omapfb_device *fbdev);
+	void		(*cleanup)	(struct lcd_panel *panel);
+	int		(*enable)	(struct lcd_panel *panel);
+	void		(*disable)	(struct lcd_panel *panel);
+	unsigned long	(*get_caps)	(struct lcd_panel *panel);
+	int		(*set_bklight_level)(struct lcd_panel *panel,
+					     unsigned int level);
+	unsigned int	(*get_bklight_level)(struct lcd_panel *panel);
+	unsigned int	(*get_bklight_max)  (struct lcd_panel *panel);
+	int		(*run_test)	(struct lcd_panel *panel, int test_num);
+};
+
+struct extif_timings {
+	int cs_on_time;
+	int cs_off_time;
+	int we_on_time;
+	int we_off_time;
+	int re_on_time;
+	int re_off_time;
+	int we_cycle_time;
+	int re_cycle_time;
+	int cs_pulse_width;
+	int access_time;
+
+	int clk_div;
+
+	u32 tim[5];		/* set by extif->convert_timings */
+
+	int converted;
+};
+
+struct lcd_ctrl_extif {
+	int  (*init)		(struct omapfb_device *fbdev);
+	void (*cleanup)		(void);
+	void (*get_clk_info)	(u32 *clk_period, u32 *max_clk_div);
+	unsigned long (*get_max_tx_rate)(void);
+	int  (*convert_timings)	(struct extif_timings *timings);
+	void (*set_timings)	(const struct extif_timings *timings);
+	void (*set_bits_per_cycle)(int bpc);
+	void (*write_command)	(const void *buf, unsigned int len);
+	void (*read_data)	(void *buf, unsigned int len);
+	void (*write_data)	(const void *buf, unsigned int len);
+	void (*transfer_area)	(int width, int height,
+				 void (callback)(void *data), void *data);
+	int  (*setup_tearsync)	(unsigned pin_cnt,
+				 unsigned hs_pulse_time, unsigned vs_pulse_time,
+				 int hs_pol_inv, int vs_pol_inv, int div);
+	int  (*enable_tearsync) (int enable, unsigned line);
+
+	unsigned long		max_transmit_size;
+};
+
+struct omapfb_notifier_block {
+	struct notifier_block	nb;
+	void			*data;
+	int			plane_idx;
+};
+
+typedef int (*omapfb_notifier_callback_t)(struct notifier_block *,
+					  unsigned long event,
+					  void *fbi);
+
+struct lcd_ctrl {
+	const char	*name;
+	void		*data;
+
+	int		(*init)		  (struct omapfb_device *fbdev,
+					   int ext_mode,
+					   struct omapfb_mem_desc *req_md);
+	void		(*cleanup)	  (void);
+	void		(*bind_client)	  (struct omapfb_notifier_block *nb);
+	void		(*get_caps)	  (int plane, struct omapfb_caps *caps);
+	int		(*set_update_mode)(enum omapfb_update_mode mode);
+	enum omapfb_update_mode (*get_update_mode)(void);
+	int		(*setup_plane)	  (int plane, int channel_out,
+					   unsigned long offset,
+					   int screen_width,
+					   int pos_x, int pos_y, int width,
+					   int height, int color_mode);
+	int		(*set_rotate)	  (int angle);
+	int		(*setup_mem)	  (int plane, size_t size,
+					   int mem_type, unsigned long *paddr);
+	int		(*mmap)		  (struct fb_info *info,
+					   struct vm_area_struct *vma);
+	int		(*set_scale)	  (int plane,
+					   int orig_width, int orig_height,
+					   int out_width, int out_height);
+	int		(*enable_plane)	  (int plane, int enable);
+	int		(*update_window)  (struct fb_info *fbi,
+					   struct omapfb_update_window *win,
+					   void (*callback)(void *),
+					   void *callback_data);
+	void		(*sync)		  (void);
+	void		(*suspend)	  (void);
+	void		(*resume)	  (void);
+	int		(*run_test)	  (int test_num);
+	int		(*setcolreg)	  (u_int regno, u16 red, u16 green,
+					   u16 blue, u16 transp,
+					   int update_hw_mem);
+	int		(*set_color_key)  (struct omapfb_color_key *ck);
+	int		(*get_color_key)  (struct omapfb_color_key *ck);
+};
+
+enum omapfb_state {
+	OMAPFB_DISABLED		= 0,
+	OMAPFB_SUSPENDED	= 99,
+	OMAPFB_ACTIVE		= 100
+};
+
+struct omapfb_plane_struct {
+	int				idx;
+	struct omapfb_plane_info	info;
+	enum omapfb_color_format	color_mode;
+	struct omapfb_device		*fbdev;
+};
+
+struct omapfb_device {
+	int			state;
+	int                     ext_lcdc;		/* Using external
+							   LCD controller */
+	struct mutex		rqueue_mutex;
+
+	int			palette_size;
+	u32			pseudo_palette[17];
+
+	struct lcd_panel	*panel;			/* LCD panel */
+	const struct lcd_ctrl	*ctrl;			/* LCD controller */
+	const struct lcd_ctrl	*int_ctrl;		/* internal LCD ctrl */
+	struct lcd_ctrl_extif	*ext_if;		/* LCD ctrl external
+							   interface */
+	struct device		*dev;
+	struct fb_var_screeninfo	new_var;	/* for mode changes */
+
+	struct omapfb_mem_desc		mem_desc;
+	struct fb_info			*fb_info[OMAPFB_PLANE_NUM];
+};
+
+#ifdef CONFIG_ARCH_OMAP1
+extern struct lcd_ctrl omap1_lcd_ctrl;
+#else
+extern struct lcd_ctrl omap2_disp_ctrl;
+#endif
+
+extern void omapfb_register_panel(struct lcd_panel *panel);
+extern void omapfb_write_first_pixel(struct omapfb_device *fbdev, u16 pixval);
+extern void omapfb_notify_clients(struct omapfb_device *fbdev,
+				  unsigned long event);
+extern int  omapfb_register_client(struct omapfb_notifier_block *nb,
+				   omapfb_notifier_callback_t callback,
+				   void *callback_data);
+extern int  omapfb_unregister_client(struct omapfb_notifier_block *nb);
+extern int  omapfb_update_window_async(struct fb_info *fbi,
+				       struct omapfb_update_window *win,
+				       void (*callback)(void *),
+				       void *callback_data);
+
+#endif /* __OMAPFB_H */
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index f900a43db8d7..c7f59a5ccdbc 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -29,8 +29,8 @@
 #include <linux/uaccess.h>
 
 #include <plat/dma.h>
-#include <plat/omapfb.h>
 
+#include "omapfb.h"
 #include "lcdc.h"
 #include "dispc.h"
 
diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c
index c90fa39486b4..fed7b1bda19c 100644
--- a/drivers/video/omap/rfbi.c
+++ b/drivers/video/omap/rfbi.c
@@ -27,8 +27,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 
-#include <plat/omapfb.h>
-
+#include "omapfb.h"
 #include "dispc.h"
 
 /* To work around an RFBI transfer rate limitation */
diff --git a/drivers/video/omap/sossi.c b/drivers/video/omap/sossi.c
index 79dc84f09713..8fb7c708f563 100644
--- a/drivers/video/omap/sossi.c
+++ b/drivers/video/omap/sossi.c
@@ -23,10 +23,11 @@
 #include <linux/clk.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
 
 #include <plat/dma.h>
-#include <plat/omapfb.h>
 
+#include "omapfb.h"
 #include "lcdc.h"
 
 #define MODULE_NAME		"omapfb-sossi"
diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h
new file mode 100644
index 000000000000..a8efa92c6c35
--- /dev/null
+++ b/include/linux/omapfb.h
@@ -0,0 +1,197 @@
+/*
+ * File: include/linux/omapfb.h
+ *
+ * Framebuffer driver for TI OMAP boards
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Author: Imre Deak <imre.deak@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef __LINUX_OMAPFB_H__
+#define __LINUX_OMAPFB_H__
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* IOCTL commands. */
+
+#define OMAP_IOW(num, dtype)	_IOW('O', num, dtype)
+#define OMAP_IOR(num, dtype)	_IOR('O', num, dtype)
+#define OMAP_IOWR(num, dtype)	_IOWR('O', num, dtype)
+#define OMAP_IO(num)		_IO('O', num)
+
+#define OMAPFB_MIRROR		OMAP_IOW(31, int)
+#define OMAPFB_SYNC_GFX		OMAP_IO(37)
+#define OMAPFB_VSYNC		OMAP_IO(38)
+#define OMAPFB_SET_UPDATE_MODE	OMAP_IOW(40, int)
+#define OMAPFB_GET_CAPS		OMAP_IOR(42, struct omapfb_caps)
+#define OMAPFB_GET_UPDATE_MODE	OMAP_IOW(43, int)
+#define OMAPFB_LCD_TEST		OMAP_IOW(45, int)
+#define OMAPFB_CTRL_TEST	OMAP_IOW(46, int)
+#define OMAPFB_UPDATE_WINDOW_OLD OMAP_IOW(47, struct omapfb_update_window_old)
+#define OMAPFB_SET_COLOR_KEY	OMAP_IOW(50, struct omapfb_color_key)
+#define OMAPFB_GET_COLOR_KEY	OMAP_IOW(51, struct omapfb_color_key)
+#define OMAPFB_SETUP_PLANE	OMAP_IOW(52, struct omapfb_plane_info)
+#define OMAPFB_QUERY_PLANE	OMAP_IOW(53, struct omapfb_plane_info)
+#define OMAPFB_UPDATE_WINDOW	OMAP_IOW(54, struct omapfb_update_window)
+#define OMAPFB_SETUP_MEM	OMAP_IOW(55, struct omapfb_mem_info)
+#define OMAPFB_QUERY_MEM	OMAP_IOW(56, struct omapfb_mem_info)
+
+#define OMAPFB_CAPS_GENERIC_MASK	0x00000fff
+#define OMAPFB_CAPS_LCDC_MASK		0x00fff000
+#define OMAPFB_CAPS_PANEL_MASK		0xff000000
+
+#define OMAPFB_CAPS_MANUAL_UPDATE	0x00001000
+#define OMAPFB_CAPS_TEARSYNC		0x00002000
+#define OMAPFB_CAPS_PLANE_RELOCATE_MEM	0x00004000
+#define OMAPFB_CAPS_PLANE_SCALE		0x00008000
+#define OMAPFB_CAPS_WINDOW_PIXEL_DOUBLE	0x00010000
+#define OMAPFB_CAPS_WINDOW_SCALE	0x00020000
+#define OMAPFB_CAPS_WINDOW_OVERLAY	0x00040000
+#define OMAPFB_CAPS_WINDOW_ROTATE	0x00080000
+#define OMAPFB_CAPS_SET_BACKLIGHT	0x01000000
+
+/* Values from DSP must map to lower 16-bits */
+#define OMAPFB_FORMAT_MASK		0x00ff
+#define OMAPFB_FORMAT_FLAG_DOUBLE	0x0100
+#define OMAPFB_FORMAT_FLAG_TEARSYNC	0x0200
+#define OMAPFB_FORMAT_FLAG_FORCE_VSYNC	0x0400
+#define OMAPFB_FORMAT_FLAG_ENABLE_OVERLAY	0x0800
+#define OMAPFB_FORMAT_FLAG_DISABLE_OVERLAY	0x1000
+
+#define OMAPFB_MEMTYPE_SDRAM		0
+#define OMAPFB_MEMTYPE_SRAM		1
+#define OMAPFB_MEMTYPE_MAX		1
+
+enum omapfb_color_format {
+	OMAPFB_COLOR_RGB565 = 0,
+	OMAPFB_COLOR_YUV422,
+	OMAPFB_COLOR_YUV420,
+	OMAPFB_COLOR_CLUT_8BPP,
+	OMAPFB_COLOR_CLUT_4BPP,
+	OMAPFB_COLOR_CLUT_2BPP,
+	OMAPFB_COLOR_CLUT_1BPP,
+	OMAPFB_COLOR_RGB444,
+	OMAPFB_COLOR_YUY422,
+};
+
+struct omapfb_update_window {
+	__u32 x, y;
+	__u32 width, height;
+	__u32 format;
+	__u32 out_x, out_y;
+	__u32 out_width, out_height;
+	__u32 reserved[8];
+};
+
+struct omapfb_update_window_old {
+	__u32 x, y;
+	__u32 width, height;
+	__u32 format;
+};
+
+enum omapfb_plane {
+	OMAPFB_PLANE_GFX = 0,
+	OMAPFB_PLANE_VID1,
+	OMAPFB_PLANE_VID2,
+};
+
+enum omapfb_channel_out {
+	OMAPFB_CHANNEL_OUT_LCD = 0,
+	OMAPFB_CHANNEL_OUT_DIGIT,
+};
+
+struct omapfb_plane_info {
+	__u32 pos_x;
+	__u32 pos_y;
+	__u8  enabled;
+	__u8  channel_out;
+	__u8  mirror;
+	__u8  reserved1;
+	__u32 out_width;
+	__u32 out_height;
+	__u32 reserved2[12];
+};
+
+struct omapfb_mem_info {
+	__u32 size;
+	__u8  type;
+	__u8  reserved[3];
+};
+
+struct omapfb_caps {
+	__u32 ctrl;
+	__u32 plane_color;
+	__u32 wnd_color;
+};
+
+enum omapfb_color_key_type {
+	OMAPFB_COLOR_KEY_DISABLED = 0,
+	OMAPFB_COLOR_KEY_GFX_DST,
+	OMAPFB_COLOR_KEY_VID_SRC,
+};
+
+struct omapfb_color_key {
+	__u8  channel_out;
+	__u32 background;
+	__u32 trans_key;
+	__u8  key_type;
+};
+
+enum omapfb_update_mode {
+	OMAPFB_UPDATE_DISABLED = 0,
+	OMAPFB_AUTO_UPDATE,
+	OMAPFB_MANUAL_UPDATE
+};
+
+#ifdef __KERNEL__
+
+#include <plat/board.h>
+
+#ifdef CONFIG_ARCH_OMAP1
+#define OMAPFB_PLANE_NUM		1
+#else
+#define OMAPFB_PLANE_NUM		3
+#endif
+
+struct omapfb_mem_region {
+	u32		paddr;
+	void __iomem	*vaddr;
+	unsigned long	size;
+	u8		type;		/* OMAPFB_PLANE_MEM_* */
+	unsigned	alloc:1;	/* allocated by the driver */
+	unsigned	map:1;		/* kernel mapped by the driver */
+};
+
+struct omapfb_mem_desc {
+	int				region_cnt;
+	struct omapfb_mem_region	region[OMAPFB_PLANE_NUM];
+};
+
+struct omapfb_platform_data {
+	struct omap_lcd_config		lcd;
+	struct omapfb_mem_desc		mem_desc;
+	void				*ctrl_platform_data;
+};
+
+/* in arch/arm/plat-omap/fb.c */
+extern void omapfb_set_ctrl_platform_data(void *pdata);
+extern void omapfb_reserve_sdram(void);
+
+#endif
+
+#endif /* __OMAPFB_H */
-- 
cgit v1.2.3


From b39a982ddecf1d95ed96f8457c39d3ea11df93f6 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Date: Tue, 4 Aug 2009 16:12:50 +0300
Subject: OMAP: DSS2: omapfb driver

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
---
 arch/arm/plat-omap/fb.c                   |   47 +-
 drivers/video/omap/Kconfig                |    5 +-
 drivers/video/omap2/Kconfig               |    1 +
 drivers/video/omap2/Makefile              |    1 +
 drivers/video/omap2/omapfb/Kconfig        |   37 +
 drivers/video/omap2/omapfb/Makefile       |    2 +
 drivers/video/omap2/omapfb/omapfb-ioctl.c |  755 ++++++++++
 drivers/video/omap2/omapfb/omapfb-main.c  | 2261 +++++++++++++++++++++++++++++
 drivers/video/omap2/omapfb/omapfb-sysfs.c |  507 +++++++
 drivers/video/omap2/omapfb/omapfb.h       |  146 ++
 include/linux/omapfb.h                    |   54 +
 11 files changed, 3813 insertions(+), 3 deletions(-)
 create mode 100644 drivers/video/omap2/omapfb/Kconfig
 create mode 100644 drivers/video/omap2/omapfb/Makefile
 create mode 100644 drivers/video/omap2/omapfb/omapfb-ioctl.c
 create mode 100644 drivers/video/omap2/omapfb/omapfb-main.c
 create mode 100644 drivers/video/omap2/omapfb/omapfb-sysfs.c
 create mode 100644 drivers/video/omap2/omapfb/omapfb.h

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 18cf1d4b7931..d3eea4f47533 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -55,6 +55,10 @@ static struct platform_device omap_fb_device = {
 	.num_resources = 0,
 };
 
+void omapfb_set_platform_data(struct omapfb_platform_data *data)
+{
+}
+
 static inline int ranges_overlap(unsigned long start1, unsigned long size1,
 				 unsigned long start2, unsigned long size2)
 {
@@ -327,7 +331,33 @@ static inline int omap_init_fb(void)
 
 arch_initcall(omap_init_fb);
 
-#else
+#elif defined(CONFIG_FB_OMAP2) || defined(CONFIG_FB_OMAP2_MODULE)
+
+static u64 omap_fb_dma_mask = ~(u32)0;
+static struct omapfb_platform_data omapfb_config;
+
+static struct platform_device omap_fb_device = {
+	.name		= "omapfb",
+	.id		= -1,
+	.dev = {
+		.dma_mask		= &omap_fb_dma_mask,
+		.coherent_dma_mask	= ~(u32)0,
+		.platform_data		= &omapfb_config,
+	},
+	.num_resources = 0,
+};
+
+void omapfb_set_platform_data(struct omapfb_platform_data *data)
+{
+	omapfb_config = *data;
+}
+
+static inline int omap_init_fb(void)
+{
+	return platform_device_register(&omap_fb_device);
+}
+
+arch_initcall(omap_init_fb);
 
 void omapfb_reserve_sdram(void) {}
 unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
@@ -339,5 +369,20 @@ unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
 	return 0;
 }
 
+#else
+
+void omapfb_set_platform_data(struct omapfb_platform_data *data)
+{
+}
+
+void omapfb_reserve_sdram(void) {}
+unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
+				  unsigned long sram_vstart,
+				  unsigned long sram_size,
+				  unsigned long start_avail,
+				  unsigned long size_avail)
+{
+	return 0;
+}
 
 #endif
diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index 551e3e9c4cbe..455c6055325d 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -1,6 +1,7 @@
 config FB_OMAP
 	tristate "OMAP frame buffer support (EXPERIMENTAL)"
-	depends on FB && ARCH_OMAP
+	depends on FB && ARCH_OMAP && (OMAP2_DSS = "n")
+
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
@@ -72,7 +73,7 @@ config FB_OMAP_LCD_MIPID
 
 config FB_OMAP_BOOTLOADER_INIT
 	bool "Check bootloader initialization"
-	depends on FB_OMAP
+	depends on FB_OMAP || FB_OMAP2
 	help
 	  Say Y here if you want to enable checking if the bootloader has
 	  already initialized the display controller. In this case the
diff --git a/drivers/video/omap2/Kconfig b/drivers/video/omap2/Kconfig
index 55b4c4265f57..3e60d7e88540 100644
--- a/drivers/video/omap2/Kconfig
+++ b/drivers/video/omap2/Kconfig
@@ -5,3 +5,4 @@ config OMAP2_VRFB
 	bool
 
 source "drivers/video/omap2/dss/Kconfig"
+source "drivers/video/omap2/omapfb/Kconfig"
diff --git a/drivers/video/omap2/Makefile b/drivers/video/omap2/Makefile
index ee0644f9d3c1..3ba6ef5e30d4 100644
--- a/drivers/video/omap2/Makefile
+++ b/drivers/video/omap2/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_OMAP2_VRAM) += vram.o
 obj-$(CONFIG_OMAP2_VRFB) += vrfb.o
 
 obj-y += dss/
+obj-y += omapfb/
diff --git a/drivers/video/omap2/omapfb/Kconfig b/drivers/video/omap2/omapfb/Kconfig
new file mode 100644
index 000000000000..bb694cc52a50
--- /dev/null
+++ b/drivers/video/omap2/omapfb/Kconfig
@@ -0,0 +1,37 @@
+menuconfig FB_OMAP2
+        tristate "OMAP2/3 frame buffer support (EXPERIMENTAL)"
+        depends on FB && OMAP2_DSS
+
+	select OMAP2_VRAM
+	select OMAP2_VRFB
+        select FB_CFB_FILLRECT
+        select FB_CFB_COPYAREA
+        select FB_CFB_IMAGEBLIT
+        help
+          Frame buffer driver for OMAP2/3 based boards.
+
+config FB_OMAP2_DEBUG_SUPPORT
+	bool "Debug support for OMAP2/3 FB"
+	default y
+	depends on FB_OMAP2
+	help
+	  Support for debug output. You have to enable the actual printing
+	  with debug module parameter.
+
+config FB_OMAP2_FORCE_AUTO_UPDATE
+	bool "Force main display to automatic update mode"
+	depends on FB_OMAP2
+	help
+	  Forces main display to automatic update mode (if possible),
+	  and also enables tearsync (if possible). By default
+	  displays that support manual update are started in manual
+	  update mode.
+
+config FB_OMAP2_NUM_FBS
+	int "Number of framebuffers"
+	range 1 10
+	default 3
+	depends on FB_OMAP2
+	help
+	  Select the number of framebuffers created. OMAP2/3 has 3 overlays
+	  so normally this would be 3.
diff --git a/drivers/video/omap2/omapfb/Makefile b/drivers/video/omap2/omapfb/Makefile
new file mode 100644
index 000000000000..51c2e00d9bf8
--- /dev/null
+++ b/drivers/video/omap2/omapfb/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_FB_OMAP2) += omapfb.o
+omapfb-y := omapfb-main.o omapfb-sysfs.o omapfb-ioctl.o
diff --git a/drivers/video/omap2/omapfb/omapfb-ioctl.c b/drivers/video/omap2/omapfb/omapfb-ioctl.c
new file mode 100644
index 000000000000..4c4bafdfaa43
--- /dev/null
+++ b/drivers/video/omap2/omapfb/omapfb-ioctl.c
@@ -0,0 +1,755 @@
+/*
+ * linux/drivers/video/omap2/omapfb-ioctl.c
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
+ *
+ * Some code and ideas taken from drivers/video/omap/ driver
+ * by Imre Deak.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fb.h>
+#include <linux/device.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mm.h>
+#include <linux/omapfb.h>
+#include <linux/vmalloc.h>
+
+#include <plat/display.h>
+#include <plat/vrfb.h>
+#include <plat/vram.h>
+
+#include "omapfb.h"
+
+static int omapfb_setup_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_overlay *ovl;
+	struct omap_overlay_info info;
+	int r = 0;
+
+	DBG("omapfb_setup_plane\n");
+
+	if (ofbi->num_overlays != 1) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	/* XXX uses only the first overlay */
+	ovl = ofbi->overlays[0];
+
+	if (pi->enabled && !ofbi->region.size) {
+		/*
+		 * This plane's memory was freed, can't enable it
+		 * until it's reallocated.
+		 */
+		r = -EINVAL;
+		goto out;
+	}
+
+	ovl->get_overlay_info(ovl, &info);
+
+	info.pos_x = pi->pos_x;
+	info.pos_y = pi->pos_y;
+	info.out_width = pi->out_width;
+	info.out_height = pi->out_height;
+	info.enabled = pi->enabled;
+
+	r = ovl->set_overlay_info(ovl, &info);
+	if (r)
+		goto out;
+
+	if (ovl->manager) {
+		r = ovl->manager->apply(ovl->manager);
+		if (r)
+			goto out;
+	}
+
+out:
+	if (r)
+		dev_err(fbdev->dev, "setup_plane failed\n");
+	return r;
+}
+
+static int omapfb_query_plane(struct fb_info *fbi, struct omapfb_plane_info *pi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	if (ofbi->num_overlays != 1) {
+		memset(pi, 0, sizeof(*pi));
+	} else {
+		struct omap_overlay_info *ovli;
+		struct omap_overlay *ovl;
+
+		ovl = ofbi->overlays[0];
+		ovli = &ovl->info;
+
+		pi->pos_x = ovli->pos_x;
+		pi->pos_y = ovli->pos_y;
+		pi->enabled = ovli->enabled;
+		pi->channel_out = 0; /* xxx */
+		pi->mirror = 0;
+		pi->out_width = ovli->out_width;
+		pi->out_height = ovli->out_height;
+	}
+
+	return 0;
+}
+
+static int omapfb_setup_mem(struct fb_info *fbi, struct omapfb_mem_info *mi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omapfb2_mem_region *rg;
+	int r, i;
+	size_t size;
+
+	if (mi->type > OMAPFB_MEMTYPE_MAX)
+		return -EINVAL;
+
+	size = PAGE_ALIGN(mi->size);
+
+	rg = &ofbi->region;
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ofbi->overlays[i]->info.enabled)
+			return -EBUSY;
+	}
+
+	if (rg->size != size || rg->type != mi->type) {
+		r = omapfb_realloc_fbmem(fbi, size, mi->type);
+		if (r) {
+			dev_err(fbdev->dev, "realloc fbmem failed\n");
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+static int omapfb_query_mem(struct fb_info *fbi, struct omapfb_mem_info *mi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_mem_region *rg;
+
+	rg = &ofbi->region;
+	memset(mi, 0, sizeof(*mi));
+
+	mi->size = rg->size;
+	mi->type = rg->type;
+
+	return 0;
+}
+
+static int omapfb_update_window_nolock(struct fb_info *fbi,
+		u32 x, u32 y, u32 w, u32 h)
+{
+	struct omap_dss_device *display = fb2display(fbi);
+	u16 dw, dh;
+
+	if (!display)
+		return 0;
+
+	if (w == 0 || h == 0)
+		return 0;
+
+	display->get_resolution(display, &dw, &dh);
+
+	if (x + w > dw || y + h > dh)
+		return -EINVAL;
+
+	return display->update(display, x, y, w, h);
+}
+
+/* This function is exported for SGX driver use */
+int omapfb_update_window(struct fb_info *fbi,
+		u32 x, u32 y, u32 w, u32 h)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	int r;
+
+	omapfb_lock(fbdev);
+	lock_fb_info(fbi);
+
+	r = omapfb_update_window_nolock(fbi, x, y, w, h);
+
+	unlock_fb_info(fbi);
+	omapfb_unlock(fbdev);
+
+	return r;
+}
+EXPORT_SYMBOL(omapfb_update_window);
+
+static int omapfb_set_update_mode(struct fb_info *fbi,
+				   enum omapfb_update_mode mode)
+{
+	struct omap_dss_device *display = fb2display(fbi);
+	enum omap_dss_update_mode um;
+	int r;
+
+	if (!display || !display->set_update_mode)
+		return -EINVAL;
+
+	switch (mode) {
+	case OMAPFB_UPDATE_DISABLED:
+		um = OMAP_DSS_UPDATE_DISABLED;
+		break;
+
+	case OMAPFB_AUTO_UPDATE:
+		um = OMAP_DSS_UPDATE_AUTO;
+		break;
+
+	case OMAPFB_MANUAL_UPDATE:
+		um = OMAP_DSS_UPDATE_MANUAL;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	r = display->set_update_mode(display, um);
+
+	return r;
+}
+
+static int omapfb_get_update_mode(struct fb_info *fbi,
+		enum omapfb_update_mode *mode)
+{
+	struct omap_dss_device *display = fb2display(fbi);
+	enum omap_dss_update_mode m;
+
+	if (!display || !display->get_update_mode)
+		return -EINVAL;
+
+	m = display->get_update_mode(display);
+
+	switch (m) {
+	case OMAP_DSS_UPDATE_DISABLED:
+		*mode = OMAPFB_UPDATE_DISABLED;
+		break;
+	case OMAP_DSS_UPDATE_AUTO:
+		*mode = OMAPFB_AUTO_UPDATE;
+		break;
+	case OMAP_DSS_UPDATE_MANUAL:
+		*mode = OMAPFB_MANUAL_UPDATE;
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+/* XXX this color key handling is a hack... */
+static struct omapfb_color_key omapfb_color_keys[2];
+
+static int _omapfb_set_color_key(struct omap_overlay_manager *mgr,
+		struct omapfb_color_key *ck)
+{
+	struct omap_overlay_manager_info info;
+	enum omap_dss_trans_key_type kt;
+	int r;
+
+	mgr->get_manager_info(mgr, &info);
+
+	if (ck->key_type == OMAPFB_COLOR_KEY_DISABLED) {
+		info.trans_enabled = false;
+		omapfb_color_keys[mgr->id] = *ck;
+
+		r = mgr->set_manager_info(mgr, &info);
+		if (r)
+			return r;
+
+		r = mgr->apply(mgr);
+
+		return r;
+	}
+
+	switch (ck->key_type) {
+	case OMAPFB_COLOR_KEY_GFX_DST:
+		kt = OMAP_DSS_COLOR_KEY_GFX_DST;
+		break;
+	case OMAPFB_COLOR_KEY_VID_SRC:
+		kt = OMAP_DSS_COLOR_KEY_VID_SRC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	info.default_color = ck->background;
+	info.trans_key = ck->trans_key;
+	info.trans_key_type = kt;
+	info.trans_enabled = true;
+
+	omapfb_color_keys[mgr->id] = *ck;
+
+	r = mgr->set_manager_info(mgr, &info);
+	if (r)
+		return r;
+
+	r = mgr->apply(mgr);
+
+	return r;
+}
+
+static int omapfb_set_color_key(struct fb_info *fbi,
+		struct omapfb_color_key *ck)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	int r;
+	int i;
+	struct omap_overlay_manager *mgr = NULL;
+
+	omapfb_lock(fbdev);
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ofbi->overlays[i]->manager) {
+			mgr = ofbi->overlays[i]->manager;
+			break;
+		}
+	}
+
+	if (!mgr) {
+		r = -EINVAL;
+		goto err;
+	}
+
+	r = _omapfb_set_color_key(mgr, ck);
+err:
+	omapfb_unlock(fbdev);
+
+	return r;
+}
+
+static int omapfb_get_color_key(struct fb_info *fbi,
+		struct omapfb_color_key *ck)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_overlay_manager *mgr = NULL;
+	int r = 0;
+	int i;
+
+	omapfb_lock(fbdev);
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ofbi->overlays[i]->manager) {
+			mgr = ofbi->overlays[i]->manager;
+			break;
+		}
+	}
+
+	if (!mgr) {
+		r = -EINVAL;
+		goto err;
+	}
+
+	*ck = omapfb_color_keys[mgr->id];
+err:
+	omapfb_unlock(fbdev);
+
+	return r;
+}
+
+static int omapfb_memory_read(struct fb_info *fbi,
+		struct omapfb_memory_read *mr)
+{
+	struct omap_dss_device *display = fb2display(fbi);
+	void *buf;
+	int r;
+
+	if (!display || !display->memory_read)
+		return -ENOENT;
+
+	if (!access_ok(VERIFY_WRITE, mr->buffer, mr->buffer_size))
+		return -EFAULT;
+
+	if (mr->w * mr->h * 3 > mr->buffer_size)
+		return -EINVAL;
+
+	buf = vmalloc(mr->buffer_size);
+	if (!buf) {
+		DBG("vmalloc failed\n");
+		return -ENOMEM;
+	}
+
+	r = display->memory_read(display, buf, mr->buffer_size,
+			mr->x, mr->y, mr->w, mr->h);
+
+	if (r > 0) {
+		if (copy_to_user(mr->buffer, buf, mr->buffer_size))
+			r = -EFAULT;
+	}
+
+	vfree(buf);
+
+	return r;
+}
+
+static int omapfb_get_ovl_colormode(struct omapfb2_device *fbdev,
+			     struct omapfb_ovl_colormode *mode)
+{
+	int ovl_idx = mode->overlay_idx;
+	int mode_idx = mode->mode_idx;
+	struct omap_overlay *ovl;
+	enum omap_color_mode supported_modes;
+	struct fb_var_screeninfo var;
+	int i;
+
+	if (ovl_idx >= fbdev->num_overlays)
+		return -ENODEV;
+	ovl = fbdev->overlays[ovl_idx];
+	supported_modes = ovl->supported_modes;
+
+	mode_idx = mode->mode_idx;
+
+	for (i = 0; i < sizeof(supported_modes) * 8; i++) {
+		if (!(supported_modes & (1 << i)))
+			continue;
+		/*
+		 * It's possible that the FB doesn't support a mode
+		 * that is supported by the overlay, so call the
+		 * following here.
+		 */
+		if (dss_mode_to_fb_mode(1 << i, &var) < 0)
+			continue;
+
+		mode_idx--;
+		if (mode_idx < 0)
+			break;
+	}
+
+	if (i == sizeof(supported_modes) * 8)
+		return -ENOENT;
+
+	mode->bits_per_pixel = var.bits_per_pixel;
+	mode->nonstd = var.nonstd;
+	mode->red = var.red;
+	mode->green = var.green;
+	mode->blue = var.blue;
+	mode->transp = var.transp;
+
+	return 0;
+}
+
+static int omapfb_wait_for_go(struct fb_info *fbi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	int r = 0;
+	int i;
+
+	for (i = 0; i < ofbi->num_overlays; ++i) {
+		struct omap_overlay *ovl = ofbi->overlays[i];
+		r = ovl->wait_for_go(ovl);
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_dss_device *display = fb2display(fbi);
+
+	union {
+		struct omapfb_update_window_old	uwnd_o;
+		struct omapfb_update_window	uwnd;
+		struct omapfb_plane_info	plane_info;
+		struct omapfb_caps		caps;
+		struct omapfb_mem_info          mem_info;
+		struct omapfb_color_key		color_key;
+		struct omapfb_ovl_colormode	ovl_colormode;
+		enum omapfb_update_mode		update_mode;
+		int test_num;
+		struct omapfb_memory_read	memory_read;
+		struct omapfb_vram_info		vram_info;
+		struct omapfb_tearsync_info	tearsync_info;
+	} p;
+
+	int r = 0;
+
+	switch (cmd) {
+	case OMAPFB_SYNC_GFX:
+		DBG("ioctl SYNC_GFX\n");
+		if (!display || !display->sync) {
+			/* DSS1 never returns an error here, so we neither */
+			/*r = -EINVAL;*/
+			break;
+		}
+
+		r = display->sync(display);
+		break;
+
+	case OMAPFB_UPDATE_WINDOW_OLD:
+		DBG("ioctl UPDATE_WINDOW_OLD\n");
+		if (!display || !display->update) {
+			r = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&p.uwnd_o,
+					(void __user *)arg,
+					sizeof(p.uwnd_o))) {
+			r = -EFAULT;
+			break;
+		}
+
+		r = omapfb_update_window_nolock(fbi, p.uwnd_o.x, p.uwnd_o.y,
+				p.uwnd_o.width, p.uwnd_o.height);
+		break;
+
+	case OMAPFB_UPDATE_WINDOW:
+		DBG("ioctl UPDATE_WINDOW\n");
+		if (!display || !display->update) {
+			r = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&p.uwnd, (void __user *)arg,
+					sizeof(p.uwnd))) {
+			r = -EFAULT;
+			break;
+		}
+
+		r = omapfb_update_window_nolock(fbi, p.uwnd.x, p.uwnd.y,
+				p.uwnd.width, p.uwnd.height);
+		break;
+
+	case OMAPFB_SETUP_PLANE:
+		DBG("ioctl SETUP_PLANE\n");
+		if (copy_from_user(&p.plane_info, (void __user *)arg,
+					sizeof(p.plane_info)))
+			r = -EFAULT;
+		else
+			r = omapfb_setup_plane(fbi, &p.plane_info);
+		break;
+
+	case OMAPFB_QUERY_PLANE:
+		DBG("ioctl QUERY_PLANE\n");
+		r = omapfb_query_plane(fbi, &p.plane_info);
+		if (r < 0)
+			break;
+		if (copy_to_user((void __user *)arg, &p.plane_info,
+					sizeof(p.plane_info)))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_SETUP_MEM:
+		DBG("ioctl SETUP_MEM\n");
+		if (copy_from_user(&p.mem_info, (void __user *)arg,
+					sizeof(p.mem_info)))
+			r = -EFAULT;
+		else
+			r = omapfb_setup_mem(fbi, &p.mem_info);
+		break;
+
+	case OMAPFB_QUERY_MEM:
+		DBG("ioctl QUERY_MEM\n");
+		r = omapfb_query_mem(fbi, &p.mem_info);
+		if (r < 0)
+			break;
+		if (copy_to_user((void __user *)arg, &p.mem_info,
+					sizeof(p.mem_info)))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_GET_CAPS:
+		DBG("ioctl GET_CAPS\n");
+		if (!display) {
+			r = -EINVAL;
+			break;
+		}
+
+		memset(&p.caps, 0, sizeof(p.caps));
+		if (display->caps & OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE)
+			p.caps.ctrl |= OMAPFB_CAPS_MANUAL_UPDATE;
+		if (display->caps & OMAP_DSS_DISPLAY_CAP_TEAR_ELIM)
+			p.caps.ctrl |= OMAPFB_CAPS_TEARSYNC;
+
+		if (copy_to_user((void __user *)arg, &p.caps, sizeof(p.caps)))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_GET_OVERLAY_COLORMODE:
+		DBG("ioctl GET_OVERLAY_COLORMODE\n");
+		if (copy_from_user(&p.ovl_colormode, (void __user *)arg,
+				   sizeof(p.ovl_colormode))) {
+			r = -EFAULT;
+			break;
+		}
+		r = omapfb_get_ovl_colormode(fbdev, &p.ovl_colormode);
+		if (r < 0)
+			break;
+		if (copy_to_user((void __user *)arg, &p.ovl_colormode,
+				 sizeof(p.ovl_colormode)))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_SET_UPDATE_MODE:
+		DBG("ioctl SET_UPDATE_MODE\n");
+		if (get_user(p.update_mode, (int __user *)arg))
+			r = -EFAULT;
+		else
+			r = omapfb_set_update_mode(fbi, p.update_mode);
+		break;
+
+	case OMAPFB_GET_UPDATE_MODE:
+		DBG("ioctl GET_UPDATE_MODE\n");
+		r = omapfb_get_update_mode(fbi, &p.update_mode);
+		if (r)
+			break;
+		if (put_user(p.update_mode,
+					(enum omapfb_update_mode __user *)arg))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_SET_COLOR_KEY:
+		DBG("ioctl SET_COLOR_KEY\n");
+		if (copy_from_user(&p.color_key, (void __user *)arg,
+				   sizeof(p.color_key)))
+			r = -EFAULT;
+		else
+			r = omapfb_set_color_key(fbi, &p.color_key);
+		break;
+
+	case OMAPFB_GET_COLOR_KEY:
+		DBG("ioctl GET_COLOR_KEY\n");
+		r = omapfb_get_color_key(fbi, &p.color_key);
+		if (r)
+			break;
+		if (copy_to_user((void __user *)arg, &p.color_key,
+				 sizeof(p.color_key)))
+			r = -EFAULT;
+		break;
+
+	case OMAPFB_WAITFORVSYNC:
+		DBG("ioctl WAITFORVSYNC\n");
+		if (!display) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = display->wait_vsync(display);
+		break;
+
+	case OMAPFB_WAITFORGO:
+		DBG("ioctl WAITFORGO\n");
+		if (!display) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = omapfb_wait_for_go(fbi);
+		break;
+
+	/* LCD and CTRL tests do the same thing for backward
+	 * compatibility */
+	case OMAPFB_LCD_TEST:
+		DBG("ioctl LCD_TEST\n");
+		if (get_user(p.test_num, (int __user *)arg)) {
+			r = -EFAULT;
+			break;
+		}
+		if (!display || !display->run_test) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = display->run_test(display, p.test_num);
+
+		break;
+
+	case OMAPFB_CTRL_TEST:
+		DBG("ioctl CTRL_TEST\n");
+		if (get_user(p.test_num, (int __user *)arg)) {
+			r = -EFAULT;
+			break;
+		}
+		if (!display || !display->run_test) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = display->run_test(display, p.test_num);
+
+		break;
+
+	case OMAPFB_MEMORY_READ:
+		DBG("ioctl MEMORY_READ\n");
+
+		if (copy_from_user(&p.memory_read, (void __user *)arg,
+					sizeof(p.memory_read))) {
+			r = -EFAULT;
+			break;
+		}
+
+		r = omapfb_memory_read(fbi, &p.memory_read);
+
+		break;
+
+	case OMAPFB_GET_VRAM_INFO: {
+		unsigned long vram, free, largest;
+
+		DBG("ioctl GET_VRAM_INFO\n");
+
+		omap_vram_get_info(&vram, &free, &largest);
+		p.vram_info.total = vram;
+		p.vram_info.free = free;
+		p.vram_info.largest_free_block = largest;
+
+		if (copy_to_user((void __user *)arg, &p.vram_info,
+					sizeof(p.vram_info)))
+			r = -EFAULT;
+		break;
+	}
+
+	case OMAPFB_SET_TEARSYNC: {
+		DBG("ioctl SET_TEARSYNC\n");
+
+		if (copy_from_user(&p.tearsync_info, (void __user *)arg,
+					sizeof(p.tearsync_info))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (!display->enable_te) {
+			r = -ENODEV;
+			break;
+		}
+
+		r = display->enable_te(display, !!p.tearsync_info.enabled);
+
+		break;
+	}
+
+	default:
+		dev_err(fbdev->dev, "Unknown ioctl 0x%x\n", cmd);
+		r = -EINVAL;
+	}
+
+	if (r < 0)
+		DBG("ioctl failed: %d\n", r);
+
+	return r;
+}
+
+
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
new file mode 100644
index 000000000000..ef299839858a
--- /dev/null
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -0,0 +1,2261 @@
+/*
+ * linux/drivers/video/omap2/omapfb-main.c
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
+ *
+ * Some code and ideas taken from drivers/video/omap/ driver
+ * by Imre Deak.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/omapfb.h>
+
+#include <plat/display.h>
+#include <plat/vram.h>
+#include <plat/vrfb.h>
+
+#include "omapfb.h"
+
+#define MODULE_NAME     "omapfb"
+
+#define OMAPFB_PLANE_XRES_MIN		8
+#define OMAPFB_PLANE_YRES_MIN		8
+
+static char *def_mode;
+static char *def_vram;
+static int def_vrfb;
+static int def_rotate;
+static int def_mirror;
+
+#ifdef DEBUG
+unsigned int omapfb_debug;
+module_param_named(debug, omapfb_debug, bool, 0644);
+static unsigned int omapfb_test_pattern;
+module_param_named(test, omapfb_test_pattern, bool, 0644);
+#endif
+
+static int omapfb_fb_init(struct omapfb2_device *fbdev, struct fb_info *fbi);
+
+#ifdef DEBUG
+static void draw_pixel(struct fb_info *fbi, int x, int y, unsigned color)
+{
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct fb_fix_screeninfo *fix = &fbi->fix;
+	void __iomem *addr = fbi->screen_base;
+	const unsigned bytespp = var->bits_per_pixel >> 3;
+	const unsigned line_len = fix->line_length / bytespp;
+
+	int r = (color >> 16) & 0xff;
+	int g = (color >> 8) & 0xff;
+	int b = (color >> 0) & 0xff;
+
+	if (var->bits_per_pixel == 16) {
+		u16 __iomem *p = (u16 __iomem *)addr;
+		p += y * line_len + x;
+
+		r = r * 32 / 256;
+		g = g * 64 / 256;
+		b = b * 32 / 256;
+
+		__raw_writew((r << 11) | (g << 5) | (b << 0), p);
+	} else if (var->bits_per_pixel == 24) {
+		u8 __iomem *p = (u8 __iomem *)addr;
+		p += (y * line_len + x) * 3;
+
+		__raw_writeb(b, p + 0);
+		__raw_writeb(g, p + 1);
+		__raw_writeb(r, p + 2);
+	} else if (var->bits_per_pixel == 32) {
+		u32 __iomem *p = (u32 __iomem *)addr;
+		p += y * line_len + x;
+		__raw_writel(color, p);
+	}
+}
+
+static void fill_fb(struct fb_info *fbi)
+{
+	struct fb_var_screeninfo *var = &fbi->var;
+	const short w = var->xres_virtual;
+	const short h = var->yres_virtual;
+	void __iomem *addr = fbi->screen_base;
+	int y, x;
+
+	if (!addr)
+		return;
+
+	DBG("fill_fb %dx%d, line_len %d bytes\n", w, h, fbi->fix.line_length);
+
+	for (y = 0; y < h; y++) {
+		for (x = 0; x < w; x++) {
+			if (x < 20 && y < 20)
+				draw_pixel(fbi, x, y, 0xffffff);
+			else if (x < 20 && (y > 20 && y < h - 20))
+				draw_pixel(fbi, x, y, 0xff);
+			else if (y < 20 && (x > 20 && x < w - 20))
+				draw_pixel(fbi, x, y, 0xff00);
+			else if (x > w - 20 && (y > 20 && y < h - 20))
+				draw_pixel(fbi, x, y, 0xff0000);
+			else if (y > h - 20 && (x > 20 && x < w - 20))
+				draw_pixel(fbi, x, y, 0xffff00);
+			else if (x == 20 || x == w - 20 ||
+					y == 20 || y == h - 20)
+				draw_pixel(fbi, x, y, 0xffffff);
+			else if (x == y || w - x == h - y)
+				draw_pixel(fbi, x, y, 0xff00ff);
+			else if (w - x == y || x == h - y)
+				draw_pixel(fbi, x, y, 0x00ffff);
+			else if (x > 20 && y > 20 && x < w - 20 && y < h - 20) {
+				int t = x * 3 / w;
+				unsigned r = 0, g = 0, b = 0;
+				unsigned c;
+				if (var->bits_per_pixel == 16) {
+					if (t == 0)
+						b = (y % 32) * 256 / 32;
+					else if (t == 1)
+						g = (y % 64) * 256 / 64;
+					else if (t == 2)
+						r = (y % 32) * 256 / 32;
+				} else {
+					if (t == 0)
+						b = (y % 256);
+					else if (t == 1)
+						g = (y % 256);
+					else if (t == 2)
+						r = (y % 256);
+				}
+				c = (r << 16) | (g << 8) | (b << 0);
+				draw_pixel(fbi, x, y, c);
+			} else {
+				draw_pixel(fbi, x, y, 0);
+			}
+		}
+	}
+}
+#endif
+
+static unsigned omapfb_get_vrfb_offset(struct omapfb_info *ofbi, int rot)
+{
+	struct vrfb *vrfb = &ofbi->region.vrfb;
+	unsigned offset;
+
+	switch (rot) {
+	case FB_ROTATE_UR:
+		offset = 0;
+		break;
+	case FB_ROTATE_CW:
+		offset = vrfb->yoffset;
+		break;
+	case FB_ROTATE_UD:
+		offset = vrfb->yoffset * OMAP_VRFB_LINE_LEN + vrfb->xoffset;
+		break;
+	case FB_ROTATE_CCW:
+		offset = vrfb->xoffset * OMAP_VRFB_LINE_LEN;
+		break;
+	default:
+		BUG();
+	}
+
+	offset *= vrfb->bytespp;
+
+	return offset;
+}
+
+static u32 omapfb_get_region_rot_paddr(struct omapfb_info *ofbi, int rot)
+{
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+		return ofbi->region.vrfb.paddr[rot]
+			+ omapfb_get_vrfb_offset(ofbi, rot);
+	} else {
+		return ofbi->region.paddr;
+	}
+}
+
+static u32 omapfb_get_region_paddr(struct omapfb_info *ofbi)
+{
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
+		return ofbi->region.vrfb.paddr[0];
+	else
+		return ofbi->region.paddr;
+}
+
+static void __iomem *omapfb_get_region_vaddr(struct omapfb_info *ofbi)
+{
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
+		return ofbi->region.vrfb.vaddr[0];
+	else
+		return ofbi->region.vaddr;
+}
+
+static struct omapfb_colormode omapfb_colormodes[] = {
+	{
+		.dssmode = OMAP_DSS_COLOR_UYVY,
+		.bits_per_pixel = 16,
+		.nonstd = OMAPFB_COLOR_YUV422,
+	}, {
+		.dssmode = OMAP_DSS_COLOR_YUV2,
+		.bits_per_pixel = 16,
+		.nonstd = OMAPFB_COLOR_YUY422,
+	}, {
+		.dssmode = OMAP_DSS_COLOR_ARGB16,
+		.bits_per_pixel = 16,
+		.red	= { .length = 4, .offset = 8, .msb_right = 0 },
+		.green	= { .length = 4, .offset = 4, .msb_right = 0 },
+		.blue	= { .length = 4, .offset = 0, .msb_right = 0 },
+		.transp	= { .length = 4, .offset = 12, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_RGB16,
+		.bits_per_pixel = 16,
+		.red	= { .length = 5, .offset = 11, .msb_right = 0 },
+		.green	= { .length = 6, .offset = 5, .msb_right = 0 },
+		.blue	= { .length = 5, .offset = 0, .msb_right = 0 },
+		.transp	= { .length = 0, .offset = 0, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_RGB24P,
+		.bits_per_pixel = 24,
+		.red	= { .length = 8, .offset = 16, .msb_right = 0 },
+		.green	= { .length = 8, .offset = 8, .msb_right = 0 },
+		.blue	= { .length = 8, .offset = 0, .msb_right = 0 },
+		.transp	= { .length = 0, .offset = 0, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_RGB24U,
+		.bits_per_pixel = 32,
+		.red	= { .length = 8, .offset = 16, .msb_right = 0 },
+		.green	= { .length = 8, .offset = 8, .msb_right = 0 },
+		.blue	= { .length = 8, .offset = 0, .msb_right = 0 },
+		.transp	= { .length = 0, .offset = 0, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_ARGB32,
+		.bits_per_pixel = 32,
+		.red	= { .length = 8, .offset = 16, .msb_right = 0 },
+		.green	= { .length = 8, .offset = 8, .msb_right = 0 },
+		.blue	= { .length = 8, .offset = 0, .msb_right = 0 },
+		.transp	= { .length = 8, .offset = 24, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_RGBA32,
+		.bits_per_pixel = 32,
+		.red	= { .length = 8, .offset = 24, .msb_right = 0 },
+		.green	= { .length = 8, .offset = 16, .msb_right = 0 },
+		.blue	= { .length = 8, .offset = 8, .msb_right = 0 },
+		.transp	= { .length = 8, .offset = 0, .msb_right = 0 },
+	}, {
+		.dssmode = OMAP_DSS_COLOR_RGBX32,
+		.bits_per_pixel = 32,
+		.red	= { .length = 8, .offset = 24, .msb_right = 0 },
+		.green	= { .length = 8, .offset = 16, .msb_right = 0 },
+		.blue	= { .length = 8, .offset = 8, .msb_right = 0 },
+		.transp	= { .length = 0, .offset = 0, .msb_right = 0 },
+	},
+};
+
+static bool cmp_var_to_colormode(struct fb_var_screeninfo *var,
+		struct omapfb_colormode *color)
+{
+	bool cmp_component(struct fb_bitfield *f1, struct fb_bitfield *f2)
+	{
+		return f1->length == f2->length &&
+			f1->offset == f2->offset &&
+			f1->msb_right == f2->msb_right;
+	}
+
+	if (var->bits_per_pixel == 0 ||
+			var->red.length == 0 ||
+			var->blue.length == 0 ||
+			var->green.length == 0)
+		return 0;
+
+	return var->bits_per_pixel == color->bits_per_pixel &&
+		cmp_component(&var->red, &color->red) &&
+		cmp_component(&var->green, &color->green) &&
+		cmp_component(&var->blue, &color->blue) &&
+		cmp_component(&var->transp, &color->transp);
+}
+
+static void assign_colormode_to_var(struct fb_var_screeninfo *var,
+		struct omapfb_colormode *color)
+{
+	var->bits_per_pixel = color->bits_per_pixel;
+	var->nonstd = color->nonstd;
+	var->red = color->red;
+	var->green = color->green;
+	var->blue = color->blue;
+	var->transp = color->transp;
+}
+
+static int fb_mode_to_dss_mode(struct fb_var_screeninfo *var,
+		enum omap_color_mode *mode)
+{
+	enum omap_color_mode dssmode;
+	int i;
+
+	/* first match with nonstd field */
+	if (var->nonstd) {
+		for (i = 0; i < ARRAY_SIZE(omapfb_colormodes); ++i) {
+			struct omapfb_colormode *m = &omapfb_colormodes[i];
+			if (var->nonstd == m->nonstd) {
+				assign_colormode_to_var(var, m);
+				*mode = m->dssmode;
+				return 0;
+			}
+		}
+
+		return -EINVAL;
+	}
+
+	/* then try exact match of bpp and colors */
+	for (i = 0; i < ARRAY_SIZE(omapfb_colormodes); ++i) {
+		struct omapfb_colormode *m = &omapfb_colormodes[i];
+		if (cmp_var_to_colormode(var, m)) {
+			assign_colormode_to_var(var, m);
+			*mode = m->dssmode;
+			return 0;
+		}
+	}
+
+	/* match with bpp if user has not filled color fields
+	 * properly */
+	switch (var->bits_per_pixel) {
+	case 1:
+		dssmode = OMAP_DSS_COLOR_CLUT1;
+		break;
+	case 2:
+		dssmode = OMAP_DSS_COLOR_CLUT2;
+		break;
+	case 4:
+		dssmode = OMAP_DSS_COLOR_CLUT4;
+		break;
+	case 8:
+		dssmode = OMAP_DSS_COLOR_CLUT8;
+		break;
+	case 12:
+		dssmode = OMAP_DSS_COLOR_RGB12U;
+		break;
+	case 16:
+		dssmode = OMAP_DSS_COLOR_RGB16;
+		break;
+	case 24:
+		dssmode = OMAP_DSS_COLOR_RGB24P;
+		break;
+	case 32:
+		dssmode = OMAP_DSS_COLOR_RGB24U;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(omapfb_colormodes); ++i) {
+		struct omapfb_colormode *m = &omapfb_colormodes[i];
+		if (dssmode == m->dssmode) {
+			assign_colormode_to_var(var, m);
+			*mode = m->dssmode;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int check_fb_res_bounds(struct fb_var_screeninfo *var)
+{
+	int xres_min = OMAPFB_PLANE_XRES_MIN;
+	int xres_max = 2048;
+	int yres_min = OMAPFB_PLANE_YRES_MIN;
+	int yres_max = 2048;
+
+	/* XXX: some applications seem to set virtual res to 0. */
+	if (var->xres_virtual == 0)
+		var->xres_virtual = var->xres;
+
+	if (var->yres_virtual == 0)
+		var->yres_virtual = var->yres;
+
+	if (var->xres_virtual < xres_min || var->yres_virtual < yres_min)
+		return -EINVAL;
+
+	if (var->xres < xres_min)
+		var->xres = xres_min;
+	if (var->yres < yres_min)
+		var->yres = yres_min;
+	if (var->xres > xres_max)
+		var->xres = xres_max;
+	if (var->yres > yres_max)
+		var->yres = yres_max;
+
+	if (var->xres > var->xres_virtual)
+		var->xres = var->xres_virtual;
+	if (var->yres > var->yres_virtual)
+		var->yres = var->yres_virtual;
+
+	return 0;
+}
+
+static void shrink_height(unsigned long max_frame_size,
+		struct fb_var_screeninfo *var)
+{
+	DBG("can't fit FB into memory, reducing y\n");
+	var->yres_virtual = max_frame_size /
+		(var->xres_virtual * var->bits_per_pixel >> 3);
+
+	if (var->yres_virtual < OMAPFB_PLANE_YRES_MIN)
+		var->yres_virtual = OMAPFB_PLANE_YRES_MIN;
+
+	if (var->yres > var->yres_virtual)
+		var->yres = var->yres_virtual;
+}
+
+static void shrink_width(unsigned long max_frame_size,
+		struct fb_var_screeninfo *var)
+{
+	DBG("can't fit FB into memory, reducing x\n");
+	var->xres_virtual = max_frame_size / var->yres_virtual /
+		(var->bits_per_pixel >> 3);
+
+	if (var->xres_virtual < OMAPFB_PLANE_XRES_MIN)
+		var->xres_virtual = OMAPFB_PLANE_XRES_MIN;
+
+	if (var->xres > var->xres_virtual)
+		var->xres = var->xres_virtual;
+}
+
+static int check_vrfb_fb_size(unsigned long region_size,
+		const struct fb_var_screeninfo *var)
+{
+	unsigned long min_phys_size = omap_vrfb_min_phys_size(var->xres_virtual,
+		var->yres_virtual, var->bits_per_pixel >> 3);
+
+	return min_phys_size > region_size ? -EINVAL : 0;
+}
+
+static int check_fb_size(const struct omapfb_info *ofbi,
+		struct fb_var_screeninfo *var)
+{
+	unsigned long max_frame_size = ofbi->region.size;
+	int bytespp = var->bits_per_pixel >> 3;
+	unsigned long line_size = var->xres_virtual * bytespp;
+
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+		/* One needs to check for both VRFB and OMAPFB limitations. */
+		if (check_vrfb_fb_size(max_frame_size, var))
+			shrink_height(omap_vrfb_max_height(
+				max_frame_size, var->xres_virtual, bytespp) *
+				line_size, var);
+
+		if (check_vrfb_fb_size(max_frame_size, var)) {
+			DBG("cannot fit FB to memory\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	DBG("max frame size %lu, line size %lu\n", max_frame_size, line_size);
+
+	if (line_size * var->yres_virtual > max_frame_size)
+		shrink_height(max_frame_size, var);
+
+	if (line_size * var->yres_virtual > max_frame_size) {
+		shrink_width(max_frame_size, var);
+		line_size = var->xres_virtual * bytespp;
+	}
+
+	if (line_size * var->yres_virtual > max_frame_size) {
+		DBG("cannot fit FB to memory\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Consider if VRFB assisted rotation is in use and if the virtual space for
+ * the zero degree view needs to be mapped. The need for mapping also acts as
+ * the trigger for setting up the hardware on the context in question. This
+ * ensures that one does not attempt to access the virtual view before the
+ * hardware is serving the address translations.
+ */
+static int setup_vrfb_rotation(struct fb_info *fbi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_mem_region *rg = &ofbi->region;
+	struct vrfb *vrfb = &rg->vrfb;
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct fb_fix_screeninfo *fix = &fbi->fix;
+	unsigned bytespp;
+	bool yuv_mode;
+	enum omap_color_mode mode;
+	int r;
+	bool reconf;
+
+	if (!rg->size || ofbi->rotation_type != OMAP_DSS_ROT_VRFB)
+		return 0;
+
+	DBG("setup_vrfb_rotation\n");
+
+	r = fb_mode_to_dss_mode(var, &mode);
+	if (r)
+		return r;
+
+	bytespp = var->bits_per_pixel >> 3;
+
+	yuv_mode = mode == OMAP_DSS_COLOR_YUV2 || mode == OMAP_DSS_COLOR_UYVY;
+
+	/* We need to reconfigure VRFB if the resolution changes, if yuv mode
+	 * is enabled/disabled, or if bytes per pixel changes */
+
+	/* XXX we shouldn't allow this when framebuffer is mmapped */
+
+	reconf = false;
+
+	if (yuv_mode != vrfb->yuv_mode)
+		reconf = true;
+	else if (bytespp != vrfb->bytespp)
+		reconf = true;
+	else if (vrfb->xres != var->xres_virtual ||
+			vrfb->yres != var->yres_virtual)
+		reconf = true;
+
+	if (vrfb->vaddr[0] && reconf) {
+		fbi->screen_base = NULL;
+		fix->smem_start = 0;
+		fix->smem_len = 0;
+		iounmap(vrfb->vaddr[0]);
+		vrfb->vaddr[0] = NULL;
+		DBG("setup_vrfb_rotation: reset fb\n");
+	}
+
+	if (vrfb->vaddr[0])
+		return 0;
+
+	omap_vrfb_setup(&rg->vrfb, rg->paddr,
+			var->xres_virtual,
+			var->yres_virtual,
+			bytespp, yuv_mode);
+
+	/* Now one can ioremap the 0 angle view */
+	r = omap_vrfb_map_angle(vrfb, var->yres_virtual, 0);
+	if (r)
+		return r;
+
+	/* used by open/write in fbmem.c */
+	fbi->screen_base = ofbi->region.vrfb.vaddr[0];
+
+	fix->smem_start = ofbi->region.vrfb.paddr[0];
+
+	switch (var->nonstd) {
+	case OMAPFB_COLOR_YUV422:
+	case OMAPFB_COLOR_YUY422:
+		fix->line_length =
+			(OMAP_VRFB_LINE_LEN * var->bits_per_pixel) >> 2;
+		break;
+	default:
+		fix->line_length =
+			(OMAP_VRFB_LINE_LEN * var->bits_per_pixel) >> 3;
+		break;
+	}
+
+	fix->smem_len = var->yres_virtual * fix->line_length;
+
+	return 0;
+}
+
+int dss_mode_to_fb_mode(enum omap_color_mode dssmode,
+			struct fb_var_screeninfo *var)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(omapfb_colormodes); ++i) {
+		struct omapfb_colormode *mode = &omapfb_colormodes[i];
+		if (dssmode == mode->dssmode) {
+			assign_colormode_to_var(var, mode);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+void set_fb_fix(struct fb_info *fbi)
+{
+	struct fb_fix_screeninfo *fix = &fbi->fix;
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_mem_region *rg = &ofbi->region;
+
+	DBG("set_fb_fix\n");
+
+	/* used by open/write in fbmem.c */
+	fbi->screen_base = (char __iomem *)omapfb_get_region_vaddr(ofbi);
+
+	/* used by mmap in fbmem.c */
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+		switch (var->nonstd) {
+		case OMAPFB_COLOR_YUV422:
+		case OMAPFB_COLOR_YUY422:
+			fix->line_length =
+				(OMAP_VRFB_LINE_LEN * var->bits_per_pixel) >> 2;
+			break;
+		default:
+			fix->line_length =
+				(OMAP_VRFB_LINE_LEN * var->bits_per_pixel) >> 3;
+			break;
+		}
+
+		fix->smem_len = var->yres_virtual * fix->line_length;
+	} else {
+		fix->line_length =
+			(var->xres_virtual * var->bits_per_pixel) >> 3;
+		fix->smem_len = rg->size;
+	}
+
+	fix->smem_start = omapfb_get_region_paddr(ofbi);
+
+	fix->type = FB_TYPE_PACKED_PIXELS;
+
+	if (var->nonstd)
+		fix->visual = FB_VISUAL_PSEUDOCOLOR;
+	else {
+		switch (var->bits_per_pixel) {
+		case 32:
+		case 24:
+		case 16:
+		case 12:
+			fix->visual = FB_VISUAL_TRUECOLOR;
+			/* 12bpp is stored in 16 bits */
+			break;
+		case 1:
+		case 2:
+		case 4:
+		case 8:
+			fix->visual = FB_VISUAL_PSEUDOCOLOR;
+			break;
+		}
+	}
+
+	fix->accel = FB_ACCEL_NONE;
+
+	fix->xpanstep = 1;
+	fix->ypanstep = 1;
+}
+
+/* check new var and possibly modify it to be ok */
+int check_fb_var(struct fb_info *fbi, struct fb_var_screeninfo *var)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omap_dss_device *display = fb2display(fbi);
+	enum omap_color_mode mode = 0;
+	int i;
+	int r;
+
+	DBG("check_fb_var %d\n", ofbi->id);
+
+	if (ofbi->region.size == 0)
+		return 0;
+
+	r = fb_mode_to_dss_mode(var, &mode);
+	if (r) {
+		DBG("cannot convert var to omap dss mode\n");
+		return r;
+	}
+
+	for (i = 0; i < ofbi->num_overlays; ++i) {
+		if ((ofbi->overlays[i]->supported_modes & mode) == 0) {
+			DBG("invalid mode\n");
+			return -EINVAL;
+		}
+	}
+
+	if (var->rotate < 0 || var->rotate > 3)
+		return -EINVAL;
+
+	if (check_fb_res_bounds(var))
+		return -EINVAL;
+
+	if (check_fb_size(ofbi, var))
+		return -EINVAL;
+
+	if (var->xres + var->xoffset > var->xres_virtual)
+		var->xoffset = var->xres_virtual - var->xres;
+	if (var->yres + var->yoffset > var->yres_virtual)
+		var->yoffset = var->yres_virtual - var->yres;
+
+	DBG("xres = %d, yres = %d, vxres = %d, vyres = %d\n",
+			var->xres, var->yres,
+			var->xres_virtual, var->yres_virtual);
+
+	var->height             = -1;
+	var->width              = -1;
+	var->grayscale          = 0;
+
+	if (display && display->get_timings) {
+		struct omap_video_timings timings;
+		display->get_timings(display, &timings);
+
+		/* pixclock in ps, the rest in pixclock */
+		var->pixclock = timings.pixel_clock != 0 ?
+			KHZ2PICOS(timings.pixel_clock) :
+			0;
+		var->left_margin = timings.hfp;
+		var->right_margin = timings.hbp;
+		var->upper_margin = timings.vfp;
+		var->lower_margin = timings.vbp;
+		var->hsync_len = timings.hsw;
+		var->vsync_len = timings.vsw;
+	} else {
+		var->pixclock = 0;
+		var->left_margin = 0;
+		var->right_margin = 0;
+		var->upper_margin = 0;
+		var->lower_margin = 0;
+		var->hsync_len = 0;
+		var->vsync_len = 0;
+	}
+
+	/* TODO: get these from panel->config */
+	var->vmode              = FB_VMODE_NONINTERLACED;
+	var->sync               = 0;
+
+	return 0;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * fbdev framework callbacks
+ * ---------------------------------------------------------------------------
+ */
+static int omapfb_open(struct fb_info *fbi, int user)
+{
+	return 0;
+}
+
+static int omapfb_release(struct fb_info *fbi, int user)
+{
+#if 0
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_dss_device *display = fb2display(fbi);
+
+	DBG("Closing fb with plane index %d\n", ofbi->id);
+
+	omapfb_lock(fbdev);
+
+	if (display && display->get_update_mode && display->update) {
+		/* XXX this update should be removed, I think. But it's
+		 * good for debugging */
+		if (display->get_update_mode(display) ==
+				OMAP_DSS_UPDATE_MANUAL) {
+			u16 w, h;
+
+			if (display->sync)
+				display->sync(display);
+
+			display->get_resolution(display, &w, &h);
+			display->update(display, 0, 0, w, h);
+		}
+	}
+
+	if (display && display->sync)
+		display->sync(display);
+
+	omapfb_unlock(fbdev);
+#endif
+	return 0;
+}
+
+static unsigned calc_rotation_offset_dma(struct fb_var_screeninfo *var,
+		struct fb_fix_screeninfo *fix, int rotation)
+{
+	unsigned offset;
+
+	offset = var->yoffset * fix->line_length +
+		var->xoffset * (var->bits_per_pixel >> 3);
+
+	return offset;
+}
+
+static unsigned calc_rotation_offset_vrfb(struct fb_var_screeninfo *var,
+		struct fb_fix_screeninfo *fix, int rotation)
+{
+	unsigned offset;
+
+	if (rotation == FB_ROTATE_UD)
+		offset = (var->yres_virtual - var->yres) *
+			fix->line_length;
+	else if (rotation == FB_ROTATE_CW)
+		offset = (var->yres_virtual - var->yres) *
+			(var->bits_per_pixel >> 3);
+	else
+		offset = 0;
+
+	if (rotation == FB_ROTATE_UR)
+		offset += var->yoffset * fix->line_length +
+			var->xoffset * (var->bits_per_pixel >> 3);
+	else if (rotation == FB_ROTATE_UD)
+		offset -= var->yoffset * fix->line_length +
+			var->xoffset * (var->bits_per_pixel >> 3);
+	else if (rotation == FB_ROTATE_CW)
+		offset -= var->xoffset * fix->line_length +
+			var->yoffset * (var->bits_per_pixel >> 3);
+	else if (rotation == FB_ROTATE_CCW)
+		offset += var->xoffset * fix->line_length +
+			var->yoffset * (var->bits_per_pixel >> 3);
+
+	return offset;
+}
+
+
+/* setup overlay according to the fb */
+static int omapfb_setup_overlay(struct fb_info *fbi, struct omap_overlay *ovl,
+		u16 posx, u16 posy, u16 outw, u16 outh)
+{
+	int r = 0;
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct fb_fix_screeninfo *fix = &fbi->fix;
+	enum omap_color_mode mode = 0;
+	int offset;
+	u32 data_start_p;
+	void __iomem *data_start_v;
+	struct omap_overlay_info info;
+	int xres, yres;
+	int screen_width;
+	int mirror;
+	int rotation = var->rotate;
+	int i;
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ovl != ofbi->overlays[i])
+			continue;
+
+		rotation = (rotation + ofbi->rotation[i]) % 4;
+		break;
+	}
+
+	DBG("setup_overlay %d, posx %d, posy %d, outw %d, outh %d\n", ofbi->id,
+			posx, posy, outw, outh);
+
+	if (rotation == FB_ROTATE_CW || rotation == FB_ROTATE_CCW) {
+		xres = var->yres;
+		yres = var->xres;
+	} else {
+		xres = var->xres;
+		yres = var->yres;
+	}
+
+
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+		data_start_p = omapfb_get_region_rot_paddr(ofbi, rotation);
+		data_start_v = NULL;
+	} else {
+		data_start_p = omapfb_get_region_paddr(ofbi);
+		data_start_v = omapfb_get_region_vaddr(ofbi);
+	}
+
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
+		offset = calc_rotation_offset_vrfb(var, fix, rotation);
+	else
+		offset = calc_rotation_offset_dma(var, fix, rotation);
+
+	data_start_p += offset;
+	data_start_v += offset;
+
+	if (offset)
+		DBG("offset %d, %d = %d\n",
+				var->xoffset, var->yoffset, offset);
+
+	DBG("paddr %x, vaddr %p\n", data_start_p, data_start_v);
+
+	r = fb_mode_to_dss_mode(var, &mode);
+	if (r) {
+		DBG("fb_mode_to_dss_mode failed");
+		goto err;
+	}
+
+	switch (var->nonstd) {
+	case OMAPFB_COLOR_YUV422:
+	case OMAPFB_COLOR_YUY422:
+		if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+			screen_width = fix->line_length
+				/ (var->bits_per_pixel >> 2);
+			break;
+		}
+	default:
+		screen_width = fix->line_length / (var->bits_per_pixel >> 3);
+		break;
+	}
+
+	ovl->get_overlay_info(ovl, &info);
+
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB)
+		mirror = 0;
+	else
+		mirror = ofbi->mirror;
+
+	info.paddr = data_start_p;
+	info.vaddr = data_start_v;
+	info.screen_width = screen_width;
+	info.width = xres;
+	info.height = yres;
+	info.color_mode = mode;
+	info.rotation_type = ofbi->rotation_type;
+	info.rotation = rotation;
+	info.mirror = mirror;
+
+	info.pos_x = posx;
+	info.pos_y = posy;
+	info.out_width = outw;
+	info.out_height = outh;
+
+	r = ovl->set_overlay_info(ovl, &info);
+	if (r) {
+		DBG("ovl->setup_overlay_info failed\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	DBG("setup_overlay failed\n");
+	return r;
+}
+
+/* apply var to the overlay */
+int omapfb_apply_changes(struct fb_info *fbi, int init)
+{
+	int r = 0;
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct omap_overlay *ovl;
+	u16 posx, posy;
+	u16 outw, outh;
+	int i;
+
+#ifdef DEBUG
+	if (omapfb_test_pattern)
+		fill_fb(fbi);
+#endif
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		ovl = ofbi->overlays[i];
+
+		DBG("apply_changes, fb %d, ovl %d\n", ofbi->id, ovl->id);
+
+		if (ofbi->region.size == 0) {
+			/* the fb is not available. disable the overlay */
+			omapfb_overlay_enable(ovl, 0);
+			if (!init && ovl->manager)
+				ovl->manager->apply(ovl->manager);
+			continue;
+		}
+
+		if (init || (ovl->caps & OMAP_DSS_OVL_CAP_SCALE) == 0) {
+			int rotation = (var->rotate + ofbi->rotation[i]) % 4;
+			if (rotation == FB_ROTATE_CW ||
+					rotation == FB_ROTATE_CCW) {
+				outw = var->yres;
+				outh = var->xres;
+			} else {
+				outw = var->xres;
+				outh = var->yres;
+			}
+		} else {
+			outw = ovl->info.out_width;
+			outh = ovl->info.out_height;
+		}
+
+		if (init) {
+			posx = 0;
+			posy = 0;
+		} else {
+			posx = ovl->info.pos_x;
+			posy = ovl->info.pos_y;
+		}
+
+		r = omapfb_setup_overlay(fbi, ovl, posx, posy, outw, outh);
+		if (r)
+			goto err;
+
+		if (!init && ovl->manager)
+			ovl->manager->apply(ovl->manager);
+	}
+	return 0;
+err:
+	DBG("apply_changes failed\n");
+	return r;
+}
+
+/* checks var and eventually tweaks it to something supported,
+ * DO NOT MODIFY PAR */
+static int omapfb_check_var(struct fb_var_screeninfo *var, struct fb_info *fbi)
+{
+	int r;
+
+	DBG("check_var(%d)\n", FB2OFB(fbi)->id);
+
+	r = check_fb_var(fbi, var);
+
+	return r;
+}
+
+/* set the video mode according to info->var */
+static int omapfb_set_par(struct fb_info *fbi)
+{
+	int r;
+
+	DBG("set_par(%d)\n", FB2OFB(fbi)->id);
+
+	set_fb_fix(fbi);
+
+	r = setup_vrfb_rotation(fbi);
+	if (r)
+		return r;
+
+	r = omapfb_apply_changes(fbi, 0);
+
+	return r;
+}
+
+static int omapfb_pan_display(struct fb_var_screeninfo *var,
+		struct fb_info *fbi)
+{
+	struct fb_var_screeninfo new_var;
+	int r;
+
+	DBG("pan_display(%d)\n", FB2OFB(fbi)->id);
+
+	if (var->xoffset == fbi->var.xoffset &&
+	    var->yoffset == fbi->var.yoffset)
+		return 0;
+
+	new_var = fbi->var;
+	new_var.xoffset = var->xoffset;
+	new_var.yoffset = var->yoffset;
+
+	fbi->var = new_var;
+
+	r = omapfb_apply_changes(fbi, 0);
+
+	return r;
+}
+
+static void mmap_user_open(struct vm_area_struct *vma)
+{
+	struct omapfb_info *ofbi = (struct omapfb_info *)vma->vm_private_data;
+
+	atomic_inc(&ofbi->map_count);
+}
+
+static void mmap_user_close(struct vm_area_struct *vma)
+{
+	struct omapfb_info *ofbi = (struct omapfb_info *)vma->vm_private_data;
+
+	atomic_dec(&ofbi->map_count);
+}
+
+static struct vm_operations_struct mmap_user_ops = {
+	.open = mmap_user_open,
+	.close = mmap_user_close,
+};
+
+static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct fb_fix_screeninfo *fix = &fbi->fix;
+	unsigned long off;
+	unsigned long start;
+	u32 len;
+
+	if (vma->vm_end - vma->vm_start == 0)
+		return 0;
+	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+		return -EINVAL;
+	off = vma->vm_pgoff << PAGE_SHIFT;
+
+	start = omapfb_get_region_paddr(ofbi);
+	len = fix->smem_len;
+	if (off >= len)
+		return -EINVAL;
+	if ((vma->vm_end - vma->vm_start + off) > len)
+		return -EINVAL;
+
+	off += start;
+
+	DBG("user mmap region start %lx, len %d, off %lx\n", start, len, off);
+
+	vma->vm_pgoff = off >> PAGE_SHIFT;
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &mmap_user_ops;
+	vma->vm_private_data = ofbi;
+	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
+			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
+		return -EAGAIN;
+	/* vm_ops.open won't be called for mmap itself. */
+	atomic_inc(&ofbi->map_count);
+	return 0;
+}
+
+/* Store a single color palette entry into a pseudo palette or the hardware
+ * palette if one is available. For now we support only 16bpp and thus store
+ * the entry only to the pseudo palette.
+ */
+static int _setcolreg(struct fb_info *fbi, u_int regno, u_int red, u_int green,
+		u_int blue, u_int transp, int update_hw_pal)
+{
+	/*struct omapfb_info *ofbi = FB2OFB(fbi);*/
+	/*struct omapfb2_device *fbdev = ofbi->fbdev;*/
+	struct fb_var_screeninfo *var = &fbi->var;
+	int r = 0;
+
+	enum omapfb_color_format mode = OMAPFB_COLOR_RGB24U; /* XXX */
+
+	/*switch (plane->color_mode) {*/
+	switch (mode) {
+	case OMAPFB_COLOR_YUV422:
+	case OMAPFB_COLOR_YUV420:
+	case OMAPFB_COLOR_YUY422:
+		r = -EINVAL;
+		break;
+	case OMAPFB_COLOR_CLUT_8BPP:
+	case OMAPFB_COLOR_CLUT_4BPP:
+	case OMAPFB_COLOR_CLUT_2BPP:
+	case OMAPFB_COLOR_CLUT_1BPP:
+		/*
+		   if (fbdev->ctrl->setcolreg)
+		   r = fbdev->ctrl->setcolreg(regno, red, green, blue,
+		   transp, update_hw_pal);
+		   */
+		/* Fallthrough */
+		r = -EINVAL;
+		break;
+	case OMAPFB_COLOR_RGB565:
+	case OMAPFB_COLOR_RGB444:
+	case OMAPFB_COLOR_RGB24P:
+	case OMAPFB_COLOR_RGB24U:
+		if (r != 0)
+			break;
+
+		if (regno < 0) {
+			r = -EINVAL;
+			break;
+		}
+
+		if (regno < 16) {
+			u16 pal;
+			pal = ((red >> (16 - var->red.length)) <<
+					var->red.offset) |
+				((green >> (16 - var->green.length)) <<
+				 var->green.offset) |
+				(blue >> (16 - var->blue.length));
+			((u32 *)(fbi->pseudo_palette))[regno] = pal;
+		}
+		break;
+	default:
+		BUG();
+	}
+	return r;
+}
+
+static int omapfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+		u_int transp, struct fb_info *info)
+{
+	DBG("setcolreg\n");
+
+	return _setcolreg(info, regno, red, green, blue, transp, 1);
+}
+
+static int omapfb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
+{
+	int count, index, r;
+	u16 *red, *green, *blue, *transp;
+	u16 trans = 0xffff;
+
+	DBG("setcmap\n");
+
+	red     = cmap->red;
+	green   = cmap->green;
+	blue    = cmap->blue;
+	transp  = cmap->transp;
+	index   = cmap->start;
+
+	for (count = 0; count < cmap->len; count++) {
+		if (transp)
+			trans = *transp++;
+		r = _setcolreg(info, index++, *red++, *green++, *blue++, trans,
+				count == cmap->len - 1);
+		if (r != 0)
+			return r;
+	}
+
+	return 0;
+}
+
+static int omapfb_blank(int blank, struct fb_info *fbi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_dss_device *display = fb2display(fbi);
+	int do_update = 0;
+	int r = 0;
+
+	omapfb_lock(fbdev);
+
+	switch (blank) {
+	case FB_BLANK_UNBLANK:
+		if (display->state != OMAP_DSS_DISPLAY_SUSPENDED)
+			goto exit;
+
+		if (display->resume)
+			r = display->resume(display);
+
+		if (r == 0 && display->get_update_mode &&
+				display->get_update_mode(display) ==
+				OMAP_DSS_UPDATE_MANUAL)
+			do_update = 1;
+
+		break;
+
+	case FB_BLANK_NORMAL:
+		/* FB_BLANK_NORMAL could be implemented.
+		 * Needs DSS additions. */
+	case FB_BLANK_VSYNC_SUSPEND:
+	case FB_BLANK_HSYNC_SUSPEND:
+	case FB_BLANK_POWERDOWN:
+		if (display->state != OMAP_DSS_DISPLAY_ACTIVE)
+			goto exit;
+
+		if (display->suspend)
+			r = display->suspend(display);
+
+		break;
+
+	default:
+		r = -EINVAL;
+	}
+
+exit:
+	omapfb_unlock(fbdev);
+
+	if (r == 0 && do_update && display->update) {
+		u16 w, h;
+		display->get_resolution(display, &w, &h);
+
+		r = display->update(display, 0, 0, w, h);
+	}
+
+	return r;
+}
+
+#if 0
+/* XXX fb_read and fb_write are needed for VRFB */
+ssize_t omapfb_write(struct fb_info *info, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	DBG("omapfb_write %d, %lu\n", count, (unsigned long)*ppos);
+	/* XXX needed for VRFB */
+	return count;
+}
+#endif
+
+static struct fb_ops omapfb_ops = {
+	.owner          = THIS_MODULE,
+	.fb_open        = omapfb_open,
+	.fb_release     = omapfb_release,
+	.fb_fillrect    = cfb_fillrect,
+	.fb_copyarea    = cfb_copyarea,
+	.fb_imageblit   = cfb_imageblit,
+	.fb_blank       = omapfb_blank,
+	.fb_ioctl       = omapfb_ioctl,
+	.fb_check_var   = omapfb_check_var,
+	.fb_set_par     = omapfb_set_par,
+	.fb_pan_display = omapfb_pan_display,
+	.fb_mmap	= omapfb_mmap,
+	.fb_setcolreg	= omapfb_setcolreg,
+	.fb_setcmap	= omapfb_setcmap,
+	/*.fb_write	= omapfb_write,*/
+};
+
+static void omapfb_free_fbmem(struct fb_info *fbi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omapfb2_mem_region *rg;
+
+	rg = &ofbi->region;
+
+	if (rg->paddr)
+		if (omap_vram_free(rg->paddr, rg->size))
+			dev_err(fbdev->dev, "VRAM FREE failed\n");
+
+	if (rg->vaddr)
+		iounmap(rg->vaddr);
+
+	if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+		/* unmap the 0 angle rotation */
+		if (rg->vrfb.vaddr[0]) {
+			iounmap(rg->vrfb.vaddr[0]);
+			omap_vrfb_release_ctx(&rg->vrfb);
+		}
+	}
+
+	rg->vaddr = NULL;
+	rg->paddr = 0;
+	rg->alloc = 0;
+	rg->size = 0;
+}
+
+static void clear_fb_info(struct fb_info *fbi)
+{
+	memset(&fbi->var, 0, sizeof(fbi->var));
+	memset(&fbi->fix, 0, sizeof(fbi->fix));
+	strlcpy(fbi->fix.id, MODULE_NAME, sizeof(fbi->fix.id));
+}
+
+static int omapfb_free_all_fbmem(struct omapfb2_device *fbdev)
+{
+	int i;
+
+	DBG("free all fbmem\n");
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		struct fb_info *fbi = fbdev->fbs[i];
+		omapfb_free_fbmem(fbi);
+		clear_fb_info(fbi);
+	}
+
+	return 0;
+}
+
+static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size,
+		unsigned long paddr)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omapfb2_mem_region *rg;
+	void __iomem *vaddr;
+	int r;
+
+	rg = &ofbi->region;
+	memset(rg, 0, sizeof(*rg));
+
+	size = PAGE_ALIGN(size);
+
+	if (!paddr) {
+		DBG("allocating %lu bytes for fb %d\n", size, ofbi->id);
+		r = omap_vram_alloc(OMAP_VRAM_MEMTYPE_SDRAM, size, &paddr);
+	} else {
+		DBG("reserving %lu bytes at %lx for fb %d\n", size, paddr,
+				ofbi->id);
+		r = omap_vram_reserve(paddr, size);
+	}
+
+	if (r) {
+		dev_err(fbdev->dev, "failed to allocate framebuffer\n");
+		return -ENOMEM;
+	}
+
+	if (ofbi->rotation_type != OMAP_DSS_ROT_VRFB) {
+		vaddr = ioremap_wc(paddr, size);
+
+		if (!vaddr) {
+			dev_err(fbdev->dev, "failed to ioremap framebuffer\n");
+			omap_vram_free(paddr, size);
+			return -ENOMEM;
+		}
+
+		DBG("allocated VRAM paddr %lx, vaddr %p\n", paddr, vaddr);
+	} else {
+		r = omap_vrfb_request_ctx(&rg->vrfb);
+		if (r) {
+			dev_err(fbdev->dev, "vrfb create ctx failed\n");
+			return r;
+		}
+
+		vaddr = NULL;
+	}
+
+	rg->paddr = paddr;
+	rg->vaddr = vaddr;
+	rg->size = size;
+	rg->alloc = 1;
+
+	return 0;
+}
+
+/* allocate fbmem using display resolution as reference */
+static int omapfb_alloc_fbmem_display(struct fb_info *fbi, unsigned long size,
+		unsigned long paddr)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omap_dss_device *display;
+	int bytespp;
+
+	display =  fb2display(fbi);
+
+	if (!display)
+		return 0;
+
+	switch (display->get_recommended_bpp(display)) {
+	case 16:
+		bytespp = 2;
+		break;
+	case 24:
+		bytespp = 4;
+		break;
+	default:
+		bytespp = 4;
+		break;
+	}
+
+	if (!size) {
+		u16 w, h;
+
+		display->get_resolution(display, &w, &h);
+
+		if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) {
+			size = max(omap_vrfb_min_phys_size(w, h, bytespp),
+					omap_vrfb_min_phys_size(h, w, bytespp));
+
+			DBG("adjusting fb mem size for VRFB, %u -> %lu\n",
+					w * h * bytespp, size);
+		} else {
+			size = w * h * bytespp;
+		}
+	}
+
+	if (!size)
+		return 0;
+
+	return omapfb_alloc_fbmem(fbi, size, paddr);
+}
+
+static enum omap_color_mode fb_format_to_dss_mode(enum omapfb_color_format fmt)
+{
+	enum omap_color_mode mode;
+
+	switch (fmt) {
+	case OMAPFB_COLOR_RGB565:
+		mode = OMAP_DSS_COLOR_RGB16;
+		break;
+	case OMAPFB_COLOR_YUV422:
+		mode = OMAP_DSS_COLOR_YUV2;
+		break;
+	case OMAPFB_COLOR_CLUT_8BPP:
+		mode = OMAP_DSS_COLOR_CLUT8;
+		break;
+	case OMAPFB_COLOR_CLUT_4BPP:
+		mode = OMAP_DSS_COLOR_CLUT4;
+		break;
+	case OMAPFB_COLOR_CLUT_2BPP:
+		mode = OMAP_DSS_COLOR_CLUT2;
+		break;
+	case OMAPFB_COLOR_CLUT_1BPP:
+		mode = OMAP_DSS_COLOR_CLUT1;
+		break;
+	case OMAPFB_COLOR_RGB444:
+		mode = OMAP_DSS_COLOR_RGB12U;
+		break;
+	case OMAPFB_COLOR_YUY422:
+		mode = OMAP_DSS_COLOR_UYVY;
+		break;
+	case OMAPFB_COLOR_ARGB16:
+		mode = OMAP_DSS_COLOR_ARGB16;
+		break;
+	case OMAPFB_COLOR_RGB24U:
+		mode = OMAP_DSS_COLOR_RGB24U;
+		break;
+	case OMAPFB_COLOR_RGB24P:
+		mode = OMAP_DSS_COLOR_RGB24P;
+		break;
+	case OMAPFB_COLOR_ARGB32:
+		mode = OMAP_DSS_COLOR_ARGB32;
+		break;
+	case OMAPFB_COLOR_RGBA32:
+		mode = OMAP_DSS_COLOR_RGBA32;
+		break;
+	case OMAPFB_COLOR_RGBX32:
+		mode = OMAP_DSS_COLOR_RGBX32;
+		break;
+	default:
+		mode = -EINVAL;
+	}
+
+	return mode;
+}
+
+static int omapfb_parse_vram_param(const char *param, int max_entries,
+		unsigned long *sizes, unsigned long *paddrs)
+{
+	int fbnum;
+	unsigned long size;
+	unsigned long paddr = 0;
+	char *p, *start;
+
+	start = (char *)param;
+
+	while (1) {
+		p = start;
+
+		fbnum = simple_strtoul(p, &p, 10);
+
+		if (p == param)
+			return -EINVAL;
+
+		if (*p != ':')
+			return -EINVAL;
+
+		if (fbnum >= max_entries)
+			return -EINVAL;
+
+		size = memparse(p + 1, &p);
+
+		if (!size)
+			return -EINVAL;
+
+		paddr = 0;
+
+		if (*p == '@') {
+			paddr = simple_strtoul(p + 1, &p, 16);
+
+			if (!paddr)
+				return -EINVAL;
+
+		}
+
+		paddrs[fbnum] = paddr;
+		sizes[fbnum] = size;
+
+		if (*p == 0)
+			break;
+
+		if (*p != ',')
+			return -EINVAL;
+
+		++p;
+
+		start = p;
+	}
+
+	return 0;
+}
+
+static int omapfb_allocate_all_fbs(struct omapfb2_device *fbdev)
+{
+	int i, r;
+	unsigned long vram_sizes[10];
+	unsigned long vram_paddrs[10];
+
+	memset(&vram_sizes, 0, sizeof(vram_sizes));
+	memset(&vram_paddrs, 0, sizeof(vram_paddrs));
+
+	if (def_vram &&	omapfb_parse_vram_param(def_vram, 10,
+				vram_sizes, vram_paddrs)) {
+		dev_err(fbdev->dev, "failed to parse vram parameter\n");
+
+		memset(&vram_sizes, 0, sizeof(vram_sizes));
+		memset(&vram_paddrs, 0, sizeof(vram_paddrs));
+	}
+
+	if (fbdev->dev->platform_data) {
+		struct omapfb_platform_data *opd;
+		opd = fbdev->dev->platform_data;
+		for (i = 0; i < opd->mem_desc.region_cnt; ++i) {
+			if (!vram_sizes[i]) {
+				unsigned long size;
+				unsigned long paddr;
+
+				size = opd->mem_desc.region[i].size;
+				paddr = opd->mem_desc.region[i].paddr;
+
+				vram_sizes[i] = size;
+				vram_paddrs[i] = paddr;
+			}
+		}
+	}
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		/* allocate memory automatically only for fb0, or if
+		 * excplicitly defined with vram or plat data option */
+		if (i == 0 || vram_sizes[i] != 0) {
+			r = omapfb_alloc_fbmem_display(fbdev->fbs[i],
+					vram_sizes[i], vram_paddrs[i]);
+
+			if (r)
+				return r;
+		}
+	}
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		struct omapfb_info *ofbi = FB2OFB(fbdev->fbs[i]);
+		struct omapfb2_mem_region *rg;
+		rg = &ofbi->region;
+
+		DBG("region%d phys %08x virt %p size=%lu\n",
+				i,
+				rg->paddr,
+				rg->vaddr,
+				rg->size);
+	}
+
+	return 0;
+}
+
+int omapfb_realloc_fbmem(struct fb_info *fbi, unsigned long size, int type)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_dss_device *display = fb2display(fbi);
+	struct omapfb2_mem_region *rg = &ofbi->region;
+	unsigned long old_size = rg->size;
+	unsigned long old_paddr = rg->paddr;
+	int old_type = rg->type;
+	int r;
+
+	if (type > OMAPFB_MEMTYPE_MAX)
+		return -EINVAL;
+
+	size = PAGE_ALIGN(size);
+
+	if (old_size == size && old_type == type)
+		return 0;
+
+	if (display && display->sync)
+			display->sync(display);
+
+	omapfb_free_fbmem(fbi);
+
+	if (size == 0) {
+		clear_fb_info(fbi);
+		return 0;
+	}
+
+	r = omapfb_alloc_fbmem(fbi, size, 0);
+
+	if (r) {
+		if (old_size)
+			omapfb_alloc_fbmem(fbi, old_size, old_paddr);
+
+		if (rg->size == 0)
+			clear_fb_info(fbi);
+
+		return r;
+	}
+
+	if (old_size == size)
+		return 0;
+
+	if (old_size == 0) {
+		DBG("initializing fb %d\n", ofbi->id);
+		r = omapfb_fb_init(fbdev, fbi);
+		if (r) {
+			DBG("omapfb_fb_init failed\n");
+			goto err;
+		}
+		r = omapfb_apply_changes(fbi, 1);
+		if (r) {
+			DBG("omapfb_apply_changes failed\n");
+			goto err;
+		}
+	} else {
+		struct fb_var_screeninfo new_var;
+		memcpy(&new_var, &fbi->var, sizeof(new_var));
+		r = check_fb_var(fbi, &new_var);
+		if (r)
+			goto err;
+		memcpy(&fbi->var, &new_var, sizeof(fbi->var));
+		set_fb_fix(fbi);
+		r = setup_vrfb_rotation(fbi);
+		if (r)
+			goto err;
+	}
+
+	return 0;
+err:
+	omapfb_free_fbmem(fbi);
+	clear_fb_info(fbi);
+	return r;
+}
+
+/* initialize fb_info, var, fix to something sane based on the display */
+static int omapfb_fb_init(struct omapfb2_device *fbdev, struct fb_info *fbi)
+{
+	struct fb_var_screeninfo *var = &fbi->var;
+	struct omap_dss_device *display = fb2display(fbi);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	int r = 0;
+
+	fbi->fbops = &omapfb_ops;
+	fbi->flags = FBINFO_FLAG_DEFAULT;
+	fbi->pseudo_palette = fbdev->pseudo_palette;
+
+	if (ofbi->region.size == 0) {
+		clear_fb_info(fbi);
+		return 0;
+	}
+
+	var->nonstd = 0;
+	var->bits_per_pixel = 0;
+
+	var->rotate = def_rotate;
+
+	/*
+	 * Check if there is a default color format set in the board file,
+	 * and use this format instead the default deducted from the
+	 * display bpp.
+	 */
+	if (fbdev->dev->platform_data) {
+		struct omapfb_platform_data *opd;
+		int id = ofbi->id;
+
+		opd = fbdev->dev->platform_data;
+		if (opd->mem_desc.region[id].format_used) {
+			enum omap_color_mode mode;
+			enum omapfb_color_format format;
+
+			format = opd->mem_desc.region[id].format;
+			mode = fb_format_to_dss_mode(format);
+			if (mode < 0) {
+				r = mode;
+				goto err;
+			}
+			r = dss_mode_to_fb_mode(mode, var);
+			if (r < 0)
+				goto err;
+		}
+	}
+
+	if (display) {
+		u16 w, h;
+		int rotation = (var->rotate + ofbi->rotation[0]) % 4;
+
+		display->get_resolution(display, &w, &h);
+
+		if (rotation == FB_ROTATE_CW ||
+				rotation == FB_ROTATE_CCW) {
+			var->xres = h;
+			var->yres = w;
+		} else {
+			var->xres = w;
+			var->yres = h;
+		}
+
+		var->xres_virtual = var->xres;
+		var->yres_virtual = var->yres;
+
+		if (!var->bits_per_pixel) {
+			switch (display->get_recommended_bpp(display)) {
+			case 16:
+				var->bits_per_pixel = 16;
+				break;
+			case 24:
+				var->bits_per_pixel = 32;
+				break;
+			default:
+				dev_err(fbdev->dev, "illegal display "
+						"bpp\n");
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* if there's no display, let's just guess some basic values */
+		var->xres = 320;
+		var->yres = 240;
+		var->xres_virtual = var->xres;
+		var->yres_virtual = var->yres;
+		if (!var->bits_per_pixel)
+			var->bits_per_pixel = 16;
+	}
+
+	r = check_fb_var(fbi, var);
+	if (r)
+		goto err;
+
+	set_fb_fix(fbi);
+	r = setup_vrfb_rotation(fbi);
+	if (r)
+		goto err;
+
+	r = fb_alloc_cmap(&fbi->cmap, 256, 0);
+	if (r)
+		dev_err(fbdev->dev, "unable to allocate color map memory\n");
+
+err:
+	return r;
+}
+
+static void fbinfo_cleanup(struct omapfb2_device *fbdev, struct fb_info *fbi)
+{
+	fb_dealloc_cmap(&fbi->cmap);
+}
+
+
+static void omapfb_free_resources(struct omapfb2_device *fbdev)
+{
+	int i;
+
+	DBG("free_resources\n");
+
+	if (fbdev == NULL)
+		return;
+
+	for (i = 0; i < fbdev->num_fbs; i++)
+		unregister_framebuffer(fbdev->fbs[i]);
+
+	/* free the reserved fbmem */
+	omapfb_free_all_fbmem(fbdev);
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		fbinfo_cleanup(fbdev, fbdev->fbs[i]);
+		framebuffer_release(fbdev->fbs[i]);
+	}
+
+	for (i = 0; i < fbdev->num_displays; i++) {
+		if (fbdev->displays[i]->state != OMAP_DSS_DISPLAY_DISABLED)
+			fbdev->displays[i]->disable(fbdev->displays[i]);
+
+		omap_dss_put_device(fbdev->displays[i]);
+	}
+
+	dev_set_drvdata(fbdev->dev, NULL);
+	kfree(fbdev);
+}
+
+static int omapfb_create_framebuffers(struct omapfb2_device *fbdev)
+{
+	int r, i;
+
+	fbdev->num_fbs = 0;
+
+	DBG("create %d framebuffers\n",	CONFIG_FB_OMAP2_NUM_FBS);
+
+	/* allocate fb_infos */
+	for (i = 0; i < CONFIG_FB_OMAP2_NUM_FBS; i++) {
+		struct fb_info *fbi;
+		struct omapfb_info *ofbi;
+
+		fbi = framebuffer_alloc(sizeof(struct omapfb_info),
+				fbdev->dev);
+
+		if (fbi == NULL) {
+			dev_err(fbdev->dev,
+				"unable to allocate memory for plane info\n");
+			return -ENOMEM;
+		}
+
+		clear_fb_info(fbi);
+
+		fbdev->fbs[i] = fbi;
+
+		ofbi = FB2OFB(fbi);
+		ofbi->fbdev = fbdev;
+		ofbi->id = i;
+
+		/* assign these early, so that fb alloc can use them */
+		ofbi->rotation_type = def_vrfb ? OMAP_DSS_ROT_VRFB :
+			OMAP_DSS_ROT_DMA;
+		ofbi->mirror = def_mirror;
+
+		fbdev->num_fbs++;
+	}
+
+	DBG("fb_infos allocated\n");
+
+	/* assign overlays for the fbs */
+	for (i = 0; i < min(fbdev->num_fbs, fbdev->num_overlays); i++) {
+		struct omapfb_info *ofbi = FB2OFB(fbdev->fbs[i]);
+
+		ofbi->overlays[0] = fbdev->overlays[i];
+		ofbi->num_overlays = 1;
+	}
+
+	/* allocate fb memories */
+	r = omapfb_allocate_all_fbs(fbdev);
+	if (r) {
+		dev_err(fbdev->dev, "failed to allocate fbmem\n");
+		return r;
+	}
+
+	DBG("fbmems allocated\n");
+
+	/* setup fb_infos */
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		r = omapfb_fb_init(fbdev, fbdev->fbs[i]);
+		if (r) {
+			dev_err(fbdev->dev, "failed to setup fb_info\n");
+			return r;
+		}
+	}
+
+	DBG("fb_infos initialized\n");
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		r = register_framebuffer(fbdev->fbs[i]);
+		if (r != 0) {
+			dev_err(fbdev->dev,
+				"registering framebuffer %d failed\n", i);
+			return r;
+		}
+	}
+
+	DBG("framebuffers registered\n");
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		r = omapfb_apply_changes(fbdev->fbs[i], 1);
+		if (r) {
+			dev_err(fbdev->dev, "failed to change mode\n");
+			return r;
+		}
+	}
+
+	DBG("create sysfs for fbs\n");
+	r = omapfb_create_sysfs(fbdev);
+	if (r) {
+		dev_err(fbdev->dev, "failed to create sysfs entries\n");
+		return r;
+	}
+
+	/* Enable fb0 */
+	if (fbdev->num_fbs > 0) {
+		struct omapfb_info *ofbi = FB2OFB(fbdev->fbs[0]);
+
+		if (ofbi->num_overlays > 0) {
+			struct omap_overlay *ovl = ofbi->overlays[0];
+
+			r = omapfb_overlay_enable(ovl, 1);
+
+			if (r) {
+				dev_err(fbdev->dev,
+						"failed to enable overlay\n");
+				return r;
+			}
+		}
+	}
+
+	DBG("create_framebuffers done\n");
+
+	return 0;
+}
+
+static int omapfb_mode_to_timings(const char *mode_str,
+		struct omap_video_timings *timings, u8 *bpp)
+{
+	struct fb_info fbi;
+	struct fb_var_screeninfo var;
+	struct fb_ops fbops;
+	int r;
+
+#ifdef CONFIG_OMAP2_DSS_VENC
+	if (strcmp(mode_str, "pal") == 0) {
+		*timings = omap_dss_pal_timings;
+		*bpp = 0;
+		return 0;
+	} else if (strcmp(mode_str, "ntsc") == 0) {
+		*timings = omap_dss_ntsc_timings;
+		*bpp = 0;
+		return 0;
+	}
+#endif
+
+	/* this is quite a hack, but I wanted to use the modedb and for
+	 * that we need fb_info and var, so we create dummy ones */
+
+	memset(&fbi, 0, sizeof(fbi));
+	memset(&var, 0, sizeof(var));
+	memset(&fbops, 0, sizeof(fbops));
+	fbi.fbops = &fbops;
+
+	r = fb_find_mode(&var, &fbi, mode_str, NULL, 0, NULL, 24);
+
+	if (r != 0) {
+		timings->pixel_clock = PICOS2KHZ(var.pixclock);
+		timings->hfp = var.left_margin;
+		timings->hbp = var.right_margin;
+		timings->vfp = var.upper_margin;
+		timings->vbp = var.lower_margin;
+		timings->hsw = var.hsync_len;
+		timings->vsw = var.vsync_len;
+		timings->x_res = var.xres;
+		timings->y_res = var.yres;
+
+		switch (var.bits_per_pixel) {
+		case 16:
+			*bpp = 16;
+			break;
+		case 24:
+		case 32:
+		default:
+			*bpp = 24;
+			break;
+		}
+
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
+
+static int omapfb_set_def_mode(struct omap_dss_device *display, char *mode_str)
+{
+	int r;
+	u8 bpp;
+	struct omap_video_timings timings;
+
+	r = omapfb_mode_to_timings(mode_str, &timings, &bpp);
+	if (r)
+		return r;
+
+	display->panel.recommended_bpp = bpp;
+
+	if (!display->check_timings || !display->set_timings)
+		return -EINVAL;
+
+	r = display->check_timings(display, &timings);
+	if (r)
+		return r;
+
+	display->set_timings(display, &timings);
+
+	return 0;
+}
+
+static int omapfb_parse_def_modes(struct omapfb2_device *fbdev)
+{
+	char *str, *options, *this_opt;
+	int r = 0;
+
+	str = kmalloc(strlen(def_mode) + 1, GFP_KERNEL);
+	strcpy(str, def_mode);
+	options = str;
+
+	while (!r && (this_opt = strsep(&options, ",")) != NULL) {
+		char *p, *display_str, *mode_str;
+		struct omap_dss_device *display;
+		int i;
+
+		p = strchr(this_opt, ':');
+		if (!p) {
+			r = -EINVAL;
+			break;
+		}
+
+		*p = 0;
+		display_str = this_opt;
+		mode_str = p + 1;
+
+		display = NULL;
+		for (i = 0; i < fbdev->num_displays; ++i) {
+			if (strcmp(fbdev->displays[i]->name,
+						display_str) == 0) {
+				display = fbdev->displays[i];
+				break;
+			}
+		}
+
+		if (!display) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = omapfb_set_def_mode(display, mode_str);
+		if (r)
+			break;
+	}
+
+	kfree(str);
+
+	return r;
+}
+
+static int omapfb_probe(struct platform_device *pdev)
+{
+	struct omapfb2_device *fbdev = NULL;
+	int r = 0;
+	int i;
+	struct omap_overlay *ovl;
+	struct omap_dss_device *def_display;
+	struct omap_dss_device *dssdev;
+
+	DBG("omapfb_probe\n");
+
+	if (pdev->num_resources != 0) {
+		dev_err(&pdev->dev, "probed for an unknown device\n");
+		r = -ENODEV;
+		goto err0;
+	}
+
+	fbdev = kzalloc(sizeof(struct omapfb2_device), GFP_KERNEL);
+	if (fbdev == NULL) {
+		r = -ENOMEM;
+		goto err0;
+	}
+
+	mutex_init(&fbdev->mtx);
+
+	fbdev->dev = &pdev->dev;
+	platform_set_drvdata(pdev, fbdev);
+
+	fbdev->num_displays = 0;
+	dssdev = NULL;
+	for_each_dss_dev(dssdev) {
+		omap_dss_get_device(dssdev);
+		fbdev->displays[fbdev->num_displays++] = dssdev;
+	}
+
+	if (fbdev->num_displays == 0) {
+		dev_err(&pdev->dev, "no displays\n");
+		r = -EINVAL;
+		goto cleanup;
+	}
+
+	fbdev->num_overlays = omap_dss_get_num_overlays();
+	for (i = 0; i < fbdev->num_overlays; i++)
+		fbdev->overlays[i] = omap_dss_get_overlay(i);
+
+	fbdev->num_managers = omap_dss_get_num_overlay_managers();
+	for (i = 0; i < fbdev->num_managers; i++)
+		fbdev->managers[i] = omap_dss_get_overlay_manager(i);
+
+	if (def_mode && strlen(def_mode) > 0) {
+		if (omapfb_parse_def_modes(fbdev))
+			dev_warn(&pdev->dev, "cannot parse default modes\n");
+	}
+
+	r = omapfb_create_framebuffers(fbdev);
+	if (r)
+		goto cleanup;
+
+	for (i = 0; i < fbdev->num_managers; i++) {
+		struct omap_overlay_manager *mgr;
+		mgr = fbdev->managers[i];
+		r = mgr->apply(mgr);
+		if (r)
+			dev_warn(fbdev->dev, "failed to apply dispc config\n");
+	}
+
+	DBG("mgr->apply'ed\n");
+
+	/* gfx overlay should be the default one. find a display
+	 * connected to that, and use it as default display */
+	ovl = omap_dss_get_overlay(0);
+	if (ovl->manager && ovl->manager->device) {
+		def_display = ovl->manager->device;
+	} else {
+		dev_warn(&pdev->dev, "cannot find default display\n");
+		def_display = NULL;
+	}
+
+	if (def_display) {
+#ifndef CONFIG_FB_OMAP2_FORCE_AUTO_UPDATE
+		u16 w, h;
+#endif
+		r = def_display->enable(def_display);
+		if (r)
+			dev_warn(fbdev->dev, "Failed to enable display '%s'\n",
+					def_display->name);
+
+		/* set the update mode */
+		if (def_display->caps & OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) {
+#ifdef CONFIG_FB_OMAP2_FORCE_AUTO_UPDATE
+			if (def_display->enable_te)
+				def_display->enable_te(def_display, 1);
+			if (def_display->set_update_mode)
+				def_display->set_update_mode(def_display,
+						OMAP_DSS_UPDATE_AUTO);
+#else /* MANUAL_UPDATE */
+			if (def_display->enable_te)
+				def_display->enable_te(def_display, 0);
+			if (def_display->set_update_mode)
+				def_display->set_update_mode(def_display,
+						OMAP_DSS_UPDATE_MANUAL);
+
+			def_display->get_resolution(def_display, &w, &h);
+			def_display->update(def_display, 0, 0, w, h);
+#endif
+		} else {
+			if (def_display->set_update_mode)
+				def_display->set_update_mode(def_display,
+						OMAP_DSS_UPDATE_AUTO);
+		}
+	}
+
+	return 0;
+
+cleanup:
+	omapfb_free_resources(fbdev);
+err0:
+	dev_err(&pdev->dev, "failed to setup omapfb\n");
+	return r;
+}
+
+static int omapfb_remove(struct platform_device *pdev)
+{
+	struct omapfb2_device *fbdev = platform_get_drvdata(pdev);
+
+	/* FIXME: wait till completion of pending events */
+
+	omapfb_remove_sysfs(fbdev);
+
+	omapfb_free_resources(fbdev);
+
+	return 0;
+}
+
+static struct platform_driver omapfb_driver = {
+	.probe          = omapfb_probe,
+	.remove         = omapfb_remove,
+	.driver         = {
+		.name   = "omapfb",
+		.owner  = THIS_MODULE,
+	},
+};
+
+static int __init omapfb_init(void)
+{
+	DBG("omapfb_init\n");
+
+	if (platform_driver_register(&omapfb_driver)) {
+		printk(KERN_ERR "failed to register omapfb driver\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void __exit omapfb_exit(void)
+{
+	DBG("omapfb_exit\n");
+	platform_driver_unregister(&omapfb_driver);
+}
+
+module_param_named(mode, def_mode, charp, 0);
+module_param_named(vram, def_vram, charp, 0);
+module_param_named(rotate, def_rotate, int, 0);
+module_param_named(vrfb, def_vrfb, bool, 0);
+module_param_named(mirror, def_mirror, bool, 0);
+
+/* late_initcall to let panel/ctrl drivers loaded first.
+ * I guess better option would be a more dynamic approach,
+ * so that omapfb reacts to new panels when they are loaded */
+late_initcall(omapfb_init);
+/*module_init(omapfb_init);*/
+module_exit(omapfb_exit);
+
+MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@nokia.com>");
+MODULE_DESCRIPTION("OMAP2/3 Framebuffer");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/omap2/omapfb/omapfb-sysfs.c b/drivers/video/omap2/omapfb/omapfb-sysfs.c
new file mode 100644
index 000000000000..62bb88f5c192
--- /dev/null
+++ b/drivers/video/omap2/omapfb/omapfb-sysfs.c
@@ -0,0 +1,507 @@
+/*
+ * linux/drivers/video/omap2/omapfb-sysfs.c
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
+ *
+ * Some code and ideas taken from drivers/video/omap/ driver
+ * by Imre Deak.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fb.h>
+#include <linux/sysfs.h>
+#include <linux/device.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/omapfb.h>
+
+#include <plat/display.h>
+#include <plat/vrfb.h>
+
+#include "omapfb.h"
+
+static ssize_t show_rotate_type(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ofbi->rotation_type);
+}
+
+static ssize_t store_rotate_type(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	enum omap_dss_rotation_type rot_type;
+	int r;
+
+	rot_type = simple_strtoul(buf, NULL, 0);
+
+	if (rot_type != OMAP_DSS_ROT_DMA && rot_type != OMAP_DSS_ROT_VRFB)
+		return -EINVAL;
+
+	lock_fb_info(fbi);
+
+	r = 0;
+	if (rot_type == ofbi->rotation_type)
+		goto out;
+
+	if (ofbi->region.size) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	ofbi->rotation_type = rot_type;
+
+	/*
+	 * Since the VRAM for this FB is not allocated at the moment we don't
+	 * need to do any further parameter checking at this point.
+	 */
+out:
+	unlock_fb_info(fbi);
+
+	return r ? r : count;
+}
+
+
+static ssize_t show_mirror(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ofbi->mirror);
+}
+
+static ssize_t store_mirror(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	bool mirror;
+	int r;
+	struct fb_var_screeninfo new_var;
+
+	mirror = simple_strtoul(buf, NULL, 0);
+
+	if (mirror != 0 && mirror != 1)
+		return -EINVAL;
+
+	lock_fb_info(fbi);
+
+	ofbi->mirror = mirror;
+
+	memcpy(&new_var, &fbi->var, sizeof(new_var));
+	r = check_fb_var(fbi, &new_var);
+	if (r)
+		goto out;
+	memcpy(&fbi->var, &new_var, sizeof(fbi->var));
+
+	set_fb_fix(fbi);
+
+	r = omapfb_apply_changes(fbi, 0);
+	if (r)
+		goto out;
+
+	r = count;
+out:
+	unlock_fb_info(fbi);
+
+	return r;
+}
+
+static ssize_t show_overlays(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	ssize_t l = 0;
+	int t;
+
+	omapfb_lock(fbdev);
+	lock_fb_info(fbi);
+
+	for (t = 0; t < ofbi->num_overlays; t++) {
+		struct omap_overlay *ovl = ofbi->overlays[t];
+		int ovlnum;
+
+		for (ovlnum = 0; ovlnum < fbdev->num_overlays; ++ovlnum)
+			if (ovl == fbdev->overlays[ovlnum])
+				break;
+
+		l += snprintf(buf + l, PAGE_SIZE - l, "%s%d",
+				t == 0 ? "" : ",", ovlnum);
+	}
+
+	l += snprintf(buf + l, PAGE_SIZE - l, "\n");
+
+	unlock_fb_info(fbi);
+	omapfb_unlock(fbdev);
+
+	return l;
+}
+
+static struct omapfb_info *get_overlay_fb(struct omapfb2_device *fbdev,
+		struct omap_overlay *ovl)
+{
+	int i, t;
+
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		struct omapfb_info *ofbi = FB2OFB(fbdev->fbs[i]);
+
+		for (t = 0; t < ofbi->num_overlays; t++) {
+			if (ofbi->overlays[t] == ovl)
+				return ofbi;
+		}
+	}
+
+	return NULL;
+}
+
+static ssize_t store_overlays(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	struct omapfb2_device *fbdev = ofbi->fbdev;
+	struct omap_overlay *ovls[OMAPFB_MAX_OVL_PER_FB];
+	struct omap_overlay *ovl;
+	int num_ovls, r, i;
+	int len;
+	bool added = false;
+
+	num_ovls = 0;
+
+	len = strlen(buf);
+	if (buf[len - 1] == '\n')
+		len = len - 1;
+
+	omapfb_lock(fbdev);
+	lock_fb_info(fbi);
+
+	if (len > 0) {
+		char *p = (char *)buf;
+		int ovlnum;
+
+		while (p < buf + len) {
+			int found;
+			if (num_ovls == OMAPFB_MAX_OVL_PER_FB) {
+				r = -EINVAL;
+				goto out;
+			}
+
+			ovlnum = simple_strtoul(p, &p, 0);
+			if (ovlnum > fbdev->num_overlays) {
+				r = -EINVAL;
+				goto out;
+			}
+
+			found = 0;
+			for (i = 0; i < num_ovls; ++i) {
+				if (ovls[i] == fbdev->overlays[ovlnum]) {
+					found = 1;
+					break;
+				}
+			}
+
+			if (!found)
+				ovls[num_ovls++] = fbdev->overlays[ovlnum];
+
+			p++;
+		}
+	}
+
+	for (i = 0; i < num_ovls; ++i) {
+		struct omapfb_info *ofbi2 = get_overlay_fb(fbdev, ovls[i]);
+		if (ofbi2 && ofbi2 != ofbi) {
+			dev_err(fbdev->dev, "overlay already in use\n");
+			r = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* detach unused overlays */
+	for (i = 0; i < ofbi->num_overlays; ++i) {
+		int t, found;
+
+		ovl = ofbi->overlays[i];
+
+		found = 0;
+
+		for (t = 0; t < num_ovls; ++t) {
+			if (ovl == ovls[t]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+
+		DBG("detaching %d\n", ofbi->overlays[i]->id);
+
+		omapfb_overlay_enable(ovl, 0);
+
+		if (ovl->manager)
+			ovl->manager->apply(ovl->manager);
+
+		for (t = i + 1; t < ofbi->num_overlays; t++) {
+			ofbi->rotation[t-1] = ofbi->rotation[t];
+			ofbi->overlays[t-1] = ofbi->overlays[t];
+		}
+
+		ofbi->num_overlays--;
+		i--;
+	}
+
+	for (i = 0; i < num_ovls; ++i) {
+		int t, found;
+
+		ovl = ovls[i];
+
+		found = 0;
+
+		for (t = 0; t < ofbi->num_overlays; ++t) {
+			if (ovl == ofbi->overlays[t]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+		ofbi->rotation[ofbi->num_overlays] = 0;
+		ofbi->overlays[ofbi->num_overlays++] = ovl;
+
+		added = true;
+	}
+
+	if (added) {
+		r = omapfb_apply_changes(fbi, 0);
+		if (r)
+			goto out;
+	}
+
+	r = count;
+out:
+	unlock_fb_info(fbi);
+	omapfb_unlock(fbdev);
+
+	return r;
+}
+
+static ssize_t show_overlays_rotate(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	ssize_t l = 0;
+	int t;
+
+	lock_fb_info(fbi);
+
+	for (t = 0; t < ofbi->num_overlays; t++) {
+		l += snprintf(buf + l, PAGE_SIZE - l, "%s%d",
+				t == 0 ? "" : ",", ofbi->rotation[t]);
+	}
+
+	l += snprintf(buf + l, PAGE_SIZE - l, "\n");
+
+	unlock_fb_info(fbi);
+
+	return l;
+}
+
+static ssize_t store_overlays_rotate(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	int num_ovls = 0, r, i;
+	int len;
+	bool changed = false;
+	u8 rotation[OMAPFB_MAX_OVL_PER_FB];
+
+	len = strlen(buf);
+	if (buf[len - 1] == '\n')
+		len = len - 1;
+
+	lock_fb_info(fbi);
+
+	if (len > 0) {
+		char *p = (char *)buf;
+
+		while (p < buf + len) {
+			int rot;
+
+			if (num_ovls == ofbi->num_overlays) {
+				r = -EINVAL;
+				goto out;
+			}
+
+			rot = simple_strtoul(p, &p, 0);
+			if (rot < 0 || rot > 3) {
+				r = -EINVAL;
+				goto out;
+			}
+
+			if (ofbi->rotation[num_ovls] != rot)
+				changed = true;
+
+			rotation[num_ovls++] = rot;
+
+			p++;
+		}
+	}
+
+	if (num_ovls != ofbi->num_overlays) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (changed) {
+		for (i = 0; i < num_ovls; ++i)
+			ofbi->rotation[i] = rotation[i];
+
+		r = omapfb_apply_changes(fbi, 0);
+		if (r)
+			goto out;
+
+		/* FIXME error handling? */
+	}
+
+	r = count;
+out:
+	unlock_fb_info(fbi);
+
+	return r;
+}
+
+static ssize_t show_size(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n", ofbi->region.size);
+}
+
+static ssize_t store_size(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	unsigned long size;
+	int r;
+	int i;
+
+	size = PAGE_ALIGN(simple_strtoul(buf, NULL, 0));
+
+	lock_fb_info(fbi);
+
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ofbi->overlays[i]->info.enabled) {
+			r = -EBUSY;
+			goto out;
+		}
+	}
+
+	if (size != ofbi->region.size) {
+		r = omapfb_realloc_fbmem(fbi, size, ofbi->region.type);
+		if (r) {
+			dev_err(dev, "realloc fbmem failed\n");
+			goto out;
+		}
+	}
+
+	r = count;
+out:
+	unlock_fb_info(fbi);
+
+	return r;
+}
+
+static ssize_t show_phys(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	return snprintf(buf, PAGE_SIZE, "%0x\n", ofbi->region.paddr);
+}
+
+static ssize_t show_virt(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *fbi = dev_get_drvdata(dev);
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+
+	return snprintf(buf, PAGE_SIZE, "%p\n", ofbi->region.vaddr);
+}
+
+static struct device_attribute omapfb_attrs[] = {
+	__ATTR(rotate_type, S_IRUGO | S_IWUSR, show_rotate_type,
+			store_rotate_type),
+	__ATTR(mirror, S_IRUGO | S_IWUSR, show_mirror, store_mirror),
+	__ATTR(size, S_IRUGO | S_IWUSR, show_size, store_size),
+	__ATTR(overlays, S_IRUGO | S_IWUSR, show_overlays, store_overlays),
+	__ATTR(overlays_rotate, S_IRUGO | S_IWUSR, show_overlays_rotate,
+			store_overlays_rotate),
+	__ATTR(phys_addr, S_IRUGO, show_phys, NULL),
+	__ATTR(virt_addr, S_IRUGO, show_virt, NULL),
+};
+
+int omapfb_create_sysfs(struct omapfb2_device *fbdev)
+{
+	int i;
+	int r;
+
+	DBG("create sysfs for fbs\n");
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		int t;
+		for (t = 0; t < ARRAY_SIZE(omapfb_attrs); t++) {
+			r = device_create_file(fbdev->fbs[i]->dev,
+					&omapfb_attrs[t]);
+
+			if (r) {
+				dev_err(fbdev->dev, "failed to create sysfs "
+						"file\n");
+				return r;
+			}
+		}
+	}
+
+	return 0;
+}
+
+void omapfb_remove_sysfs(struct omapfb2_device *fbdev)
+{
+	int i, t;
+
+	DBG("remove sysfs for fbs\n");
+	for (i = 0; i < fbdev->num_fbs; i++) {
+		for (t = 0; t < ARRAY_SIZE(omapfb_attrs); t++)
+			device_remove_file(fbdev->fbs[i]->dev,
+					&omapfb_attrs[t]);
+	}
+}
+
diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h
new file mode 100644
index 000000000000..f7c9c739e5ef
--- /dev/null
+++ b/drivers/video/omap2/omapfb/omapfb.h
@@ -0,0 +1,146 @@
+/*
+ * linux/drivers/video/omap2/omapfb.h
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
+ *
+ * Some code and ideas taken from drivers/video/omap/ driver
+ * by Imre Deak.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __DRIVERS_VIDEO_OMAP2_OMAPFB_H__
+#define __DRIVERS_VIDEO_OMAP2_OMAPFB_H__
+
+#ifdef CONFIG_FB_OMAP2_DEBUG_SUPPORT
+#define DEBUG
+#endif
+
+#include <plat/display.h>
+
+#ifdef DEBUG
+extern unsigned int omapfb_debug;
+#define DBG(format, ...) \
+	if (omapfb_debug) \
+		printk(KERN_DEBUG "OMAPFB: " format, ## __VA_ARGS__)
+#else
+#define DBG(format, ...)
+#endif
+
+#define FB2OFB(fb_info) ((struct omapfb_info *)(fb_info->par))
+
+/* max number of overlays to which a framebuffer data can be direct */
+#define OMAPFB_MAX_OVL_PER_FB 3
+
+struct omapfb2_mem_region {
+	u32		paddr;
+	void __iomem	*vaddr;
+	struct vrfb	vrfb;
+	unsigned long	size;
+	u8		type;		/* OMAPFB_PLANE_MEM_* */
+	bool		alloc;		/* allocated by the driver */
+	bool		map;		/* kernel mapped by the driver */
+};
+
+/* appended to fb_info */
+struct omapfb_info {
+	int id;
+	struct omapfb2_mem_region region;
+	atomic_t map_count;
+	int num_overlays;
+	struct omap_overlay *overlays[OMAPFB_MAX_OVL_PER_FB];
+	struct omapfb2_device *fbdev;
+	enum omap_dss_rotation_type rotation_type;
+	u8 rotation[OMAPFB_MAX_OVL_PER_FB];
+	bool mirror;
+};
+
+struct omapfb2_device {
+	struct device *dev;
+	struct mutex  mtx;
+
+	u32 pseudo_palette[17];
+
+	int state;
+
+	unsigned num_fbs;
+	struct fb_info *fbs[10];
+
+	unsigned num_displays;
+	struct omap_dss_device *displays[10];
+	unsigned num_overlays;
+	struct omap_overlay *overlays[10];
+	unsigned num_managers;
+	struct omap_overlay_manager *managers[10];
+};
+
+struct omapfb_colormode {
+	enum omap_color_mode dssmode;
+	u32 bits_per_pixel;
+	u32 nonstd;
+	struct fb_bitfield red;
+	struct fb_bitfield green;
+	struct fb_bitfield blue;
+	struct fb_bitfield transp;
+};
+
+void set_fb_fix(struct fb_info *fbi);
+int check_fb_var(struct fb_info *fbi, struct fb_var_screeninfo *var);
+int omapfb_realloc_fbmem(struct fb_info *fbi, unsigned long size, int type);
+int omapfb_apply_changes(struct fb_info *fbi, int init);
+
+int omapfb_create_sysfs(struct omapfb2_device *fbdev);
+void omapfb_remove_sysfs(struct omapfb2_device *fbdev);
+
+int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg);
+
+int dss_mode_to_fb_mode(enum omap_color_mode dssmode,
+			struct fb_var_screeninfo *var);
+
+/* find the display connected to this fb, if any */
+static inline struct omap_dss_device *fb2display(struct fb_info *fbi)
+{
+	struct omapfb_info *ofbi = FB2OFB(fbi);
+	int i;
+
+	/* XXX: returns the display connected to first attached overlay */
+	for (i = 0; i < ofbi->num_overlays; i++) {
+		if (ofbi->overlays[i]->manager)
+			return ofbi->overlays[i]->manager->device;
+	}
+
+	return NULL;
+}
+
+static inline void omapfb_lock(struct omapfb2_device *fbdev)
+{
+	mutex_lock(&fbdev->mtx);
+}
+
+static inline void omapfb_unlock(struct omapfb2_device *fbdev)
+{
+	mutex_unlock(&fbdev->mtx);
+}
+
+static inline int omapfb_overlay_enable(struct omap_overlay *ovl,
+		int enable)
+{
+	struct omap_overlay_info info;
+
+	ovl->get_overlay_info(ovl, &info);
+	info.enabled = enable;
+	return ovl->set_overlay_info(ovl, &info);
+}
+
+#endif
diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h
index a8efa92c6c35..f46c40ac6d45 100644
--- a/include/linux/omapfb.h
+++ b/include/linux/omapfb.h
@@ -24,6 +24,7 @@
 #ifndef __LINUX_OMAPFB_H__
 #define __LINUX_OMAPFB_H__
 
+#include <linux/fb.h>
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
@@ -50,6 +51,12 @@
 #define OMAPFB_UPDATE_WINDOW	OMAP_IOW(54, struct omapfb_update_window)
 #define OMAPFB_SETUP_MEM	OMAP_IOW(55, struct omapfb_mem_info)
 #define OMAPFB_QUERY_MEM	OMAP_IOW(56, struct omapfb_mem_info)
+#define OMAPFB_WAITFORVSYNC	OMAP_IO(57)
+#define OMAPFB_MEMORY_READ	OMAP_IOR(58, struct omapfb_memory_read)
+#define OMAPFB_GET_OVERLAY_COLORMODE OMAP_IOR(59, struct omapfb_ovl_colormode)
+#define OMAPFB_WAITFORGO	OMAP_IO(60)
+#define OMAPFB_GET_VRAM_INFO	OMAP_IOR(61, struct omapfb_vram_info)
+#define OMAPFB_SET_TEARSYNC	OMAP_IOW(62, struct omapfb_tearsync_info)
 
 #define OMAPFB_CAPS_GENERIC_MASK	0x00000fff
 #define OMAPFB_CAPS_LCDC_MASK		0x00fff000
@@ -87,6 +94,13 @@ enum omapfb_color_format {
 	OMAPFB_COLOR_CLUT_1BPP,
 	OMAPFB_COLOR_RGB444,
 	OMAPFB_COLOR_YUY422,
+
+	OMAPFB_COLOR_ARGB16,
+	OMAPFB_COLOR_RGB24U,	/* RGB24, 32-bit container */
+	OMAPFB_COLOR_RGB24P,	/* RGB24, 24-bit container */
+	OMAPFB_COLOR_ARGB32,
+	OMAPFB_COLOR_RGBA32,
+	OMAPFB_COLOR_RGBX32,
 };
 
 struct omapfb_update_window {
@@ -158,6 +172,40 @@ enum omapfb_update_mode {
 	OMAPFB_MANUAL_UPDATE
 };
 
+struct omapfb_memory_read {
+	__u16 x;
+	__u16 y;
+	__u16 w;
+	__u16 h;
+	size_t buffer_size;
+	void __user *buffer;
+};
+
+struct omapfb_ovl_colormode {
+	__u8 overlay_idx;
+	__u8 mode_idx;
+	__u32 bits_per_pixel;
+	__u32 nonstd;
+	struct fb_bitfield red;
+	struct fb_bitfield green;
+	struct fb_bitfield blue;
+	struct fb_bitfield transp;
+};
+
+struct omapfb_vram_info {
+	__u32 total;
+	__u32 free;
+	__u32 largest_free_block;
+	__u32 reserved[5];
+};
+
+struct omapfb_tearsync_info {
+	__u8 enabled;
+	__u8 reserved1[3];
+	__u16 line;
+	__u16 reserved2;
+};
+
 #ifdef __KERNEL__
 
 #include <plat/board.h>
@@ -173,6 +221,11 @@ struct omapfb_mem_region {
 	void __iomem	*vaddr;
 	unsigned long	size;
 	u8		type;		/* OMAPFB_PLANE_MEM_* */
+	enum omapfb_color_format format;/* OMAPFB_COLOR_* */
+	unsigned	format_used:1;	/* Must be set when format is set.
+					 * Needed b/c of the badly chosen 0
+					 * base for OMAPFB_COLOR_* values
+					 */
 	unsigned	alloc:1;	/* allocated by the driver */
 	unsigned	map:1;		/* kernel mapped by the driver */
 };
@@ -189,6 +242,7 @@ struct omapfb_platform_data {
 };
 
 /* in arch/arm/plat-omap/fb.c */
+extern void omapfb_set_platform_data(struct omapfb_platform_data *data);
 extern void omapfb_set_ctrl_platform_data(void *pdata);
 extern void omapfb_reserve_sdram(void);
 
-- 
cgit v1.2.3


From a63ce5b306855bccdacba95c03bfc293316c8ae3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 7 Dec 2009 09:11:39 -0500
Subject: tracing: Buffer the output of seq_file in case of filled buffer

If the seq_read fills the buffer it will call s_start again on the next
itertation with the same position. This causes a problem with the
function_graph tracer because it consumes the iteration in order to
determine leaf functions.

What happens is that the iterator stores the entry, and the function
graph plugin will look at the next entry. If that next entry is a return
of the same function and task, then the function is a leaf and the
function_graph plugin calls ring_buffer_read which moves the ring buffer
iterator forward (the trace iterator still points to the function start
entry).

The copying of the trace_seq to the seq_file buffer will fail if the
seq_file buffer is full. The seq_read will not show this entry.
The next read by userspace will cause seq_read to again call s_start
which will reuse the trace iterator entry (the function start entry).
But the function return entry was already consumed. The function graph
plugin will think that this entry is a nested function and not a leaf.

To solve this, the trace code now checks the return status of the
seq_printf (trace_print_seq). If the writing to the seq_file buffer
fails, we set a flag in the iterator (leftover) and we do not reset
the trace_seq buffer. On the next call to s_start, we check the leftover
flag, and if it is set, we just reuse the trace_seq buffer and do not
call into the plugin print functions.

Before this patch:

 2)               |      fput() {
 2)               |        __fput() {
 2)   0.550 us    |          inotify_inode_queue_event();
 2)               |          __fsnotify_parent() {
 2)   0.540 us    |          inotify_dentry_parent_queue_event();

After the patch:

 2)               |      fput() {
 2)               |        __fput() {
 2)   0.550 us    |          inotify_inode_queue_event();
 2)   0.548 us    |          __fsnotify_parent();
 2)   0.540 us    |          inotify_dentry_parent_queue_event();

[
  Updated the patch to fix a missing return 0 from the trace_print_seq()
  stub when CONFIG_TRACING is disabled.

  Reported-by: Ingo Molnar <mingo@elte.hu>
]

Reported-by: Jiri Olsa <jolsa@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  1 +
 include/linux/trace_seq.h    |  5 +++--
 kernel/trace/trace.c         | 35 ++++++++++++++++++++++++++++++++---
 kernel/trace/trace_output.c  | 14 +++++++++++---
 4 files changed, 47 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47bbdf9c38d0..38f8d6553831 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -57,6 +57,7 @@ struct trace_iterator {
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
+	int			leftover;
 	int			cpu;
 	u64			ts;
 
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 09077f6ed128..fad6857bc0f3 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -33,7 +33,7 @@ extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	__attribute__ ((format (printf, 2, 0)));
 extern int
 trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
-extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern int trace_print_seq(struct seq_file *m, struct trace_seq *s);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern int trace_seq_puts(struct trace_seq *s, const char *str);
@@ -55,8 +55,9 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	return 0;
 }
 
-static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
+	return 0;
 }
 static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc937e1baa91..484114d70743 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1516,6 +1516,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
 	int i = (int)*pos;
 	void *ent;
 
+	WARN_ON_ONCE(iter->leftover);
+
 	(*pos)++;
 
 	/* can't go backwards */
@@ -1614,8 +1616,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 			;
 
 	} else {
-		l = *pos - 1;
-		p = s_next(m, p, &l);
+		/*
+		 * If we overflowed the seq_file before, then we want
+		 * to just reuse the trace_seq buffer again.
+		 */
+		if (iter->leftover)
+			p = iter;
+		else {
+			l = *pos - 1;
+			p = s_next(m, p, &l);
+		}
 	}
 
 	trace_event_read_lock();
@@ -1923,6 +1933,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
+	int ret;
 
 	if (iter->ent == NULL) {
 		if (iter->tr) {
@@ -1942,9 +1953,27 @@ static int s_show(struct seq_file *m, void *v)
 			if (!(trace_flags & TRACE_ITER_VERBOSE))
 				print_func_help_header(m);
 		}
+	} else if (iter->leftover) {
+		/*
+		 * If we filled the seq_file buffer earlier, we
+		 * want to just show it now.
+		 */
+		ret = trace_print_seq(m, &iter->seq);
+
+		/* ret should this time be zero, but you never know */
+		iter->leftover = ret;
+
 	} else {
 		print_trace_line(iter);
-		trace_print_seq(m, &iter->seq);
+		ret = trace_print_seq(m, &iter->seq);
+		/*
+		 * If we overflow the seq_file buffer, then it will
+		 * ask us for this data again at start up.
+		 * Use that instead.
+		 *  ret is 0 if seq_file write succeeded.
+		 *        -1 otherwise.
+		 */
+		iter->leftover = ret;
 	}
 
 	return 0;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..e5cf90fef34e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
-void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+	int ret;
+
+	ret = seq_write(m, s->buffer, len);
 
-	seq_write(m, s->buffer, len);
+	/*
+	 * Only reset this buffer if we successfully wrote to the
+	 * seq_file buffer.
+	 */
+	if (!ret)
+		trace_seq_init(s);
 
-	trace_seq_init(s);
+	return ret;
 }
 
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
-- 
cgit v1.2.3


From d184b31c0e403580aafb3f8955ecc185a3d04801 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 25 Nov 2009 16:10:14 +0100
Subject: tracing: Add full state to trace_seq

The trace_seq buffer might fill up, and right now one needs to check the
return value of each printf into the buffer to check for that.

Instead, have the buffer keep track of whether it is full or not, and
reject more input if it is full or would have overflowed with an input
that wasn't added.

Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_seq.h   |  2 ++
 kernel/trace/trace_output.c | 61 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index fad6857bc0f3..5cf397ceb726 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -14,6 +14,7 @@ struct trace_seq {
 	unsigned char		buffer[PAGE_SIZE];
 	unsigned int		len;
 	unsigned int		readpos;
+	int			full;
 };
 
 static inline void
@@ -21,6 +22,7 @@ trace_seq_init(struct trace_seq *s)
 {
 	s->len = 0;
 	s->readpos = 0;
+	s->full = 0;
 }
 
 /*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e5cf90fef34e..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -93,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	va_list ap;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	va_start(ap, fmt);
@@ -101,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	va_end(ap);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -127,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	int len = (PAGE_SIZE - 1) - s->len;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -147,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	int len = (PAGE_SIZE - 1) - s->len;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -175,9 +181,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 {
 	int len = strlen(str);
 
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
 		return 0;
 
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
+		return 0;
+	}
+
 	memcpy(s->buffer + s->len, str, len);
 	s->len += len;
 
@@ -186,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 
 int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
-	if (s->len >= (PAGE_SIZE - 1))
+	if (s->full)
 		return 0;
 
+	if (s->len >= (PAGE_SIZE - 1)) {
+		s->full = 1;
+		return 0;
+	}
+
 	s->buffer[s->len++] = c;
 
 	return 1;
@@ -196,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
 
 int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
 {
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
 		return 0;
 
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
+		return 0;
+	}
+
 	memcpy(s->buffer + s->len, mem, len);
 	s->len += len;
 
@@ -211,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
 	const unsigned char *data = mem;
 	int i, j;
 
+	if (s->full)
+		return 0;
+
 #ifdef __BIG_ENDIAN
 	for (i = 0, j = 0; i < len; i++) {
 #else
@@ -228,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
 {
 	void *ret;
 
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
+		return 0;
+
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
 		return NULL;
+	}
 
 	ret = s->buffer + s->len;
 	s->len += len;
@@ -241,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 {
 	unsigned char *p;
 
-	if (s->len >= (PAGE_SIZE - 1))
+	if (s->full)
 		return 0;
+
+	if (s->len >= (PAGE_SIZE - 1)) {
+		s->full = 1;
+		return 0;
+	}
+
 	p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
 	if (!IS_ERR(p)) {
 		p = mangle_path(s->buffer + s->len, p, "\n");
@@ -255,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 		return 1;
 	}
 
+	s->full = 1;
 	return 0;
 }
 
@@ -381,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 	unsigned long vmstart = 0;
 	int ret = 1;
 
+	if (s->full)
+		return 0;
+
 	if (mm) {
 		const struct vm_area_struct *vma;
 
-- 
cgit v1.2.3


From 876fba43cc810e3c37ce26995933f9547b83cb0e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 11 Nov 2009 15:22:15 +0100
Subject: ACPI: add const to acpi_check_resource_conflict()

acpi_check_resource_conflict() doesn't change the resource
it operates on, so the res parameter can be marked const.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c   | 2 +-
 include/linux/acpi.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 7c1c59ea9ec6..02e8464e480f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1118,7 +1118,7 @@ __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
 
 /* Check for resource conflicts between ACPI OperationRegions and native
  * drivers */
-int acpi_check_resource_conflict(struct resource *res)
+int acpi_check_resource_conflict(const struct resource *res)
 {
 	struct acpi_res_list *res_list_elem;
 	int ioport;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dfcd920c3e54..c920d2def4d3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -240,7 +240,7 @@ extern int pnpacpi_disabled;
 #define PXM_INVAL	(-1)
 #define NID_INVAL	(-1)
 
-int acpi_check_resource_conflict(struct resource *res);
+int acpi_check_resource_conflict(const struct resource *res);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name);
-- 
cgit v1.2.3


From 1bbfa6f25673019dc0acc9308b667c96f6cda8bf Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 9 Dec 2009 20:07:03 -0500
Subject: sched: Mark sched_clock() as notrace

The core ftrace code (trace_clock_local) calls sched_clock()
directly, so we don't want to recurisvely trigger the ftrace
code.  Rather than update every sched_clock() definition, tag
the prototype for everyone as notrace.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1260407223-10900-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..576d838adf68 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1836,7 +1836,8 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 extern int sched_clock_stable;
 #endif
 
-extern unsigned long long sched_clock(void);
+/* ftrace calls sched_clock() directly */
+extern unsigned long long notrace sched_clock(void);
 
 extern void sched_clock_init(void);
 extern u64 sched_clock_cpu(int cpu);
-- 
cgit v1.2.3


From 41d2e494937715d3150e5c75d01f0e75ae899337 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Nov 2009 17:05:44 +0100
Subject: hrtimer: Tune hrtimer_interrupt hang logic

The hrtimer_interrupt hang logic adjusts min_delta_ns based on the
execution time of the hrtimer callbacks.

This is error-prone for virtual machines, where a guest vcpu can be
scheduled out during the execution of the callbacks (and the callbacks
themselves can do operations that translate to blocking operations in
the hypervisor), which in can lead to large min_delta_ns rendering the
system unusable.

Replace the current heuristics with something more reliable. Allow the
interrupt code to try 3 times to catch up with the lost time. If that
fails use the total time spent in the interrupt handler to defer the
next timer interrupt so the system can catch up with other things
which got delayed. Limit that deferment to 100ms.

The retry events and the maximum time spent in the interrupt handler
are recorded and exposed via /proc/timer_list

Inspired by a patch from Marcelo.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org
---
 include/linux/hrtimer.h  | 13 +++++--
 kernel/hrtimer.c         | 97 ++++++++++++++++++++++++++++--------------------
 kernel/time/timer_list.c |  5 ++-
 3 files changed, 70 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 9bace4b9f4fe..040b6796ab4d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -162,10 +162,11 @@ struct hrtimer_clock_base {
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
  * @hres_active:	State of high resolution mode
- * @check_clocks:	Indictator, when set evaluate time source and clock
- *			event devices whether high resolution mode can be
- *			activated.
- * @nr_events:		Total number of timer interrupt events
+ * @hang_detected:	The last hrtimer interrupt detected a hang
+ * @nr_events:		Total number of hrtimer interrupt events
+ * @nr_retries:		Total number of hrtimer interrupt retries
+ * @nr_hangs:		Total number of hrtimer interrupt hangs
+ * @max_hang_time:	Maximum time spent in hrtimer_interrupt
  */
 struct hrtimer_cpu_base {
 	spinlock_t			lock;
@@ -173,7 +174,11 @@ struct hrtimer_cpu_base {
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t				expires_next;
 	int				hres_active;
+	int				hang_detected;
 	unsigned long			nr_events;
+	unsigned long			nr_retries;
+	unsigned long			nr_hangs;
+	ktime_t				max_hang_time;
 #endif
 };
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ede527708123..931a4d99bc55 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
@@ -1217,30 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-	       "forcing clock min delta to %llu ns\n",
-	       (unsigned long long) dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1249,21 +1234,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
- retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
 	spin_lock(&cpu_base->lock);
@@ -1325,10 +1304,48 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 665c76edbf17..9d80db4747d4 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 	P_ns(expires_next);
 	P(hres_active);
 	P(nr_events);
+	P(nr_retries);
+	P(nr_hangs);
+	P_ns(max_hang_time);
 #endif
 #undef P
 #undef P_ns
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.4\n");
+	SEQ_printf(m, "Timer List Version: v0.5\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
-- 
cgit v1.2.3


From 5f201907dfe4ad42c44006ddfcec00ed12e59497 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 10 Dec 2009 10:56:29 +0100
Subject: hrtimer: move timer stats helper functions to hrtimer.c

There is no reason to make timer_stats_hrtimer_set_start_info and
friends visible to the rest of the kernel. So move all of them to
hrtimer.c.  Also make timer_stats_hrtimer_set_start_info a static
inline function so it gets inlined and we avoid another function call.
Based on a patch by Thomas Gleixner.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
LKML-Reference: <20091210095629.GC4144@osiris.boeblingen.de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 43 -------------------------------------------
 kernel/hrtimer.c        | 24 ++++++++++++++++++++----
 2 files changed, 20 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 040b6796ab4d..af634e95871d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -440,47 +440,4 @@ extern u64 ktime_divns(const ktime_t kt, s64 div);
 /* Show pending timers: */
 extern void sysrq_timer_list_show(void);
 
-/*
- * Timer-statistics info:
- */
-#ifdef CONFIG_TIMER_STATS
-
-extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char *comm,
-				     unsigned int timer_flag);
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-	if (likely(!timer_stats_active))
-		return;
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm, 0);
-}
-
-extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
-						 void *addr);
-
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-	__timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-	timer->start_site = NULL;
-}
-#else
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-}
-
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-}
-#endif
-
 #endif
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 931a4d99bc55..d2f9239dc6ba 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -756,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
 #endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
+#endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
-- 
cgit v1.2.3


From 94004ed726f38a841cc51f97c4a3f9eda9fbd0d9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 30 Sep 2009 22:16:33 +0200
Subject: kill wait_on_page_writeback_range

All callers really want the more logical filemap_fdatawait_range interface,
so convert them to use it and merge wait_on_page_writeback_range into
filemap_fdatawait_range.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/vfs.txt |  2 +-
 fs/jbd2/commit.c                  |  2 +-
 fs/sync.c                         |  8 ++-----
 include/linux/fs.h                |  2 --
 mm/filemap.c                      | 49 +++++++++++----------------------------
 5 files changed, 18 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 623f094c9d8d..3de2f32edd90 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -472,7 +472,7 @@ __sync_single_inode) to check if ->writepages has been successful in
 writing out the whole address_space.
 
 The Writeback tag is used by filemap*wait* and sync_page* functions,
-via wait_on_page_writeback_range, to wait for all writeback to
+via filemap_fdatawait_range, to wait for all writeback to
 complete.  While waiting ->sync_page (if defined) will be called on
 each page that is found to require writeback.
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index d4cfd6d2779e..c5edc13ccdd0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,7 +286,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 		if (err) {
 			/*
 			 * Because AS_EIO is cleared by
-			 * wait_on_page_writeback_range(), set it again so
+			 * filemap_fdatawait_range(), set it again so
 			 * that user process can get -EIO from fsync().
 			 */
 			set_bit(AS_EIO,
diff --git a/fs/sync.c b/fs/sync.c
index b75ca68dc081..36752a683481 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -453,9 +453,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
 
 	ret = 0;
 	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
-		ret = wait_on_page_writeback_range(mapping,
-					offset >> PAGE_CACHE_SHIFT,
-					endbyte >> PAGE_CACHE_SHIFT);
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
 		if (ret < 0)
 			goto out;
 	}
@@ -468,9 +466,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
 	}
 
 	if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
-		ret = wait_on_page_writeback_range(mapping,
-					offset >> PAGE_CACHE_SHIFT,
-					endbyte >> PAGE_CACHE_SHIFT);
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
 	}
 out:
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 891f7d642e5c..a057f48eb156 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2091,8 +2091,6 @@ extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
-extern int wait_on_page_writeback_range(struct address_space *mapping,
-				pgoff_t start, pgoff_t end);
 extern int __filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end, int sync_mode);
 extern int filemap_fdatawrite_range(struct address_space *mapping,
diff --git a/mm/filemap.c b/mm/filemap.c
index c3d3506ecaba..8b4d88f9249e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -260,27 +260,27 @@ int filemap_flush(struct address_space *mapping)
 EXPORT_SYMBOL(filemap_flush);
 
 /**
- * wait_on_page_writeback_range - wait for writeback to complete
- * @mapping:	target address_space
- * @start:	beginning page index
- * @end:	ending page index
+ * filemap_fdatawait_range - wait for writeback to complete
+ * @mapping:		address space structure to wait for
+ * @start_byte:		offset in bytes where the range starts
+ * @end_byte:		offset in bytes where the range ends (inclusive)
  *
- * Wait for writeback to complete against pages indexed by start->end
- * inclusive
+ * Walk the list of under-writeback pages of the given address space
+ * in the given range and wait for all of them.
  */
-int wait_on_page_writeback_range(struct address_space *mapping,
-				pgoff_t start, pgoff_t end)
+int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
+			    loff_t end_byte)
 {
+	pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
 	struct pagevec pvec;
 	int nr_pages;
 	int ret = 0;
-	pgoff_t index;
 
-	if (end < start)
+	if (end_byte < start_byte)
 		return 0;
 
 	pagevec_init(&pvec, 0);
-	index = start;
 	while ((index <= end) &&
 			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 			PAGECACHE_TAG_WRITEBACK,
@@ -310,25 +310,6 @@ int wait_on_page_writeback_range(struct address_space *mapping,
 
 	return ret;
 }
-
-/**
- * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
- * @mapping: address space structure to wait for
- * @start:	offset in bytes where the range starts
- * @end:	offset in bytes where the range ends (inclusive)
- *
- * Walk the list of under-writeback pages of the given address space
- * in the given range and wait for all of them.
- *
- * This is just a simple wrapper so that callers don't have to convert offsets
- * to page indexes themselves
- */
-int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
-			    loff_t end)
-{
-	return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
-					    end >> PAGE_CACHE_SHIFT);
-}
 EXPORT_SYMBOL(filemap_fdatawait_range);
 
 /**
@@ -345,8 +326,7 @@ int filemap_fdatawait(struct address_space *mapping)
 	if (i_size == 0)
 		return 0;
 
-	return wait_on_page_writeback_range(mapping, 0,
-				(i_size - 1) >> PAGE_CACHE_SHIFT);
+	return filemap_fdatawait_range(mapping, 0, i_size - 1);
 }
 EXPORT_SYMBOL(filemap_fdatawait);
 
@@ -393,9 +373,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
 		if (err != -EIO) {
-			int err2 = wait_on_page_writeback_range(mapping,
-						lstart >> PAGE_CACHE_SHIFT,
-						lend >> PAGE_CACHE_SHIFT);
+			int err2 = filemap_fdatawait_range(mapping,
+						lstart, lend);
 			if (!err)
 				err = err2;
 		}
-- 
cgit v1.2.3


From 1472da5fdc65f0cd286c655758d629346001e126 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 16 Oct 2009 15:26:03 +0400
Subject: const: struct quota_format_ops

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ocfs2/quota_local.c | 2 +-
 fs/quota/quota_v1.c    | 2 +-
 fs/quota/quota_v2.c    | 2 +-
 include/linux/quota.h  | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 1a2c50a759fa..21f9e71223ca 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1325,7 +1325,7 @@ out:
 	return status;
 }
 
-static struct quota_format_ops ocfs2_format_ops = {
+static const struct quota_format_ops ocfs2_format_ops = {
 	.check_quota_file	= ocfs2_local_check_quota_file,
 	.read_file_info		= ocfs2_local_read_info,
 	.write_file_info	= ocfs2_global_write_info,
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 0edcf42b1778..2ae757e9c008 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -204,7 +204,7 @@ out:
 	return ret;
 }
 
-static struct quota_format_ops v1_format_ops = {
+static const struct quota_format_ops v1_format_ops = {
 	.check_quota_file	= v1_check_quota_file,
 	.read_file_info		= v1_read_file_info,
 	.write_file_info	= v1_write_file_info,
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a5475fb1ae44..01f25eae684d 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -207,7 +207,7 @@ static int v2_free_file_info(struct super_block *sb, int type)
 	return 0;
 }
 
-static struct quota_format_ops v2_format_ops = {
+static const struct quota_format_ops v2_format_ops = {
 	.check_quota_file	= v2_check_quota_file,
 	.read_file_info		= v2_read_file_info,
 	.write_file_info	= v2_write_file_info,
diff --git a/include/linux/quota.h b/include/linux/quota.h
index ce9a9b2e5cd4..f63c9d6ba784 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -334,7 +334,7 @@ struct quotactl_ops {
 
 struct quota_format_type {
 	int qf_fmt_id;	/* Quota format id */
-	struct quota_format_ops *qf_ops;	/* Operations of format */
+	const struct quota_format_ops *qf_ops;	/* Operations of format */
 	struct module *qf_owner;		/* Module implementing quota format */
 	struct quota_format_type *qf_next;
 };
@@ -394,7 +394,7 @@ struct quota_info {
 	struct rw_semaphore dqptr_sem;		/* serialize ops using quota_info struct, pointers from inode to dquots */
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
-	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
+	const struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
 };
 
 int register_quota_format(struct quota_format_type *fmt);
-- 
cgit v1.2.3


From ad888a1f07a72fc7d19286b4ce5c154172a06eed Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Wed, 18 Nov 2009 17:10:56 +0100
Subject: ext2: Explicitly assign values to on-disk enum of filetypes

It is somewhat dangerous to use a straight enum here, because this will
reassign values of later variables if one of the earlier ones is removed.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Cc: Andreas Dilger <adilger@sun.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/ext2_fs.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 121720d74e15..2dfa7076e8b6 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -565,14 +565,14 @@ struct ext2_dir_entry_2 {
  * other bits are reserved for now.
  */
 enum {
-	EXT2_FT_UNKNOWN,
-	EXT2_FT_REG_FILE,
-	EXT2_FT_DIR,
-	EXT2_FT_CHRDEV,
-	EXT2_FT_BLKDEV,
-	EXT2_FT_FIFO,
-	EXT2_FT_SOCK,
-	EXT2_FT_SYMLINK,
+	EXT2_FT_UNKNOWN		= 0,
+	EXT2_FT_REG_FILE	= 1,
+	EXT2_FT_DIR		= 2,
+	EXT2_FT_CHRDEV		= 3,
+	EXT2_FT_BLKDEV		= 4,
+	EXT2_FT_FIFO		= 5,
+	EXT2_FT_SOCK		= 6,
+	EXT2_FT_SYMLINK		= 7,
 	EXT2_FT_MAX
 };
 
-- 
cgit v1.2.3


From 4cf46b67eb6de94532c1bea11d2479d085229d0e Mon Sep 17 00:00:00 2001
From: Alexey Fisher <bug-track@fisher-privat.net>
Date: Sun, 22 Nov 2009 20:38:55 +0100
Subject: ext3: Unify log messages in ext3

Make messages produced by ext3 more unified. It should be
easy to parse.

dmesg before patch:
[ 4893.684892] reservations ON
[ 4893.684896] xip option not supported
[ 4893.684964] EXT3-fs warning: maximal mount count reached, running
e2fsck is recommended

dmesg after patch:
[  873.300792] EXT3-fs (loop0): using internal journaln
[  873.300796] EXT3-fs (loop0): mounted filesystem with writeback data mode
[  924.163657] EXT3-fs (loop0): error: can't find ext3 filesystem on dev loop0.
[  723.755642] EXT3-fs (loop0): error: bad blocksize 8192
[  357.874687] EXT3-fs (loop0): error: no journal found. mounting ext3 over ext2?
[  873.300764] EXT3-fs (loop0): warning: maximal mount count reached, running e2fsck is recommended
[  924.163657] EXT3-fs (loop0): error: can't find ext3 filesystem on dev loop0.

Signed-off-by: Alexey Fisher <bug-track@fisher-privat.net>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/super.c         | 432 +++++++++++++++++++++++++-----------------------
 include/linux/ext3_fs.h |   2 +
 2 files changed, 229 insertions(+), 205 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c1d128d765b6..46433b30e45f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -135,12 +135,24 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
 	if (is_handle_aborted(handle))
 		return;
 
-	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
-	       caller, errstr, err_fn);
+	printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
+		caller, errstr, err_fn);
 
 	journal_abort_handle(handle);
 }
 
+void ext3_msg(struct super_block *sb, const char *prefix,
+		const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+}
+
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
  *
@@ -174,12 +186,13 @@ static void ext3_handle_error(struct super_block *sb)
 			journal_abort(journal, -EIO);
 	}
 	if (test_opt (sb, ERRORS_RO)) {
-		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		ext3_msg(sb, KERN_CRIT,
+			"error: remounting filesystem read-only");
 		sb->s_flags |= MS_RDONLY;
 	}
 	ext3_commit_super(sb, es, 1);
 	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs (device %s): panic forced after error\n",
+		panic("EXT3-fs (%s): panic forced after error\n",
 			sb->s_id);
 }
 
@@ -247,8 +260,7 @@ void __ext3_std_error (struct super_block * sb, const char * function,
 		return;
 
 	errstr = ext3_decode_error(sb, errno, nbuf);
-	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
-		sb->s_id, function, errstr);
+	ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
 
 	ext3_handle_error(sb);
 }
@@ -268,21 +280,20 @@ void ext3_abort (struct super_block * sb, const char * function,
 {
 	va_list args;
 
-	printk (KERN_CRIT "ext3_abort called.\n");
-
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+	printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
 
 	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs panic from previous error\n");
+		panic("EXT3-fs: panic from previous error\n");
 
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	printk(KERN_CRIT "Remounting filesystem read-only\n");
+	ext3_msg(sb, KERN_CRIT,
+		"error: remounting filesystem read-only");
 	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
 	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
@@ -296,7 +307,7 @@ void ext3_warning (struct super_block * sb, const char * function,
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
+	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
 	       sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
@@ -310,10 +321,10 @@ void ext3_update_dynamic_rev(struct super_block *sb)
 	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
 		return;
 
-	ext3_warning(sb, __func__,
-		     "updating to rev %d because of new feature flag, "
-		     "running e2fsck is recommended",
-		     EXT3_DYNAMIC_REV);
+	ext3_msg(sb, KERN_WARNING,
+		"warning: updating to rev %d because of "
+		"new feature flag, running e2fsck is recommended",
+		EXT3_DYNAMIC_REV);
 
 	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
 	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
@@ -331,7 +342,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
 /*
  * Open the external journal device
  */
-static struct block_device *ext3_blkdev_get(dev_t dev)
+static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
 {
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
@@ -342,8 +353,9 @@ static struct block_device *ext3_blkdev_get(dev_t dev)
 	return bdev;
 
 fail:
-	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
-			__bdevname(dev, b), PTR_ERR(bdev));
+	ext3_msg(sb, "error: failed to open journal device %s: %ld",
+		__bdevname(dev, b), PTR_ERR(bdev));
+
 	return NULL;
 }
 
@@ -378,13 +390,13 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 {
 	struct list_head *l;
 
-	printk(KERN_ERR "sb orphan head is %d\n",
+	ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
 	       le32_to_cpu(sbi->s_es->s_last_orphan));
 
-	printk(KERN_ERR "sb_info orphan list:\n");
+	ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
 	list_for_each(l, &sbi->s_orphan) {
 		struct inode *inode = orphan_list_entry(l);
-		printk(KERN_ERR "  "
+		ext3_msg(sb, KERN_ERR, "  "
 		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 		       inode->i_sb->s_id, inode->i_ino, inode,
 		       inode->i_mode, inode->i_nlink,
@@ -849,7 +861,7 @@ static const match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
-static ext3_fsblk_t get_sb_block(void **data)
+static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
 {
 	ext3_fsblk_t	sb_block;
 	char		*options = (char *) *data;
@@ -860,7 +872,7 @@ static ext3_fsblk_t get_sb_block(void **data)
 	/*todo: use simple_strtoll with >32bit ext3 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
-		printk("EXT3-fs: Invalid sb specification: %s\n",
+		ext3_msg(sb, "error: invalid sb specification: %s",
 		       (char *) *data);
 		return 1;
 	}
@@ -960,7 +972,8 @@ static int parse_options (char *options, struct super_block *sb,
 #else
 		case Opt_user_xattr:
 		case Opt_nouser_xattr:
-			printk("EXT3 (no)user_xattr options not supported\n");
+			ext3_msg(sb, KERN_INFO,
+				"(no)user_xattr options not supported");
 			break;
 #endif
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
@@ -973,7 +986,8 @@ static int parse_options (char *options, struct super_block *sb,
 #else
 		case Opt_acl:
 		case Opt_noacl:
-			printk("EXT3 (no)acl options not supported\n");
+			ext3_msg(sb, KERN_INFO,
+				"(no)acl options not supported");
 			break;
 #endif
 		case Opt_reservation:
@@ -989,16 +1003,16 @@ static int parse_options (char *options, struct super_block *sb,
 			   user to specify an existing inode to be the
 			   journal file. */
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
-				       "journal on remount\n");
+				ext3_msg(sb, KERN_ERR, "error: cannot specify "
+					"journal on remount");
 				return 0;
 			}
 			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_inum:
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
-				       "journal on remount\n");
+				ext3_msg(sb, KERN_ERR, "error: cannot specify "
+				       "journal on remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option))
@@ -1007,8 +1021,8 @@ static int parse_options (char *options, struct super_block *sb,
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
-				       "journal on remount\n");
+				ext3_msg(sb, KERN_ERR, "error: cannot specify "
+				       "journal on remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option))
@@ -1040,12 +1054,11 @@ static int parse_options (char *options, struct super_block *sb,
 				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
 						== data_opt)
 					break;
-				printk(KERN_ERR
-					"EXT3-fs (device %s): Cannot change "
+				ext3_msg(sb, KERN_ERR,
+					"error: cannot change "
 					"data mode on remount. The filesystem "
 					"is mounted in data=%s mode and you "
-					"try to remount it in data=%s mode.\n",
-					sb->s_id,
+					"try to remount it in data=%s mode.",
 					data_mode_string(sbi->s_mount_opt &
 							EXT3_MOUNT_DATA_FLAGS),
 					data_mode_string(data_opt));
@@ -1070,31 +1083,31 @@ static int parse_options (char *options, struct super_block *sb,
 set_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR
-					"EXT3-fs: Cannot change journaled "
-					"quota options when quota turned on.\n");
+				ext3_msg(sb, KERN_ERR,
+					"error: cannot change journaled "
+					"quota options when quota turned on.");
 				return 0;
 			}
 			qname = match_strdup(&args[0]);
 			if (!qname) {
-				printk(KERN_ERR
-					"EXT3-fs: not enough memory for "
-					"storing quotafile name.\n");
+				ext3_msg(sb, KERN_ERR,
+					"error: not enough memory for "
+					"storing quotafile name.");
 				return 0;
 			}
 			if (sbi->s_qf_names[qtype] &&
 			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				printk(KERN_ERR
-					"EXT3-fs: %s quota file already "
-					"specified.\n", QTYPE2NAME(qtype));
+				ext3_msg(sb, KERN_ERR,
+					"error: %s quota file already "
+					"specified.", QTYPE2NAME(qtype));
 				kfree(qname);
 				return 0;
 			}
 			sbi->s_qf_names[qtype] = qname;
 			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				printk(KERN_ERR
-					"EXT3-fs: quotafile must be on "
-					"filesystem root.\n");
+				ext3_msg(sb, KERN_ERR,
+					"error: quotafile must be on "
+					"filesystem root.");
 				kfree(sbi->s_qf_names[qtype]);
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
@@ -1109,9 +1122,9 @@ set_qf_name:
 clear_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR "EXT3-fs: Cannot change "
+				ext3_msg(sb, KERN_ERR, "error: cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on.");
 				return 0;
 			}
 			/*
@@ -1128,9 +1141,9 @@ clear_qf_name:
 set_qf_format:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
-				printk(KERN_ERR "EXT3-fs: Cannot change "
+				ext3_msg(sb, KERN_ERR, "error: cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on.");
 				return 0;
 			}
 			sbi->s_jquota_fmt = qfmt;
@@ -1146,8 +1159,8 @@ set_qf_format:
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_loaded(sb)) {
-				printk(KERN_ERR "EXT3-fs: Cannot change quota "
-					"options when quota turned on.\n");
+				ext3_msg(sb, KERN_ERR, "error: cannot change "
+					"quota options when quota turned on.");
 				return 0;
 			}
 			clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1158,8 +1171,8 @@ set_qf_format:
 		case Opt_quota:
 		case Opt_usrquota:
 		case Opt_grpquota:
-			printk(KERN_ERR
-				"EXT3-fs: quota options not supported.\n");
+			ext3_msg(sb, KERN_ERR,
+				"error: quota options not supported.");
 			break;
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -1167,9 +1180,9 @@ set_qf_format:
 		case Opt_offgrpjquota:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
-			printk(KERN_ERR
-				"EXT3-fs: journaled quota options not "
-				"supported.\n");
+			ext3_msg(sb, KERN_ERR,
+				"error: journaled quota options not "
+				"supported.");
 			break;
 		case Opt_noquota:
 			break;
@@ -1189,8 +1202,9 @@ set_qf_format:
 			break;
 		case Opt_resize:
 			if (!is_remount) {
-				printk("EXT3-fs: resize option only available "
-					"for remount\n");
+				ext3_msg(sb, KERN_ERR,
+					"error: resize option only available "
+					"for remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option) != 0)
@@ -1204,9 +1218,9 @@ set_qf_format:
 			clear_opt(sbi->s_mount_opt, NOBH);
 			break;
 		default:
-			printk (KERN_ERR
-				"EXT3-fs: Unrecognized mount option \"%s\" "
-				"or missing value\n", p);
+			ext3_msg(sb, KERN_ERR,
+				"error: unrecognized mount option \"%s\" "
+				"or missing value", p);
 			return 0;
 		}
 	}
@@ -1224,21 +1238,21 @@ set_qf_format:
 				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
 		    (sbi->s_qf_names[GRPQUOTA] &&
 				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
-			printk(KERN_ERR "EXT3-fs: old and new quota "
-					"format mixing.\n");
+			ext3_msg(sb, KERN_ERR, "error: old and new quota "
+					"format mixing.");
 			return 0;
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journaled quota format "
-					"not specified.\n");
+			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
+					"not specified.");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journaled quota format "
+			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
 					"specified with no journaling "
-					"enabled.\n");
+					"enabled.");
 			return 0;
 		}
 	}
@@ -1253,31 +1267,33 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
 	int res = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
-		printk (KERN_ERR "EXT3-fs warning: revision level too high, "
-			"forcing read-only mode\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: revision level too high, "
+			"forcing read-only mode");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
 	if (!(sbi->s_mount_state & EXT3_VALID_FS))
-		printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
-			"running e2fsck is recommended\n");
+		ext3_msg(sb, KERN_WARNING,
+			"warning: mounting unchecked fs, "
+			"running e2fsck is recommended");
 	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
-		printk (KERN_WARNING
-			"EXT3-fs warning: mounting fs with errors, "
-			"running e2fsck is recommended\n");
+		ext3_msg(sb, KERN_WARNING,
+			"warning: mounting fs with errors, "
+			"running e2fsck is recommended");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-		printk (KERN_WARNING
-			"EXT3-fs warning: maximal mount count reached, "
-			"running e2fsck is recommended\n");
+		ext3_msg(sb, KERN_WARNING,
+			"warning: maximal mount count reached, "
+			"running e2fsck is recommended");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-		printk (KERN_WARNING
-			"EXT3-fs warning: checktime reached, "
-			"running e2fsck is recommended\n");
+		ext3_msg(sb, KERN_WARNING,
+			"warning: checktime reached, "
+			"running e2fsck is recommended");
 #if 0
 		/* @@@ We _will_ want to clear the valid bit if we find
                    inconsistencies, to force a fsck at reboot.  But for
@@ -1294,22 +1310,20 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
 
 	ext3_commit_super(sb, es, 1);
 	if (test_opt(sb, DEBUG))
-		printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
-				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
+		ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
+				"bpg=%lu, ipg=%lu, mo=%04lx]",
 			sb->s_blocksize,
 			sbi->s_groups_count,
 			EXT3_BLOCKS_PER_GROUP(sb),
 			EXT3_INODES_PER_GROUP(sb),
 			sbi->s_mount_opt);
 
-	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
 	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
 		char b[BDEVNAME_SIZE];
-
-		printk("external journal on %s\n",
+		ext3_msg(sb, KERN_INFO, "using external journal on %s",
 			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
 	} else {
-		printk("internal journal\n");
+		ext3_msg(sb, KERN_INFO, "using internal journal");
 	}
 	return res;
 }
@@ -1403,8 +1417,8 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	}
 
 	if (bdev_read_only(sb->s_bdev)) {
-		printk(KERN_ERR "EXT3-fs: write access "
-			"unavailable, skipping orphan cleanup.\n");
+		ext3_msg(sb, KERN_ERR, "error: write access "
+			"unavailable, skipping orphan cleanup.");
 		return;
 	}
 
@@ -1418,8 +1432,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	}
 
 	if (s_flags & MS_RDONLY) {
-		printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
-		       sb->s_id);
+		ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
 		sb->s_flags &= ~MS_RDONLY;
 	}
 #ifdef CONFIG_QUOTA
@@ -1430,9 +1443,9 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		if (EXT3_SB(sb)->s_qf_names[i]) {
 			int ret = ext3_quota_on_mount(sb, i);
 			if (ret < 0)
-				printk(KERN_ERR
-					"EXT3-fs: Cannot turn on journaled "
-					"quota: error %d\n", ret);
+				ext3_msg(sb, KERN_ERR,
+					"error: cannot turn on journaled "
+					"quota: %d", ret);
 		}
 	}
 #endif
@@ -1470,11 +1483,11 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 #define PLURAL(x) (x), ((x)==1) ? "" : "s"
 
 	if (nr_orphans)
-		printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
-		       sb->s_id, PLURAL(nr_orphans));
+		ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+		       PLURAL(nr_orphans));
 	if (nr_truncates)
-		printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
-		       sb->s_id, PLURAL(nr_truncates));
+		ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+		       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
 	for (i = 0; i < MAXQUOTAS; i++) {
@@ -1558,7 +1571,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	struct ext3_super_block *es = NULL;
 	struct ext3_sb_info *sbi;
 	ext3_fsblk_t block;
-	ext3_fsblk_t sb_block = get_sb_block(&data);
+	ext3_fsblk_t sb_block = get_sb_block(&data, sb);
 	ext3_fsblk_t logic_sb_block;
 	unsigned long offset = 0;
 	unsigned int journal_inum = 0;
@@ -1594,7 +1607,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
 	if (!blocksize) {
-		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
+		ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
 		goto out_fail;
 	}
 
@@ -1610,7 +1623,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	if (!(bh = sb_bread(sb, logic_sb_block))) {
-		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
+		ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
 		goto out_fail;
 	}
 	/*
@@ -1669,9 +1682,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-		printk(KERN_WARNING
-		       "EXT3-fs warning: feature flags set on rev 0 fs, "
-		       "running e2fsck is recommended\n");
+		ext3_msg(sb, KERN_WARNING,
+			"warning: feature flags set on rev 0 fs, "
+			"running e2fsck is recommended");
 	/*
 	 * Check feature flags regardless of the revision level, since we
 	 * previously didn't change the revision level when setting the flags,
@@ -1679,25 +1692,25 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 */
 	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
 	if (features) {
-		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
-		       "unsupported optional features (%x).\n",
-		       sb->s_id, le32_to_cpu(features));
+		ext3_msg(sb, KERN_ERR,
+			"error: couldn't mount because of unsupported "
+			"optional features (%x)", le32_to_cpu(features));
 		goto failed_mount;
 	}
 	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
 	if (!(sb->s_flags & MS_RDONLY) && features) {
-		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
-		       "unsupported optional features (%x).\n",
-		       sb->s_id, le32_to_cpu(features));
+		ext3_msg(sb, KERN_ERR,
+			"error: couldn't mount RDWR because of unsupported "
+			"optional features (%x)", le32_to_cpu(features));
 		goto failed_mount;
 	}
 	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
 
 	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
 	    blocksize > EXT3_MAX_BLOCK_SIZE) {
-		printk(KERN_ERR
-		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
-		       blocksize, sb->s_id);
+		ext3_msg(sb, KERN_ERR,
+			"error: couldn't mount because of unsupported "
+			"filesystem blocksize %d", blocksize);
 		goto failed_mount;
 	}
 
@@ -1708,30 +1721,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		 * than the hardware sectorsize for the machine.
 		 */
 		if (blocksize < hblock) {
-			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
-			       "device blocksize %d.\n", blocksize, hblock);
+			ext3_msg(sb, KERN_ERR,
+				"error: fsblocksize %d too small for "
+				"hardware sectorsize %d", blocksize, hblock);
 			goto failed_mount;
 		}
 
 		brelse (bh);
 		if (!sb_set_blocksize(sb, blocksize)) {
-			printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
-				blocksize);
+			ext3_msg(sb, KERN_ERR,
+				"error: bad blocksize %d", blocksize);
 			goto out_fail;
 		}
 		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
 		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
 		bh = sb_bread(sb, logic_sb_block);
 		if (!bh) {
-			printk(KERN_ERR
-			       "EXT3-fs: Can't read superblock on 2nd try.\n");
+			ext3_msg(sb, KERN_ERR,
+			       "error: can't read superblock on 2nd try");
 			goto failed_mount;
 		}
 		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
-			printk (KERN_ERR
-				"EXT3-fs: Magic mismatch, very weird !\n");
+			ext3_msg(sb, KERN_ERR,
+				"error: magic mismatch");
 			goto failed_mount;
 		}
 	}
@@ -1747,8 +1761,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
-			printk (KERN_ERR
-				"EXT3-fs: unsupported inode size: %d\n",
+			ext3_msg(sb, KERN_ERR,
+				"error: unsupported inode size: %d",
 				sbi->s_inode_size);
 			goto failed_mount;
 		}
@@ -1756,8 +1770,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
 				   le32_to_cpu(es->s_log_frag_size);
 	if (blocksize != sbi->s_frag_size) {
-		printk(KERN_ERR
-		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
+		ext3_msg(sb, KERN_ERR,
+		       "error: fragsize %lu != blocksize %u (unsupported)",
 		       sbi->s_frag_size, blocksize);
 		goto failed_mount;
 	}
@@ -1793,31 +1807,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT3-fs: #blocks per group too big: %lu\n",
+		ext3_msg(sb, KERN_ERR,
+			"#blocks per group too big: %lu",
 			sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_frags_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT3-fs: #fragments per group too big: %lu\n",
+		ext3_msg(sb, KERN_ERR,
+			"error: #fragments per group too big: %lu",
 			sbi->s_frags_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT3-fs: #inodes per group too big: %lu\n",
+		ext3_msg(sb, KERN_ERR,
+			"error: #inodes per group too big: %lu",
 			sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
 	if (le32_to_cpu(es->s_blocks_count) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
-			" too large to mount safely\n", sb->s_id);
+		ext3_msg(sb, KERN_ERR,
+			"error: filesystem is too large to mount safely");
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
-					"enabled\n");
+			ext3_msg(sb, KERN_ERR,
+				"error: CONFIG_LBDAF not enabled");
 		goto failed_mount;
 	}
 
@@ -1831,7 +1845,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk (KERN_ERR "EXT3-fs: not enough memory\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: not enough memory");
 		goto failed_mount;
 	}
 
@@ -1841,14 +1856,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		block = descriptor_loc(sb, logic_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk (KERN_ERR "EXT3-fs: "
-				"can't read group descriptor %d\n", i);
+			ext3_msg(sb, KERN_ERR,
+				"error: can't read group descriptor %d", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
 	if (!ext3_check_descriptors (sb)) {
-		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: group descriptors corrupted");
 		goto failed_mount2;
 	}
 	sbi->s_gdb_count = db_count;
@@ -1866,7 +1882,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 				ext3_count_dirs(sb));
 	}
 	if (err) {
-		printk(KERN_ERR "EXT3-fs: insufficient memory\n");
+		ext3_msg(sb, KERN_ERR, "error: insufficient memory");
 		goto failed_mount3;
 	}
 
@@ -1914,9 +1930,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 			goto failed_mount3;
 	} else {
 		if (!silent)
-			printk (KERN_ERR
-				"ext3: No journal on filesystem on %s\n",
-				sb->s_id);
+			ext3_msg(sb, KERN_ERR,
+				"error: no journal found. "
+				"mounting ext3 over ext2?");
 		goto failed_mount3;
 	}
 
@@ -1938,8 +1954,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	case EXT3_MOUNT_WRITEBACK_DATA:
 		if (!journal_check_available_features
 		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
-			printk(KERN_ERR "EXT3-fs: Journal does not support "
-			       "requested data journaling mode\n");
+			ext3_msg(sb, KERN_ERR,
+				"error: journal does not support "
+				"requested data journaling mode");
 			goto failed_mount4;
 		}
 	default:
@@ -1948,8 +1965,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
-			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
-				"its supported only with writeback mode\n");
+			ext3_msg(sb, KERN_WARNING,
+				"warning: ignoring nobh option - "
+				"it is supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
 	}
@@ -1960,18 +1978,18 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	root = ext3_iget(sb, EXT3_ROOT_INO);
 	if (IS_ERR(root)) {
-		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
+		ext3_msg(sb, KERN_ERR, "error: get root inode failed");
 		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		iput(root);
-		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
+		ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
-		printk(KERN_ERR "EXT3-fs: get root dentry failed\n");
+		ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
 		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
@@ -1990,9 +2008,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	ext3_orphan_cleanup(sb, es);
 	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
 	if (needs_recovery)
-		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
+		ext3_msg(sb, KERN_INFO, "recovery complete");
 	ext3_mark_recovery_complete(sb, es);
-	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
+	ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
@@ -2002,7 +2020,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 cantfind_ext3:
 	if (!silent)
-		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
+		ext3_msg(sb, KERN_INFO,
+			"error: can't find ext3 filesystem on dev %s.",
 		       sb->s_id);
 	goto failed_mount;
 
@@ -2070,27 +2089,27 @@ static journal_t *ext3_get_journal(struct super_block *sb,
 
 	journal_inode = ext3_iget(sb, journal_inum);
 	if (IS_ERR(journal_inode)) {
-		printk(KERN_ERR "EXT3-fs: no journal found.\n");
+		ext3_msg(sb, KERN_ERR, "error: no journal found");
 		return NULL;
 	}
 	if (!journal_inode->i_nlink) {
 		make_bad_inode(journal_inode);
 		iput(journal_inode);
-		printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
+		ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
 		return NULL;
 	}
 
 	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
 		  journal_inode, journal_inode->i_size);
 	if (!S_ISREG(journal_inode->i_mode)) {
-		printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
+		ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
 
 	journal = journal_init_inode(journal_inode);
 	if (!journal) {
-		printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
+		ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
@@ -2112,13 +2131,13 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	struct ext3_super_block * es;
 	struct block_device *bdev;
 
-	bdev = ext3_blkdev_get(j_dev);
+	bdev = ext3_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
 		return NULL;
 
 	if (bd_claim(bdev, sb)) {
-		printk(KERN_ERR
-		        "EXT3: failed to claim external journal device.\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: failed to claim external journal device");
 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 		return NULL;
 	}
@@ -2126,8 +2145,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	blocksize = sb->s_blocksize;
 	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
-		printk(KERN_ERR
-			"EXT3-fs: blocksize too small for journal device.\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: blocksize too small for journal device");
 		goto out_bdev;
 	}
 
@@ -2135,8 +2154,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
 	set_blocksize(bdev, blocksize);
 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
-		printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
-		       "external journal\n");
+		ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
+			"external journal");
 		goto out_bdev;
 	}
 
@@ -2144,14 +2163,14 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
 	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-		printk(KERN_ERR "EXT3-fs: external journal has "
-					"bad superblock\n");
+		ext3_msg(sb, KERN_ERR, "error: external journal has "
+			"bad superblock");
 		brelse(bh);
 		goto out_bdev;
 	}
 
 	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-		printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
+		ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
 		brelse(bh);
 		goto out_bdev;
 	}
@@ -2163,19 +2182,21 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	journal = journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
-		printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: failed to create device journal");
 		goto out_bdev;
 	}
 	journal->j_private = sb;
 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
-		printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
+		ext3_msg(sb, KERN_ERR, "I/O error on journal device");
 		goto out_journal;
 	}
 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-		printk(KERN_ERR "EXT3-fs: External journal has more than one "
-					"user (unsupported) - %d\n",
+		ext3_msg(sb, KERN_ERR,
+			"error: external journal has more than one "
+			"user (unsupported) - %d",
 			be32_to_cpu(journal->j_superblock->s_nr_users));
 		goto out_journal;
 	}
@@ -2201,8 +2222,8 @@ static int ext3_load_journal(struct super_block *sb,
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
-			"numbers have changed\n");
+		ext3_msg(sb, KERN_INFO, "external journal device major/minor "
+			"numbers have changed");
 		journal_dev = new_decode_dev(journal_devnum);
 	} else
 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -2217,21 +2238,21 @@ static int ext3_load_journal(struct super_block *sb,
 
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_INFO "EXT3-fs: INFO: recovery "
-					"required on readonly filesystem.\n");
+			ext3_msg(sb, KERN_INFO,
+				"recovery required on readonly filesystem");
 			if (really_read_only) {
-				printk(KERN_ERR "EXT3-fs: write access "
-					"unavailable, cannot proceed.\n");
+				ext3_msg(sb, KERN_ERR, "error: write access "
+					"unavailable, cannot proceed");
 				return -EROFS;
 			}
-			printk (KERN_INFO "EXT3-fs: write access will "
-					"be enabled during recovery.\n");
+			ext3_msg(sb, KERN_INFO,
+				"write access will be enabled during recovery");
 		}
 	}
 
 	if (journal_inum && journal_dev) {
-		printk(KERN_ERR "EXT3-fs: filesystem has both journal "
-		       "and inode journals!\n");
+		ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
+		       "and inode journals");
 		return -EINVAL;
 	}
 
@@ -2246,7 +2267,7 @@ static int ext3_load_journal(struct super_block *sb,
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
 		err = journal_update_format(journal);
 		if (err)  {
-			printk(KERN_ERR "EXT3-fs: error updating journal.\n");
+			ext3_msg(sb, KERN_ERR, "error updating journal");
 			journal_destroy(journal);
 			return err;
 		}
@@ -2258,7 +2279,7 @@ static int ext3_load_journal(struct super_block *sb,
 		err = journal_load(journal);
 
 	if (err) {
-		printk(KERN_ERR "EXT3-fs: error loading journal.\n");
+		ext3_msg(sb, KERN_ERR, "error loading journal");
 		journal_destroy(journal);
 		return err;
 	}
@@ -2277,16 +2298,17 @@ static int ext3_load_journal(struct super_block *sb,
 	return 0;
 }
 
-static int ext3_create_journal(struct super_block * sb,
-			       struct ext3_super_block * es,
+static int ext3_create_journal(struct super_block *sb,
+			       struct ext3_super_block *es,
 			       unsigned int journal_inum)
 {
 	journal_t *journal;
 	int err;
 
 	if (sb->s_flags & MS_RDONLY) {
-		printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
-				"create journal.\n");
+		ext3_msg(sb, KERN_ERR,
+			"error: readonly filesystem when trying to "
+			"create journal");
 		return -EROFS;
 	}
 
@@ -2294,12 +2316,12 @@ static int ext3_create_journal(struct super_block * sb,
 	if (!journal)
 		return -EINVAL;
 
-	printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
+	ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
 	       journal_inum);
 
 	err = journal_create(journal);
 	if (err) {
-		printk(KERN_ERR "EXT3-fs: error creating journal.\n");
+		ext3_msg(sb, KERN_ERR, "error creating journal");
 		journal_destroy(journal);
 		return -EIO;
 	}
@@ -2380,8 +2402,8 @@ out:
  * has recorded an error from a previous lifetime, move that error to the
  * main filesystem now.
  */
-static void ext3_clear_journal_err(struct super_block * sb,
-				   struct ext3_super_block * es)
+static void ext3_clear_journal_err(struct super_block *sb,
+				   struct ext3_super_block *es)
 {
 	journal_t *journal;
 	int j_errno;
@@ -2572,10 +2594,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			__le32 ret;
 			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
 					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
-				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
-				       "remount RDWR because of unsupported "
-				       "optional features (%x).\n",
-				       sb->s_id, le32_to_cpu(ret));
+				ext3_msg(sb, KERN_WARNING,
+					"warning: couldn't remount RDWR "
+					"because of unsupported optional "
+					"features (%x)", le32_to_cpu(ret));
 				err = -EROFS;
 				goto restore_opts;
 			}
@@ -2586,11 +2608,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			 * require a full umount/remount for now.
 			 */
 			if (es->s_last_orphan) {
-				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+				ext3_msg(sb, KERN_WARNING, "warning: couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
-				       "umount/remount instead.\n",
-				       sb->s_id);
+				       "umount/remount instead.");
 				err = -EINVAL;
 				goto restore_opts;
 			}
@@ -2839,9 +2860,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	if (EXT3_SB(sb)->s_qf_names[type]) {
 		/* Quotafile not of fs root? */
 		if (path.dentry->d_parent != sb->s_root)
-			printk(KERN_WARNING
-				"EXT3-fs: Quota file not on filesystem root. "
-				"Journaled quota will not work.\n");
+			ext3_msg(sb, KERN_WARNING,
+				"warning: Quota file not on filesystem root. "
+				"Journaled quota will not work.");
 	}
 
 	/*
@@ -2923,8 +2944,9 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	handle_t *handle = journal_current_handle();
 
 	if (!handle) {
-		printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)"
-			" cancelled because transaction is not started.\n",
+		ext3_msg(sb, KERN_WARNING,
+			"warning: quota write (off=%llu, len=%llu)"
+			" cancelled because transaction is not started.",
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 7499b3667798..6b049030fbe6 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -918,6 +918,8 @@ extern void ext3_abort (struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext3_warning (struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void ext3_msg(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
 extern void ext3_update_dynamic_rev (struct super_block *sb);
 
 #define ext3_std_error(sb, errno)				\
-- 
cgit v1.2.3


From 30673930051e5203d0b826b8b8f2454cab453b94 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 30 Nov 2009 22:17:41 +0100
Subject: quota: Move definition of QFMT_OCFS2 to linux/quota.h

Move definition of this constant to linux/quota.h so that it
cannot clash with other format IDs.

CC: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ocfs2/quota.h      | 4 ----
 include/linux/quota.h | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index e5df9d170b0c..123bc520a2c0 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -17,10 +17,6 @@
 
 #include "ocfs2.h"
 
-/* Common stuff */
-/* id number of quota format */
-#define QFMT_OCFS2 3
-
 /*
  * In-memory structures
  */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index f63c9d6ba784..7db3a005483f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -73,6 +73,7 @@
 /* Quota format type IDs */
 #define	QFMT_VFS_OLD 1
 #define	QFMT_VFS_V0 2
+#define QFMT_OCFS2 3
 
 /* Size of block in which space limits are passed through the quota
  * interface */
-- 
cgit v1.2.3


From 498c60153ebb8889d8944591383c5c12af1127d4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 16 Nov 2009 18:09:47 +0100
Subject: quota: Implement quota format with 64-bit space and inode limits

So far the maximum quota space limit was 4TB. Apparently this isn't enough
for Lustre guys anymore. So implement new quota format which raises block
limits to 2^64 bytes. Also store number of inodes and inode limits in
64-bit variables as 2^32 files isn't that insanely high anymore.

The first version of the patch has been developed by Andrew Perepechko
<Andrew.Perepechko@Sun.COM>.

CC: Andrew.Perepechko@Sun.COM
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/Kconfig      |   8 ++-
 fs/quota/quota_v2.c   | 165 ++++++++++++++++++++++++++++++++++++++++----------
 fs/quota/quotaio_v2.h |  19 +++++-
 include/linux/quota.h |   1 +
 4 files changed, 154 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 353e78a9ebee..efc02ebb8c70 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -46,12 +46,14 @@ config QFMT_V1
 	  format say Y here.
 
 config QFMT_V2
-	tristate "Quota format v2 support"
+	tristate "Quota format vfsv0 and vfsv1 support"
 	depends on QUOTA
 	select QUOTA_TREE
 	help
-	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
-	  need this functionality say Y here.
+	  This config option enables kernel support for vfsv0 and vfsv1 quota
+	  formats. Both these formats support 32-bit UIDs/GIDs and vfsv1 format
+	  also supports 64-bit inode and block quota limits. If you need this
+	  functionality say Y here.
 
 config QUOTACTL
 	bool
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 01f25eae684d..3dfc23e02135 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -23,14 +23,23 @@ MODULE_LICENSE("GPL");
 
 #define __QUOTA_V2_PARANOIA
 
-static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
-static void v2_disk2memdqb(struct dquot *dquot, void *dp);
-static int v2_is_id(void *dp, struct dquot *dquot);
-
-static struct qtree_fmt_operations v2_qtree_ops = {
-	.mem2disk_dqblk = v2_mem2diskdqb,
-	.disk2mem_dqblk = v2_disk2memdqb,
-	.is_id = v2_is_id,
+static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2r0_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2r0_is_id(void *dp, struct dquot *dquot);
+static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2r1_is_id(void *dp, struct dquot *dquot);
+
+static struct qtree_fmt_operations v2r0_qtree_ops = {
+	.mem2disk_dqblk = v2r0_mem2diskdqb,
+	.disk2mem_dqblk = v2r0_disk2memdqb,
+	.is_id = v2r0_is_id,
+};
+
+static struct qtree_fmt_operations v2r1_qtree_ops = {
+	.mem2disk_dqblk = v2r1_mem2diskdqb,
+	.disk2mem_dqblk = v2r1_disk2memdqb,
+	.is_id = v2r1_is_id,
 };
 
 #define QUOTABLOCK_BITS 10
@@ -46,23 +55,33 @@ static inline qsize_t v2_qbtos(qsize_t blocks)
 	return blocks << QUOTABLOCK_BITS;
 }
 
+static int v2_read_header(struct super_block *sb, int type,
+			  struct v2_disk_dqheader *dqhead)
+{
+	ssize_t size;
+
+	size = sb->s_op->quota_read(sb, type, (char *)dqhead,
+				    sizeof(struct v2_disk_dqheader), 0);
+	if (size != sizeof(struct v2_disk_dqheader)) {
+		printk(KERN_WARNING "quota_v2: Failed header read:"
+		       " expected=%zd got=%zd\n",
+			sizeof(struct v2_disk_dqheader), size);
+		return 0;
+	}
+	return 1;
+}
+
 /* Check whether given file is really vfsv0 quotafile */
 static int v2_check_quota_file(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
-	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
 	static const uint quota_versions[] = V2_INITQVERSIONS;
  
-	size = sb->s_op->quota_read(sb, type, (char *)&dqhead,
-				    sizeof(struct v2_disk_dqheader), 0);
-	if (size != sizeof(struct v2_disk_dqheader)) {
-		printk("quota_v2: failed read expected=%zd got=%zd\n",
-			sizeof(struct v2_disk_dqheader), size);
+	if (!v2_read_header(sb, type, &dqhead))
 		return 0;
-	}
 	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+	    le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
 		return 0;
 	return 1;
 }
@@ -71,14 +90,20 @@ static int v2_check_quota_file(struct super_block *sb, int type)
 static int v2_read_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
+	struct v2_disk_dqheader dqhead;
 	struct mem_dqinfo *info = sb_dqinfo(sb, type);
 	struct qtree_mem_dqinfo *qinfo;
 	ssize_t size;
+	unsigned int version;
+
+	if (!v2_read_header(sb, type, &dqhead))
+		return 0;
+	version = le32_to_cpu(dqhead.dqh_version);
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
-		printk(KERN_WARNING "Can't read info structure on device %s.\n",
+		printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
 			sb->s_id);
 		return -1;
 	}
@@ -89,9 +114,15 @@ static int v2_read_file_info(struct super_block *sb, int type)
 		return -1;
 	}
 	qinfo = info->dqi_priv;
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
+	if (version == 0) {
+		/* limits are stored as unsigned 32-bit data */
+		info->dqi_maxblimit = 0xffffffff;
+		info->dqi_maxilimit = 0xffffffff;
+	} else {
+		/* used space is stored as unsigned 64-bit value */
+		info->dqi_maxblimit = 0xffffffffffffffff;	/* 2^64-1 */
+		info->dqi_maxilimit = 0xffffffffffffffff;
+	}
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -103,8 +134,13 @@ static int v2_read_file_info(struct super_block *sb, int type)
 	qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
 	qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
 	qinfo->dqi_qtree_depth = qtree_depth(qinfo);
-	qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
-	qinfo->dqi_ops = &v2_qtree_ops;
+	if (version == 0) {
+		qinfo->dqi_entry_size = sizeof(struct v2r0_disk_dqblk);
+		qinfo->dqi_ops = &v2r0_qtree_ops;
+	} else {
+		qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
+		qinfo->dqi_ops = &v2r1_qtree_ops;
+	}
 	return 0;
 }
 
@@ -135,9 +171,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	return 0;
 }
 
-static void v2_disk2memdqb(struct dquot *dquot, void *dp)
+static void v2r0_disk2memdqb(struct dquot *dquot, void *dp)
 {
-	struct v2_disk_dqblk *d = dp, empty;
+	struct v2r0_disk_dqblk *d = dp, empty;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 
 	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
@@ -149,15 +185,15 @@ static void v2_disk2memdqb(struct dquot *dquot, void *dp)
 	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
 	/* We need to escape back all-zero structure */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+	memset(&empty, 0, sizeof(struct v2r0_disk_dqblk));
 	empty.dqb_itime = cpu_to_le64(1);
-	if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+	if (!memcmp(&empty, dp, sizeof(struct v2r0_disk_dqblk)))
 		m->dqb_itime = 0;
 }
 
-static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
+static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
 {
-	struct v2_disk_dqblk *d = dp;
+	struct v2r0_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 	struct qtree_mem_dqinfo *info =
 			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
@@ -175,9 +211,60 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
 		d->dqb_itime = cpu_to_le64(1);
 }
 
-static int v2_is_id(void *dp, struct dquot *dquot)
+static int v2r0_is_id(void *dp, struct dquot *dquot)
+{
+	struct v2r0_disk_dqblk *d = dp;
+	struct qtree_mem_dqinfo *info =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	if (qtree_entry_unused(info, dp))
+		return 0;
+	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+}
+
+static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
+{
+	struct v2r1_disk_dqblk *d = dp, empty;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+
+	m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
+	m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
+	m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
+	m->dqb_itime = le64_to_cpu(d->dqb_itime);
+	m->dqb_bhardlimit = v2_qbtos(le64_to_cpu(d->dqb_bhardlimit));
+	m->dqb_bsoftlimit = v2_qbtos(le64_to_cpu(d->dqb_bsoftlimit));
+	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+	m->dqb_btime = le64_to_cpu(d->dqb_btime);
+	/* We need to escape back all-zero structure */
+	memset(&empty, 0, sizeof(struct v2r1_disk_dqblk));
+	empty.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&empty, dp, sizeof(struct v2r1_disk_dqblk)))
+		m->dqb_itime = 0;
+}
+
+static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
+{
+	struct v2r1_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+	struct qtree_mem_dqinfo *info =
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+	d->dqb_itime = cpu_to_le64(m->dqb_itime);
+	d->dqb_bhardlimit = cpu_to_le64(v2_stoqb(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
+	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+	d->dqb_btime = cpu_to_le64(m->dqb_btime);
+	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	if (qtree_entry_unused(info, dp))
+		d->dqb_itime = cpu_to_le64(1);
+}
+
+static int v2r1_is_id(void *dp, struct dquot *dquot)
 {
-	struct v2_disk_dqblk *d = dp;
+	struct v2r1_disk_dqblk *d = dp;
 	struct qtree_mem_dqinfo *info =
 			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
 
@@ -217,20 +304,32 @@ static const struct quota_format_ops v2_format_ops = {
 	.release_dqblk		= v2_release_dquot,
 };
 
-static struct quota_format_type v2_quota_format = {
+static struct quota_format_type v2r0_quota_format = {
 	.qf_fmt_id	= QFMT_VFS_V0,
 	.qf_ops		= &v2_format_ops,
 	.qf_owner	= THIS_MODULE
 };
 
+static struct quota_format_type v2r1_quota_format = {
+	.qf_fmt_id	= QFMT_VFS_V1,
+	.qf_ops		= &v2_format_ops,
+	.qf_owner	= THIS_MODULE
+};
+
 static int __init init_v2_quota_format(void)
 {
-	return register_quota_format(&v2_quota_format);
+	int ret;
+
+	ret = register_quota_format(&v2r0_quota_format);
+	if (ret)
+		return ret;
+	return register_quota_format(&v2r1_quota_format);
 }
 
 static void __exit exit_v2_quota_format(void)
 {
-	unregister_quota_format(&v2_quota_format);
+	unregister_quota_format(&v2r0_quota_format);
+	unregister_quota_format(&v2r1_quota_format);
 }
 
 module_init(init_v2_quota_format);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index 530fe580685c..f1966b42c2fd 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -17,8 +17,8 @@
 }
 
 #define V2_INITQVERSIONS {\
-	0,		/* USRQUOTA */\
-	0		/* GRPQUOTA */\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
 }
 
 /* First generic header */
@@ -32,7 +32,7 @@ struct v2_disk_dqheader {
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2r0_disk_dqblk {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
@@ -44,6 +44,19 @@ struct v2_disk_dqblk {
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+struct v2r1_disk_dqblk {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_pad;
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
 /* Header with type and version specific information */
 struct v2_disk_dqinfo {
 	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 7db3a005483f..e70e62194243 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -74,6 +74,7 @@
 #define	QFMT_VFS_OLD 1
 #define	QFMT_VFS_V0 2
 #define QFMT_OCFS2 3
+#define	QFMT_VFS_V1 4
 
 /* Size of block in which space limits are passed through the quota
  * interface */
-- 
cgit v1.2.3


From cc9b2e9f6603190c009e5d2629ce8e3f99571346 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Thu, 26 Nov 2009 09:50:20 -0600
Subject: [SCSI] enclosure: fix oops while iterating enclosure_status array

Based on patch originally by Jeff Mahoney <jeffm@suse.com>

 enclosure_status is expected to be a NULL terminated array of strings
 but isn't actually NULL terminated. When writing an invalid value to
 /sys/class/enclosure/.../.../status, it goes off the end of the array
 and Oopses.


Fix by making the assumption true and adding NULL at the end.

Reported-by: Artur Wojcik <artur.wojcik@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/misc/enclosure.c  | 1 +
 include/linux/enclosure.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index e9eae4a78402..1eac626e710a 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -391,6 +391,7 @@ static const char *const enclosure_status [] = {
 	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
 	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
 	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+	[ENCLOSURE_STATUS_MAX] = NULL,
 };
 
 static const char *const enclosure_type [] = {
diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h
index 90d1c2184112..9a33c5f7e126 100644
--- a/include/linux/enclosure.h
+++ b/include/linux/enclosure.h
@@ -42,6 +42,8 @@ enum enclosure_status {
 	ENCLOSURE_STATUS_NOT_INSTALLED,
 	ENCLOSURE_STATUS_UNKNOWN,
 	ENCLOSURE_STATUS_UNAVAILABLE,
+	/* last element for counting purposes */
+	ENCLOSURE_STATUS_MAX
 };
 
 /* SFF-8485 activity light settings */
-- 
cgit v1.2.3


From b9889ed1ddeca5a3f3569c8de7354e9e97d803ae Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Dec 2009 20:32:39 +0100
Subject: sched: Remove forced2_migrations stats

This build warning:

 kernel/sched.c: In function 'set_task_cpu':
 kernel/sched.c:2070: warning: unused variable 'old_rq'

Made me realize that the forced2_migrations stat looks pretty
pointless (and a misnomer) - remove it.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 kernel/sched.c        | 6 ------
 kernel/sched_debug.c  | 2 --
 3 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ee9f200d12d3..87b89a827f0c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1174,7 +1174,6 @@ struct sched_entity {
 	u64			nr_failed_migrations_running;
 	u64			nr_failed_migrations_hot;
 	u64			nr_forced_migrations;
-	u64			nr_forced2_migrations;
 
 	u64			nr_wakeups;
 	u64			nr_wakeups_sync;
diff --git a/kernel/sched.c b/kernel/sched.c
index 36cc05a76947..bc68037f3199 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2067,7 +2067,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct rq *old_rq = cpu_rq(old_cpu);
 	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
 		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
@@ -2075,10 +2074,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
-#ifdef CONFIG_SCHEDSTATS
-		if (task_hot(p, old_rq->clock, NULL))
-			schedstat_inc(p, se.nr_forced2_migrations);
-#endif
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
@@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.nr_failed_migrations_running	= 0;
 	p->se.nr_failed_migrations_hot		= 0;
 	p->se.nr_forced_migrations		= 0;
-	p->se.nr_forced2_migrations		= 0;
 
 	p->se.nr_wakeups			= 0;
 	p->se.nr_wakeups_sync			= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 0fc5287fe80f..5ae24fc65d75 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -432,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.nr_failed_migrations_running);
 	P(se.nr_failed_migrations_hot);
 	P(se.nr_forced_migrations);
-	P(se.nr_forced2_migrations);
 	P(se.nr_wakeups);
 	P(se.nr_wakeups_sync);
 	P(se.nr_wakeups_migrate);
@@ -508,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.nr_failed_migrations_running	= 0;
 	p->se.nr_failed_migrations_hot		= 0;
 	p->se.nr_forced_migrations		= 0;
-	p->se.nr_forced2_migrations		= 0;
 	p->se.nr_wakeups			= 0;
 	p->se.nr_wakeups_sync			= 0;
 	p->se.nr_wakeups_migrate		= 0;
-- 
cgit v1.2.3


From 637e8a60a7aaf4ef7d46cfdf83bcfac9cf6f0fbd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 14 Nov 2009 02:28:05 +0100
Subject: usbdevfs: move compat_ioctl handling to devio.c

Half the compat_ioctl handling is in devio.c, the other
half is in fs/compat_ioctl.c. This moves everything into
one place for consistency.

As a positive side-effect, push down the BKL into the
ioctl methods.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Oliver Neukum <oliver@neukum.org>
Cc: Alon Bar-Lev <alon.barlev@gmail.com>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: linux-usb@vger.kernel.org
---
 drivers/usb/core/devio.c     | 110 +++++++++++++++++++++++++++++++++++++-----
 fs/compat_ioctl.c            | 112 -------------------------------------------
 include/linux/usbdevice_fs.h |  26 ++++++++++
 3 files changed, 125 insertions(+), 123 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 181f78c84105..6e8bcdfd23b4 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1388,6 +1388,46 @@ static int proc_reapurbnonblock(struct dev_state *ps, void __user *arg)
 }
 
 #ifdef CONFIG_COMPAT
+static int proc_control_compat(struct dev_state *ps,
+				struct usbdevfs_ctrltransfer32 __user *p32)
+{
+        struct usbdevfs_ctrltransfer __user *p;
+        __u32 udata;
+        p = compat_alloc_user_space(sizeof(*p));
+        if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
+            get_user(udata, &p32->data) ||
+	    put_user(compat_ptr(udata), &p->data))
+		return -EFAULT;
+        return proc_control(ps, p);
+}
+
+static int proc_bulk_compat(struct dev_state *ps,
+			struct usbdevfs_bulktransfer32 __user *p32)
+{
+        struct usbdevfs_bulktransfer __user *p;
+        compat_uint_t n;
+        compat_caddr_t addr;
+
+        p = compat_alloc_user_space(sizeof(*p));
+
+        if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
+            get_user(n, &p32->len) || put_user(n, &p->len) ||
+            get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
+            get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
+                return -EFAULT;
+
+        return proc_bulk(ps, p);
+}
+static int proc_disconnectsignal_compat(struct dev_state *ps, void __user *arg)
+{
+	struct usbdevfs_disconnectsignal32 ds;
+
+	if (copy_from_user(&ds, arg, sizeof(ds)))
+		return -EFAULT;
+	ps->discsignr = ds.signr;
+	ps->disccontext = compat_ptr(ds.context);
+	return 0;
+}
 
 static int get_urb32(struct usbdevfs_urb *kurb,
 		     struct usbdevfs_urb32 __user *uurb)
@@ -1482,6 +1522,7 @@ static int proc_reapurbnonblock_compat(struct dev_state *ps, void __user *arg)
 	return processcompl_compat(as, (void __user * __user *)arg);
 }
 
+
 #endif
 
 static int proc_disconnectsignal(struct dev_state *ps, void __user *arg)
@@ -1648,12 +1689,12 @@ static int proc_release_port(struct dev_state *ps, void __user *arg)
  * are assuming that somehow the configuration has been prevented from
  * changing.  But there's no mechanism to ensure that...
  */
-static int usbdev_ioctl(struct inode *inode, struct file *file,
-			unsigned int cmd, unsigned long arg)
+static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
+				void __user *p)
 {
 	struct dev_state *ps = file->private_data;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct usb_device *dev = ps->dev;
-	void __user *p = (void __user *)arg;
 	int ret = -ENOTTY;
 
 	if (!(file->f_mode & FMODE_WRITE))
@@ -1726,6 +1767,24 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 		break;
 
 #ifdef CONFIG_COMPAT
+	case USBDEVFS_CONTROL32:
+		snoop(&dev->dev, "%s: CONTROL32\n", __func__);
+		ret = proc_control_compat(ps, p);
+		if (ret >= 0)
+			inode->i_mtime = CURRENT_TIME;
+		break;
+
+	case USBDEVFS_BULK32:
+		snoop(&dev->dev, "%s: BULK32\n", __func__);
+		ret = proc_bulk_compat(ps, p);
+		if (ret >= 0)
+			inode->i_mtime = CURRENT_TIME;
+		break;
+
+	case USBDEVFS_DISCSIGNAL32:
+		snoop(&dev->dev, "%s: DISCSIGNAL32\n", __func__);
+		ret = proc_disconnectsignal_compat(ps, p);
+		break;
 
 	case USBDEVFS_SUBMITURB32:
 		snoop(&dev->dev, "%s: SUBMITURB32\n", __func__);
@@ -1745,7 +1804,7 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 		break;
 
 	case USBDEVFS_IOCTL32:
-		snoop(&dev->dev, "%s: IOCTL\n", __func__);
+		snoop(&dev->dev, "%s: IOCTL32\n", __func__);
 		ret = proc_ioctl_compat(ps, ptr_to_compat(p));
 		break;
 #endif
@@ -1801,6 +1860,32 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 	return ret;
 }
 
+static long usbdev_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = usbdev_do_ioctl(file, cmd, (void __user *)arg);
+	unlock_kernel();
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long usbdev_compat_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = usbdev_do_ioctl(file, cmd, compat_ptr(arg));
+	unlock_kernel();
+
+	return ret;
+}
+#endif
+
 /* No kernel lock - fine */
 static unsigned int usbdev_poll(struct file *file,
 				struct poll_table_struct *wait)
@@ -1817,13 +1902,16 @@ static unsigned int usbdev_poll(struct file *file,
 }
 
 const struct file_operations usbdev_file_operations = {
-	.owner = 	THIS_MODULE,
-	.llseek =	usbdev_lseek,
-	.read =		usbdev_read,
-	.poll =		usbdev_poll,
-	.ioctl =	usbdev_ioctl,
-	.open =		usbdev_open,
-	.release =	usbdev_release,
+	.owner =	  THIS_MODULE,
+	.llseek =	  usbdev_lseek,
+	.read =		  usbdev_read,
+	.poll =		  usbdev_poll,
+	.unlocked_ioctl = usbdev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =   usbdev_compat_ioctl,
+#endif
+	.open =		  usbdev_open,
+	.release =	  usbdev_release,
 };
 
 static void usbdev_remove(struct usb_device *udev)
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 598763fd207f..278020d2449c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -742,94 +742,6 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd,
         return err;
 }
 
-struct usbdevfs_ctrltransfer32 {
-        u8 bRequestType;
-        u8 bRequest;
-        u16 wValue;
-        u16 wIndex;
-        u16 wLength;
-        u32 timeout;  /* in milliseconds */
-        compat_caddr_t data;
-};
-
-#define USBDEVFS_CONTROL32           _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
-
-static int do_usbdevfs_control(unsigned int fd, unsigned int cmd,
-			struct usbdevfs_ctrltransfer32 __user *p32)
-{
-        struct usbdevfs_ctrltransfer __user *p;
-        __u32 udata;
-        p = compat_alloc_user_space(sizeof(*p));
-        if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
-            get_user(udata, &p32->data) ||
-	    put_user(compat_ptr(udata), &p->data))
-		return -EFAULT;
-        return sys_ioctl(fd, USBDEVFS_CONTROL, (unsigned long)p);
-}
-
-
-struct usbdevfs_bulktransfer32 {
-        compat_uint_t ep;
-        compat_uint_t len;
-        compat_uint_t timeout; /* in milliseconds */
-        compat_caddr_t data;
-};
-
-#define USBDEVFS_BULK32              _IOWR('U', 2, struct usbdevfs_bulktransfer32)
-
-static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd,
-			struct usbdevfs_bulktransfer32 __user *p32)
-{
-        struct usbdevfs_bulktransfer __user *p;
-        compat_uint_t n;
-        compat_caddr_t addr;
-
-        p = compat_alloc_user_space(sizeof(*p));
-
-        if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
-            get_user(n, &p32->len) || put_user(n, &p->len) ||
-            get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
-            get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
-                return -EFAULT;
-
-        return sys_ioctl(fd, USBDEVFS_BULK, (unsigned long)p);
-}
-
-
-/*
- *  USBDEVFS_SUBMITURB, USBDEVFS_REAPURB and USBDEVFS_REAPURBNDELAY
- *  are handled in usbdevfs core.			-Christopher Li
- */
-
-struct usbdevfs_disconnectsignal32 {
-        compat_int_t signr;
-        compat_caddr_t context;
-};
-
-#define USBDEVFS_DISCSIGNAL32      _IOR('U', 14, struct usbdevfs_disconnectsignal32)
-
-static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd,
-			struct usbdevfs_disconnectsignal32 __user *udis)
-{
-        struct usbdevfs_disconnectsignal kdis;
-        mm_segment_t old_fs;
-        u32 uctx;
-        int err;
-
-        if (get_user(kdis.signr, &udis->signr) ||
-            __get_user(uctx, &udis->context))
-                return -EFAULT;
-
-        kdis.context = compat_ptr(uctx);
-
-        old_fs = get_fs();
-        set_fs(KERNEL_DS);
-        err = sys_ioctl(fd, USBDEVFS_DISCSIGNAL, (unsigned long) &kdis);
-        set_fs(old_fs);
-
-        return err;
-}
-
 /*
  * I2C layer ioctls
  */
@@ -1471,21 +1383,6 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
 COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
 COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
 COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
-/* USB */
-COMPATIBLE_IOCTL(USBDEVFS_RESETEP)
-COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE)
-COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION)
-COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER)
-COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB)
-COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE)
-COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE)
-COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO)
-COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO)
-COMPATIBLE_IOCTL(USBDEVFS_RESET)
-COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
-COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
-COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
-COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
 /* NBD */
 COMPATIBLE_IOCTL(NBD_DO_IT)
 COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
@@ -1604,8 +1501,6 @@ COMPATIBLE_IOCTL(TIOCSLTC)
 COMPATIBLE_IOCTL(TIOCSTART)
 COMPATIBLE_IOCTL(TIOCSTOP)
 #endif
-/* Usbdevfs */
-COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
 
 /* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
    but we don't want warnings on other file systems. So declare
@@ -1677,13 +1572,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case TIOCGSERIAL:
 	case TIOCSSERIAL:
 		return serial_struct_ioctl(fd, cmd, argp);
-	/* Usbdevfs */
-	case USBDEVFS_CONTROL32:
-		return do_usbdevfs_control(fd, cmd, argp);
-	case USBDEVFS_BULK32:
-		return do_usbdevfs_bulk(fd, cmd, argp);
-	case USBDEVFS_DISCSIGNAL32:
-		return do_usbdevfs_discsignal(fd, cmd, argp);
 	/* i2c */
 	case I2C_FUNCS:
 		return w_long(fd, cmd, argp);
diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h
index b2a7d8ba6ee3..15591d2ea400 100644
--- a/include/linux/usbdevice_fs.h
+++ b/include/linux/usbdevice_fs.h
@@ -128,6 +128,29 @@ struct usbdevfs_hub_portinfo {
 #ifdef __KERNEL__
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
+
+struct usbdevfs_ctrltransfer32 {
+        u8 bRequestType;
+        u8 bRequest;
+        u16 wValue;
+        u16 wIndex;
+        u16 wLength;
+        u32 timeout;  /* in milliseconds */
+        compat_caddr_t data;
+};
+
+struct usbdevfs_bulktransfer32 {
+        compat_uint_t ep;
+        compat_uint_t len;
+        compat_uint_t timeout; /* in milliseconds */
+        compat_caddr_t data;
+};
+
+struct usbdevfs_disconnectsignal32 {
+        compat_int_t signr;
+        compat_caddr_t context;
+};
+
 struct usbdevfs_urb32 {
 	unsigned char type;
 	unsigned char endpoint;
@@ -153,7 +176,9 @@ struct usbdevfs_ioctl32 {
 #endif /* __KERNEL__ */
 
 #define USBDEVFS_CONTROL           _IOWR('U', 0, struct usbdevfs_ctrltransfer)
+#define USBDEVFS_CONTROL32           _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
 #define USBDEVFS_BULK              _IOWR('U', 2, struct usbdevfs_bulktransfer)
+#define USBDEVFS_BULK32              _IOWR('U', 2, struct usbdevfs_bulktransfer32)
 #define USBDEVFS_RESETEP           _IOR('U', 3, unsigned int)
 #define USBDEVFS_SETINTERFACE      _IOR('U', 4, struct usbdevfs_setinterface)
 #define USBDEVFS_SETCONFIGURATION  _IOR('U', 5, unsigned int)
@@ -166,6 +191,7 @@ struct usbdevfs_ioctl32 {
 #define USBDEVFS_REAPURBNDELAY     _IOW('U', 13, void *)
 #define USBDEVFS_REAPURBNDELAY32   _IOW('U', 13, __u32)
 #define USBDEVFS_DISCSIGNAL        _IOR('U', 14, struct usbdevfs_disconnectsignal)
+#define USBDEVFS_DISCSIGNAL32      _IOR('U', 14, struct usbdevfs_disconnectsignal32)
 #define USBDEVFS_CLAIMINTERFACE    _IOR('U', 15, unsigned int)
 #define USBDEVFS_RELEASEINTERFACE  _IOR('U', 16, unsigned int)
 #define USBDEVFS_CONNECTINFO       _IOW('U', 17, struct usbdevfs_connectinfo)
-- 
cgit v1.2.3


From 87a8f240e9bcf025ba45e4563c842b0d59c5e8ef Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 10 Dec 2009 23:52:01 +0000
Subject: dm log: add flush callback fn

Introduce a callback pointer from the log to dm-raid1 layer.

Before some region is set as "in-sync", we need to flush hardware cache on
all the disks. But the log module doesn't have access to the mirror_set
structure. So it will use this callback.

So far the callback is unused, it will be used in further patches.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log.c          | 6 ++++--
 drivers/md/dm-raid1.c        | 2 +-
 include/linux/dm-dirty-log.h | 6 ++++--
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index d779f8c915dd..666a80e3602e 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -145,8 +145,9 @@ int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
 EXPORT_SYMBOL(dm_dirty_log_type_unregister);
 
 struct dm_dirty_log *dm_dirty_log_create(const char *type_name,
-					 struct dm_target *ti,
-					 unsigned int argc, char **argv)
+			struct dm_target *ti,
+			int (*flush_callback_fn)(struct dm_target *ti),
+			unsigned int argc, char **argv)
 {
 	struct dm_dirty_log_type *type;
 	struct dm_dirty_log *log;
@@ -161,6 +162,7 @@ struct dm_dirty_log *dm_dirty_log_create(const char *type_name,
 		return NULL;
 	}
 
+	log->flush_callback_fn = flush_callback_fn;
 	log->type = type;
 	if (type->ctr(log, ti, argc, argv)) {
 		kfree(log);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 752a29e1855b..d44bc497dad9 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -896,7 +896,7 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
 		return NULL;
 	}
 
-	dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2);
+	dl = dm_dirty_log_create(argv[0], ti, NULL, param_count, argv + 2);
 	if (!dl) {
 		ti->error = "Error creating mirror dirty log";
 		return NULL;
diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h
index 5e8b11d88f6f..7084503c3405 100644
--- a/include/linux/dm-dirty-log.h
+++ b/include/linux/dm-dirty-log.h
@@ -21,6 +21,7 @@ struct dm_dirty_log_type;
 
 struct dm_dirty_log {
 	struct dm_dirty_log_type *type;
+	int (*flush_callback_fn)(struct dm_target *ti);
 	void *context;
 };
 
@@ -136,8 +137,9 @@ int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type);
  * type->constructor/destructor() directly.
  */
 struct dm_dirty_log *dm_dirty_log_create(const char *type_name,
-					 struct dm_target *ti,
-					 unsigned argc, char **argv);
+			struct dm_target *ti,
+			int (*flush_callback_fn)(struct dm_target *ti),
+			unsigned argc, char **argv);
 void dm_dirty_log_destroy(struct dm_dirty_log *log);
 
 #endif	/* __KERNEL__ */
-- 
cgit v1.2.3


From c58098be979509a54021e837a47fcad08db31f94 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 10 Dec 2009 23:52:05 +0000
Subject: dm raid1: remove bio_endio from dm_rh_mark_nosync

Move bio completion out of dm_rh_mark_nosync in preparation for the
next patch.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Takahiro Yasui <tyasui@redhat.com>
Tested-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c          | 3 ++-
 drivers/md/dm-region-hash.c    | 6 +-----
 include/linux/dm-region-hash.h | 3 +--
 3 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d1a7f1a4789c..4f466ad75680 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -779,7 +779,8 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 			hold_bio(ms, bio);
 		else {
 			ms->in_sync = 0;
-			dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0);
+			dm_rh_mark_nosync(ms->rh, bio);
+			bio_endio(bio, 0);
 		}
 	}
 }
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 00806b760ccd..5f19ceb6fe91 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -383,8 +383,6 @@ static void complete_resync_work(struct dm_region *reg, int success)
 /* dm_rh_mark_nosync
  * @ms
  * @bio
- * @done
- * @error
  *
  * The bio was written on some mirror(s) but failed on other mirror(s).
  * We can successfully endio the bio but should avoid the region being
@@ -392,8 +390,7 @@ static void complete_resync_work(struct dm_region *reg, int success)
  *
  * This function is _not_ safe in interrupt context!
  */
-void dm_rh_mark_nosync(struct dm_region_hash *rh,
-		       struct bio *bio, unsigned done, int error)
+void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
 {
 	unsigned long flags;
 	struct dm_dirty_log *log = rh->log;
@@ -430,7 +427,6 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh,
 	BUG_ON(!list_empty(&reg->list));
 	spin_unlock_irqrestore(&rh->region_lock, flags);
 
-	bio_endio(bio, error);
 	if (recovering)
 		complete_resync_work(reg, 0);
 }
diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h
index a9e652a41373..9e2a7a401df5 100644
--- a/include/linux/dm-region-hash.h
+++ b/include/linux/dm-region-hash.h
@@ -78,8 +78,7 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region);
 /* Delay bios on regions. */
 void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio);
 
-void dm_rh_mark_nosync(struct dm_region_hash *rh,
-		       struct bio *bio, unsigned done, int error);
+void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio);
 
 /*
  * Region recovery control.
-- 
cgit v1.2.3


From 7c6664114b7342a36f14a6836564e865669b3cea Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Thu, 10 Dec 2009 23:52:19 +0000
Subject: dm: rename dm_get_table to dm_get_live_table

Rename dm_get_table to dm_get_live_table.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c         | 14 +++++++-------
 drivers/md/dm.c               | 24 ++++++++++++------------
 include/linux/device-mapper.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 99de0e4ce831..bf3d19a0fb78 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -235,7 +235,7 @@ static void __hash_remove(struct hash_cell *hc)
 	dm_set_mdptr(hc->md, NULL);
 	mutex_unlock(&dm_hash_cells_mutex);
 
-	table = dm_get_table(hc->md);
+	table = dm_get_live_table(hc->md);
 	if (table) {
 		dm_table_event(table);
 		dm_table_put(table);
@@ -338,7 +338,7 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
 	/*
 	 * Wake up any dm event waiters.
 	 */
-	table = dm_get_table(hc->md);
+	table = dm_get_live_table(hc->md);
 	if (table) {
 		dm_table_event(table);
 		dm_table_put(table);
@@ -564,7 +564,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 
 	param->event_nr = dm_get_event_nr(md);
 
-	table = dm_get_table(md);
+	table = dm_get_live_table(md);
 	if (table) {
 		param->flags |= DM_ACTIVE_PRESENT_FLAG;
 		param->target_count = dm_table_get_num_targets(table);
@@ -993,7 +993,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_table(md);
+	table = dm_get_live_table(md);
 	if (table) {
 		retrieve_status(table, param, param_size);
 		dm_table_put(table);
@@ -1226,7 +1226,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_table(md);
+	table = dm_get_live_table(md);
 	if (table) {
 		retrieve_deps(table, param, param_size);
 		dm_table_put(table);
@@ -1255,7 +1255,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_table(md);
+	table = dm_get_live_table(md);
 	if (table) {
 		retrieve_status(table, param, param_size);
 		dm_table_put(table);
@@ -1299,7 +1299,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 		goto out;
 	}
 
-	table = dm_get_table(md);
+	table = dm_get_live_table(md);
 	if (!table)
 		goto out_argv;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3de8d6d5b0b8..233a2e9156a3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -397,7 +397,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *tgt;
 	int r = -ENOTTY;
 
@@ -528,7 +528,7 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
  * function to access the md->map field, and make sure they call
  * dm_table_put() when finished.
  */
-struct dm_table *dm_get_table(struct mapped_device *md)
+struct dm_table *dm_get_live_table(struct mapped_device *md)
 {
 	struct dm_table *t;
 	unsigned long flags;
@@ -1294,7 +1294,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	struct clone_info ci;
 	int error = 0;
 
-	ci.map = dm_get_table(md);
+	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
 		if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
 			bio_io_error(bio);
@@ -1335,7 +1335,7 @@ static int dm_merge_bvec(struct request_queue *q,
 			 struct bio_vec *biovec)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
 	sector_t max_sectors;
 	int max_size = 0;
@@ -1638,7 +1638,7 @@ static void map_request(struct dm_target *ti, struct request *clone,
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
 	struct request *rq, *clone;
 
@@ -1697,7 +1697,7 @@ static int dm_lld_busy(struct request_queue *q)
 {
 	int r;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
 		r = 1;
@@ -1712,7 +1712,7 @@ static int dm_lld_busy(struct request_queue *q)
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (map) {
 		if (dm_request_based(md))
@@ -1730,7 +1730,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		if (map) {
 			/*
 			 * Request-based dm cares about only own queue for
@@ -2166,7 +2166,7 @@ void dm_put(struct mapped_device *md)
 	BUG_ON(test_bit(DMF_FREEING, &md->flags));
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		idr_replace(&_minor_idr, MINOR_ALLOCED,
 			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
@@ -2302,7 +2302,7 @@ static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
 static int dm_rq_barrier(struct mapped_device *md)
 {
 	int i, j;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	unsigned num_targets = dm_table_get_num_targets(map);
 	struct dm_target *ti;
 	struct request *clone;
@@ -2453,7 +2453,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		goto out_unlock;
 	}
 
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2558,7 +2558,7 @@ int dm_resume(struct mapped_device *md)
 	if (!dm_suspended(md))
 		goto out;
 
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index df7607e6dce8..fc16a351ef7a 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -276,7 +276,7 @@ void dm_table_unplug_all(struct dm_table *t);
 /*
  * Table reference counting.
  */
-struct dm_table *dm_get_table(struct mapped_device *md);
+struct dm_table *dm_get_live_table(struct mapped_device *md);
 void dm_table_get(struct dm_table *t);
 void dm_table_put(struct dm_table *t);
 
-- 
cgit v1.2.3


From 1d0f3ce83200edc5d43723c77c62b09ad6560294 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 10 Dec 2009 23:52:22 +0000
Subject: dm ioctl: retrieve status from inactive table

Add the flag DM_QUERY_INACTIVE_TABLE_FLAG to the ioctls to return
infomation about the loaded-but-not-yet-active table instead of the live
table.  Prior to this patch it was impossible to obtain this information
until the device had been 'resumed'.

Userspace dmsetup and libdevmapper support the flag as of version 1.02.40.
e.g. dmsetup info --inactive vg1-lv1

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c    | 70 +++++++++++++++++++++++++++++++++++++++---------
 include/linux/dm-ioctl.h | 13 ++++++---
 2 files changed, 67 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index d06dd39856f3..a3d20265ffc1 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -523,8 +523,6 @@ static int list_versions(struct dm_ioctl *param, size_t param_size)
 	return 0;
 }
 
-
-
 static int check_name(const char *name)
 {
 	if (strchr(name, '/')) {
@@ -535,6 +533,40 @@ static int check_name(const char *name)
 	return 0;
 }
 
+/*
+ * On successful return, the caller must not attempt to acquire
+ * _hash_lock without first calling dm_table_put, because dm_table_destroy
+ * waits for this dm_table_put and could be called under this lock.
+ */
+static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
+{
+	struct hash_cell *hc;
+	struct dm_table *table = NULL;
+
+	down_read(&_hash_lock);
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		DMWARN("device has been removed from the dev hash table.");
+		goto out;
+	}
+
+	table = hc->new_map;
+	if (table)
+		dm_table_get(table);
+
+out:
+	up_read(&_hash_lock);
+
+	return table;
+}
+
+static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
+						      struct dm_ioctl *param)
+{
+	return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ?
+		dm_get_inactive_table(md) : dm_get_live_table(md);
+}
+
 /*
  * Fills in a dm_ioctl structure, ready for sending back to
  * userland.
@@ -559,18 +591,30 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	 */
 	param->open_count = dm_open_count(md);
 
-	if (get_disk_ro(disk))
-		param->flags |= DM_READONLY_FLAG;
-
 	param->event_nr = dm_get_event_nr(md);
+	param->target_count = 0;
 
 	table = dm_get_live_table(md);
 	if (table) {
-		param->flags |= DM_ACTIVE_PRESENT_FLAG;
-		param->target_count = dm_table_get_num_targets(table);
+		if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
+			if (get_disk_ro(disk))
+				param->flags |= DM_READONLY_FLAG;
+			param->target_count = dm_table_get_num_targets(table);
+		}
 		dm_table_put(table);
-	} else
-		param->target_count = 0;
+
+		param->flags |= DM_ACTIVE_PRESENT_FLAG;
+	}
+
+	if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) {
+		table = dm_get_inactive_table(md);
+		if (table) {
+			if (!(dm_table_get_mode(table) & FMODE_WRITE))
+				param->flags |= DM_READONLY_FLAG;
+			param->target_count = dm_table_get_num_targets(table);
+			dm_table_put(table);
+		}
+	}
 
 	return 0;
 }
@@ -993,7 +1037,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_live_table(md);
+	table = dm_get_live_or_inactive_table(md, param);
 	if (table) {
 		retrieve_status(table, param, param_size);
 		dm_table_put(table);
@@ -1226,7 +1270,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_live_table(md);
+	table = dm_get_live_or_inactive_table(md, param);
 	if (table) {
 		retrieve_deps(table, param, param_size);
 		dm_table_put(table);
@@ -1255,13 +1299,13 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	table = dm_get_live_table(md);
+	table = dm_get_live_or_inactive_table(md, param);
 	if (table) {
 		retrieve_status(table, param, param_size);
 		dm_table_put(table);
 	}
 
- out:
+out:
 	dm_put(md);
 	return r;
 }
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 2ab84c83c31a..aa95508d2f95 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004 - 2005 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the LGPL.
  */
@@ -266,9 +266,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	15
+#define DM_VERSION_MINOR	16
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2009-04-01)"
+#define DM_VERSION_EXTRA	"-ioctl (2009-11-05)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -309,4 +309,11 @@ enum {
  */
 #define DM_NOFLUSH_FLAG		(1 << 11) /* In */
 
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one.  Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG	(1 << 12) /* In */
+
 #endif				/* _LINUX_DM_IOCTL_H */
-- 
cgit v1.2.3


From 042d2a9bcd80fe12d4b0871706aa9dd2231e8238 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Thu, 10 Dec 2009 23:52:24 +0000
Subject: dm: keep old table until after resume succeeded

When swapping a new table into place, retain the old table until
its replacement is in place.

An old check for an empty table is removed because this is enforced
in populate_table().

__unbind() becomes redundant when followed by __bind().

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c         | 10 ++++++----
 drivers/md/dm.c               | 34 +++++++++++++++++-----------------
 include/linux/device-mapper.h |  4 +++-
 3 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index a3d20265ffc1..63fd4de25bda 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -855,7 +855,7 @@ static int do_resume(struct dm_ioctl *param)
 	unsigned suspend_flags = DM_SUSPEND_LOCKFS_FLAG;
 	struct hash_cell *hc;
 	struct mapped_device *md;
-	struct dm_table *new_map;
+	struct dm_table *new_map, *old_map = NULL;
 
 	down_write(&_hash_lock);
 
@@ -884,11 +884,11 @@ static int do_resume(struct dm_ioctl *param)
 		if (!dm_suspended(md))
 			dm_suspend(md, suspend_flags);
 
-		r = dm_swap_table(md, new_map);
-		if (r) {
+		old_map = dm_swap_table(md, new_map);
+		if (IS_ERR(old_map)) {
 			dm_table_destroy(new_map);
 			dm_put(md);
-			return r;
+			return PTR_ERR(old_map);
 		}
 
 		if (dm_table_get_mode(new_map) & FMODE_WRITE)
@@ -900,6 +900,8 @@ static int do_resume(struct dm_ioctl *param)
 	if (dm_suspended(md))
 		r = dm_resume(md);
 
+	if (old_map)
+		dm_table_destroy(old_map);
 
 	if (!r) {
 		dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 255b75c61544..c254c6cf69a1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2033,9 +2033,13 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t,
-		  struct queue_limits *limits)
+/*
+ * Returns old map, which caller must destroy.
+ */
+static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
+			       struct queue_limits *limits)
 {
+	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
 	unsigned long flags;
@@ -2050,11 +2054,6 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 
 	__set_size(md, size);
 
-	if (!size) {
-		dm_table_destroy(t);
-		return 0;
-	}
-
 	dm_table_event_callback(t, event_callback, md);
 
 	/*
@@ -2070,11 +2069,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 	__bind_mempools(md, t);
 
 	write_lock_irqsave(&md->map_lock, flags);
+	old_map = md->map;
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
 	write_unlock_irqrestore(&md->map_lock, flags);
 
-	return 0;
+	return old_map;
 }
 
 /*
@@ -2368,13 +2368,13 @@ static void dm_rq_barrier_work(struct work_struct *work)
 }
 
 /*
- * Swap in a new table (destroying old one).
+ * Swap in a new table, returning the old one for the caller to destroy.
  */
-int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	struct dm_table *map;
+	struct dm_table *map = ERR_PTR(-EINVAL);
 	struct queue_limits limits;
-	int r = -EINVAL;
+	int r;
 
 	mutex_lock(&md->suspend_lock);
 
@@ -2383,8 +2383,10 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		goto out;
 
 	r = dm_calculate_queue_limits(table, &limits);
-	if (r)
+	if (r) {
+		map = ERR_PTR(r);
 		goto out;
+	}
 
 	/* cannot change the device type, once a table is bound */
 	if (md->map &&
@@ -2393,13 +2395,11 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		goto out;
 	}
 
-	map = __unbind(md);
-	r = __bind(md, table, &limits);
-	dm_table_destroy(map);
+	map = __bind(md, table, &limits);
 
 out:
 	mutex_unlock(&md->suspend_lock);
-	return r;
+	return map;
 }
 
 /*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index fc16a351ef7a..b9c6c8ca11be 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -295,8 +295,10 @@ void dm_table_event(struct dm_table *t);
 
 /*
  * The device must be suspended before calling this method.
+ * Returns the previous table, which the caller must destroy.
  */
-int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+struct dm_table *dm_swap_table(struct mapped_device *md,
+			       struct dm_table *t);
 
 /*
  * A wrapper around vmalloc.
-- 
cgit v1.2.3


From 4f186f8bbfa92bf1a2b39f7a8674348bfdba9437 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Thu, 10 Dec 2009 23:52:26 +0000
Subject: dm: rename dm_suspended to dm_suspended_md

This patch renames dm_suspended() to dm_suspended_md() and
keeps it internal to dm.
No functional change.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c         |  8 ++++----
 drivers/md/dm-sysfs.c         |  2 +-
 drivers/md/dm.c               | 12 ++++++------
 drivers/md/dm.h               |  5 +++++
 include/linux/device-mapper.h |  1 -
 5 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 63fd4de25bda..1d669322b27c 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -579,7 +579,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
 			  DM_ACTIVE_PRESENT_FLAG);
 
-	if (dm_suspended(md))
+	if (dm_suspended_md(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
 	param->dev = huge_encode_dev(disk_devt(disk));
@@ -839,7 +839,7 @@ static int do_suspend(struct dm_ioctl *param)
 	if (param->flags & DM_NOFLUSH_FLAG)
 		suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
 
-	if (!dm_suspended(md))
+	if (!dm_suspended_md(md))
 		r = dm_suspend(md, suspend_flags);
 
 	if (!r)
@@ -881,7 +881,7 @@ static int do_resume(struct dm_ioctl *param)
 			suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
 		if (param->flags & DM_NOFLUSH_FLAG)
 			suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
-		if (!dm_suspended(md))
+		if (!dm_suspended_md(md))
 			dm_suspend(md, suspend_flags);
 
 		old_map = dm_swap_table(md, new_map);
@@ -897,7 +897,7 @@ static int do_resume(struct dm_ioctl *param)
 			set_disk_ro(dm_disk(md), 1);
 	}
 
-	if (dm_suspended(md))
+	if (dm_suspended_md(md))
 		r = dm_resume(md);
 
 	if (old_map)
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index b000de38c99a..f53392df7b97 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -59,7 +59,7 @@ static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
 
 static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
 {
-	sprintf(buf, "%d\n", dm_suspended(md));
+	sprintf(buf, "%d\n", dm_suspended_md(md));
 
 	return strlen(buf);
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f2b993c43359..e0702bf37935 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -415,7 +415,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
 	tgt = dm_table_get_target(map, 0);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EAGAIN;
 		goto out;
 	}
@@ -2182,7 +2182,7 @@ void dm_put(struct mapped_device *md)
 			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
-		if (!dm_suspended(md)) {
+		if (!dm_suspended_md(md)) {
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
@@ -2381,7 +2381,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	mutex_lock(&md->suspend_lock);
 
 	/* device must be suspended */
-	if (!dm_suspended(md))
+	if (!dm_suspended_md(md))
 		goto out;
 
 	r = dm_calculate_queue_limits(table, &limits);
@@ -2461,7 +2461,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	mutex_lock(&md->suspend_lock);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EINVAL;
 		goto out_unlock;
 	}
@@ -2568,7 +2568,7 @@ int dm_resume(struct mapped_device *md)
 	struct dm_table *map = NULL;
 
 	mutex_lock(&md->suspend_lock);
-	if (!dm_suspended(md))
+	if (!dm_suspended_md(md))
 		goto out;
 
 	map = dm_get_live_table(md);
@@ -2679,7 +2679,7 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 	return md;
 }
 
-int dm_suspended(struct mapped_device *md)
+int dm_suspended_md(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 604a5f2a2383..8dadaa5bc396 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -93,6 +93,11 @@ int dm_split_args(int *argc, char ***argvp, char *input);
  */
 int dm_deleting_md(struct mapped_device *md);
 
+/*
+ * Is this mapped_device suspended?
+ */
+int dm_suspended_md(struct mapped_device *md);
+
 /*
  * The device-mapper can be driven through one of two interfaces;
  * ioctl or filesystem, depending which patch you have applied.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index b9c6c8ca11be..fca0d31bbf2d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -235,7 +235,6 @@ void dm_uevent_add(struct mapped_device *md, struct list_head *elist);
 const char *dm_device_name(struct mapped_device *md);
 int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
-int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
-- 
cgit v1.2.3


From 64dbce580d5a7e89e8de20b91f80c7267cdad91d Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Thu, 10 Dec 2009 23:52:27 +0000
Subject: dm: export suspended state to targets

This patch adds the exported dm_suspended() function so that targets
can check whether or not they are suspended.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c               | 11 +++++++++++
 include/linux/device-mapper.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e0702bf37935..3167480b532c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2684,6 +2684,17 @@ int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_suspended(struct dm_target *ti)
+{
+	struct mapped_device *md = dm_table_get_md(ti->table);
+	int r = dm_suspended_md(md);
+
+	dm_put(md);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_suspended);
+
 int dm_noflush_suspending(struct dm_target *ti)
 {
 	struct mapped_device *md = dm_table_get_md(ti->table);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index fca0d31bbf2d..d4c9c0b88adc 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -235,6 +235,7 @@ void dm_uevent_add(struct mapped_device *md, struct list_head *elist);
 const char *dm_device_name(struct mapped_device *md);
 int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
+int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
-- 
cgit v1.2.3


From a88f6667078412e5eff37ead68a043ee0ec9f1da Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 10 Dec 2009 18:35:15 +0100
Subject: dmaengine: clarify the meaning of the DMA_CTRL_ACK flag

DMA_CTRL_ACK's description applies to its clear state, not to its set state.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 2b9f2ac7ed60..78784982b33e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -74,7 +74,7 @@ enum dma_transaction_type {
  *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
  *  this transaction
- * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
+ * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
  *  acknowledges receipt, i.e. has has a chance to establish any dependency
  *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
-- 
cgit v1.2.3


From 0f24f1287a86b198c1e4bd4ce45e8565e40ff804 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 11 Dec 2009 15:45:30 +0800
Subject: tracing, slab: Define kmem_cache_alloc_notrace ifdef CONFIG_TRACING

Define kmem_trace_alloc_{,node}_notrace() if CONFIG_TRACING is
enabled, otherwise perf-kmem will show wrong stats ifndef
CONFIG_KMEM_TRACE, because a kmalloc() memory allocation may
be traced by both trace_kmalloc() and trace_kmem_cache_alloc().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: linux-mm@kvack.org <linux-mm@kvack.org>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
LKML-Reference: <4B21F89A.7000801@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/slab_def.h | 4 ++--
 include/linux/slub_def.h | 4 ++--
 mm/slab.c                | 6 +++---
 mm/slub.c                | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 850d057500de..ca6b2b317991 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -110,7 +110,7 @@ extern struct cache_sizes malloc_sizes[];
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
 extern size_t slab_buffer_size(struct kmem_cache *cachep);
 #else
@@ -166,7 +166,7 @@ found:
 extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
 extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
 					   gfp_t flags,
 					   int nodeid);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5ad70a60fd74..1e14beb23f9b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -217,7 +217,7 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
 #else
 static __always_inline void *
@@ -266,7 +266,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
 					   gfp_t gfpflags,
 					   int node);
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..9733bb4009d9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -490,7 +490,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 size_t slab_buffer_size(struct kmem_cache *cachep)
 {
 	return cachep->buffer_size;
@@ -3558,7 +3558,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
 {
 	return __cache_alloc(cachep, flags, __builtin_return_address(0));
@@ -3621,7 +3621,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
 				    gfp_t flags,
 				    int nodeid)
diff --git a/mm/slub.c b/mm/slub.c
index 4996fc719552..4a89c3d231b2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1754,7 +1754,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return slab_alloc(s, gfpflags, -1, _RET_IP_);
@@ -1775,7 +1775,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
 				    gfp_t gfpflags,
 				    int node)
-- 
cgit v1.2.3


From 99ac64c826e62a07e5818cfde620be4d524f1edf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Dec 2009 11:58:42 +0100
Subject: hw-breakpoints: Handle bad modify_user_hw_breakpoint off-case return
 value

While converting modify_user_hw_breakpoint() return value, we
forgot to handle the off-case. It's not returning a pointer
anymore.

This solves the build warning reported by Stephen Rothwell against
linux-next.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1260529122-6260-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/hw_breakpoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 69f07a9f1277..41235c93e4e9 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -93,7 +93,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    struct task_struct *tsk)	{ return NULL; }
 static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr)	{ return NULL; }
+			  struct perf_event_attr *attr)	{ return -ENOSYS; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	 triggered,
-- 
cgit v1.2.3


From f8b7256096a20436f6d0926747e3ac3d64c81d24 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 30 Nov 2009 17:37:04 -0500
Subject: Unify sys_mmap*

New helper - sys_mmap_pgoff(); switch syscalls to using it.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/osf_sys.c               | 19 +++----
 arch/arm/kernel/entry-common.S            |  4 +-
 arch/arm/kernel/sys_arm.c                 | 30 +----------
 arch/avr32/include/asm/syscalls.h         |  4 --
 arch/avr32/kernel/sys_avr32.c             | 31 ------------
 arch/avr32/kernel/syscall-stubs.S         |  2 +-
 arch/blackfin/kernel/sys_bfin.c           | 33 ------------
 arch/blackfin/mach-common/entry.S         |  2 +-
 arch/cris/kernel/sys_cris.c               | 30 ++---------
 arch/frv/kernel/sys_frv.c                 | 66 +-----------------------
 arch/h8300/kernel/sys_h8300.c             | 83 +------------------------------
 arch/h8300/kernel/syscalls.S              |  2 +-
 arch/ia64/kernel/sys_ia64.c               | 37 +-------------
 arch/m32r/kernel/sys_m32r.c               | 24 ---------
 arch/m32r/kernel/syscall_table.S          |  2 +-
 arch/m68k/kernel/sys_m68k.c               | 83 +++----------------------------
 arch/m68knommu/kernel/sys_m68k.c          | 38 +-------------
 arch/m68knommu/kernel/syscalltable.S      |  2 +-
 arch/microblaze/kernel/sys_microblaze.c   | 38 ++------------
 arch/microblaze/kernel/syscall_table.S    |  2 +-
 arch/mips/kernel/linux32.c                | 19 +------
 arch/mips/kernel/syscall.c                | 29 +----------
 arch/mn10300/kernel/entry.S               |  2 +-
 arch/mn10300/kernel/sys_mn10300.c         | 31 +-----------
 arch/parisc/kernel/sys_parisc.c           | 30 ++---------
 arch/powerpc/kernel/syscalls.c            | 15 +-----
 arch/s390/kernel/compat_linux.c           | 32 ++----------
 arch/s390/kernel/sys_s390.c               | 30 +----------
 arch/score/kernel/sys_score.c             | 25 ++--------
 arch/sh/kernel/sys_sh.c                   | 28 +----------
 arch/sparc/kernel/sys_sparc_32.c          | 31 ++----------
 arch/sparc/kernel/sys_sparc_64.c          | 22 +++-----
 arch/um/kernel/syscall.c                  | 28 +----------
 arch/um/sys-i386/shared/sysdep/syscalls.h |  4 --
 arch/x86/ia32/ia32entry.S                 |  2 +-
 arch/x86/ia32/sys_ia32.c                  | 43 +---------------
 arch/x86/include/asm/sys_ia32.h           |  3 --
 arch/x86/include/asm/syscalls.h           |  2 -
 arch/x86/kernel/sys_i386_32.c             | 27 +---------
 arch/x86/kernel/sys_x86_64.c              | 17 +------
 arch/x86/kernel/syscall_table_32.S        |  2 +-
 arch/xtensa/include/asm/syscall.h         |  2 -
 arch/xtensa/include/asm/unistd.h          |  2 +-
 arch/xtensa/kernel/syscall.c              | 25 ----------
 include/linux/syscalls.h                  |  4 ++
 mm/util.c                                 | 29 +++++++++++
 46 files changed, 109 insertions(+), 907 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9a3334ae282e..62619f25132f 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -178,25 +178,18 @@ SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags, unsigned long, fd,
 		unsigned long, off)
 {
-	struct file *file = NULL;
-	unsigned long ret = -EBADF;
+	unsigned long ret = -EINVAL;
 
 #if 0
 	if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED))
 		printk("%s: unimplemented OSF mmap flags %04lx\n", 
 			current->comm, flags);
 #endif
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	down_write(&current->mm->mmap_sem);
-	ret = do_mmap(file, addr, len, prot, flags, off);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
+	if ((off + PAGE_ALIGN(len)) < off)
+		goto out;
+	if (off & ~PAGE_MASK)
+		goto out;
+	ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
  out:
 	return ret;
 }
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f0fe95b7085d..2c1db77d7848 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -416,12 +416,12 @@ sys_mmap2:
 		tst	r5, #PGOFF_MASK
 		moveq	r5, r5, lsr #PAGE_SHIFT - 12
 		streq	r5, [sp, #4]
-		beq	do_mmap2
+		beq	sys_mmap_pgoff
 		mov	r0, #-EINVAL
 		mov	pc, lr
 #else
 		str	r5, [sp, #4]
-		b	do_mmap2
+		b	sys_mmap_pgoff
 #endif
 ENDPROC(sys_mmap2)
 
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 3b897444a9bd..ae4027bd01bd 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -28,34 +28,6 @@
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 
-/* common code for old and new mmaps */
-inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	int error = -EINVAL;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	error = -EBADF;
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 struct mmap_arg_struct {
 	unsigned long addr;
 	unsigned long len;
@@ -77,7 +49,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
diff --git a/arch/avr32/include/asm/syscalls.h b/arch/avr32/include/asm/syscalls.h
index 483d666c27c0..66a197266637 100644
--- a/arch/avr32/include/asm/syscalls.h
+++ b/arch/avr32/include/asm/syscalls.h
@@ -29,10 +29,6 @@ asmlinkage int sys_sigaltstack(const stack_t __user *, stack_t __user *,
 			       struct pt_regs *);
 asmlinkage int sys_rt_sigreturn(struct pt_regs *);
 
-/* kernel/sys_avr32.c */
-asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
-			  unsigned long, unsigned long, off_t);
-
 /* mm/cache.c */
 asmlinkage int sys_cacheflush(int, void __user *, size_t);
 
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
index 5d2daeaf356f..459349b5ed5a 100644
--- a/arch/avr32/kernel/sys_avr32.c
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -5,39 +5,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/mm.h>
 #include <linux/unistd.h>
 
-#include <asm/mman.h>
-#include <asm/uaccess.h>
-#include <asm/syscalls.h>
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, off_t offset)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return error;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, offset);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-	return error;
-}
-
 int kernel_execve(const char *file, char **argv, char **envp)
 {
 	register long scno asm("r8") = __NR_execve;
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
index f7244cd02fbb..0447a3e2ba64 100644
--- a/arch/avr32/kernel/syscall-stubs.S
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -61,7 +61,7 @@ __sys_execve:
 __sys_mmap2:
 	pushm	lr
 	st.w	--sp, ARG6
-	call	sys_mmap2
+	call	sys_mmap_pgoff
 	sub	sp, -4
 	popm	pc
 
diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
index afcef129d4e8..2e7f8e10bf87 100644
--- a/arch/blackfin/kernel/sys_bfin.c
+++ b/arch/blackfin/kernel/sys_bfin.c
@@ -22,39 +22,6 @@
 #include <asm/cacheflush.h>
 #include <asm/dma.h>
 
-/* common code for old and new mmaps */
-static inline long
-do_mmap2(unsigned long addr, unsigned long len,
-	 unsigned long prot, unsigned long flags,
-	 unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
- out:
-	return error;
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
-{
-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
 asmlinkage void *sys_sram_alloc(size_t size, unsigned long flags)
 {
 	return sram_alloc_with_lsl(size, flags);
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index a50637a8b9bd..f3f8bb46b517 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1422,7 +1422,7 @@ ENTRY(_sys_call_table)
 	.long _sys_ni_syscall	/* streams2 */
 	.long _sys_vfork		/* 190 */
 	.long _sys_getrlimit
-	.long _sys_mmap2
+	.long _sys_mmap_pgoff
 	.long _sys_truncate64
 	.long _sys_ftruncate64
 	.long _sys_stat64	/* 195 */
diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c
index 2ad962c7e88e..c2bbb1ac98a9 100644
--- a/arch/cris/kernel/sys_cris.c
+++ b/arch/cris/kernel/sys_cris.c
@@ -26,31 +26,6 @@
 #include <asm/uaccess.h>
 #include <asm/segment.h>
 
-/* common code for old and new mmaps */
-static inline long
-do_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
-        unsigned long flags, unsigned long fd, unsigned long pgoff)
-{
-        int error = -EBADF;
-        struct file * file = NULL;
-
-        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-        if (!(flags & MAP_ANONYMOUS)) {
-                file = fget(fd);
-                if (!file)
-                        goto out;
-        }
-
-        down_write(&current->mm->mmap_sem);
-        error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-        up_write(&current->mm->mmap_sem);
-
-        if (file)
-                fput(file);
-out:
-        return error;
-}
-
 asmlinkage unsigned long old_mmap(unsigned long __user *args)
 {        
 	unsigned long buffer[6];
@@ -63,7 +38,7 @@ asmlinkage unsigned long old_mmap(unsigned long __user *args)
 	if (buffer[5] & ~PAGE_MASK) /* verify that offset is on page boundary */
 		goto out;
 
-	err = do_mmap2(buffer[0], buffer[1], buffer[2], buffer[3],
+	err = sys_mmap_pgoff(buffer[0], buffer[1], buffer[2], buffer[3],
                        buffer[4], buffer[5] >> PAGE_SHIFT);
 out:
 	return err;
@@ -73,7 +48,8 @@ asmlinkage long
 sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
           unsigned long flags, unsigned long fd, unsigned long pgoff)
 {
-        return do_mmap2(addr, len, prot, flags, fd, pgoff);
+	/* bug(?): 8Kb pages here */
+        return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
 /*
diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c
index 2b6b5289cdcc..1d3d4c9e2521 100644
--- a/arch/frv/kernel/sys_frv.c
+++ b/arch/frv/kernel/sys_frv.c
@@ -31,9 +31,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
 {
-	int error = -EBADF;
-	struct file * file = NULL;
-
 	/* As with sparc32, make sure the shift for mmap2 is constant
 	   (12), no matter what PAGE_SIZE we have.... */
 
@@ -41,69 +38,10 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	   trying to map something we can't */
 	if (pgoff & ((1 << (PAGE_SHIFT - 12)) - 1))
 		return -EINVAL;
-	pgoff >>= PAGE_SHIFT - 12;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
-#if 0 /* DAVIDM - do we want this */
-struct mmap_arg_struct64 {
-	__u32 addr;
-	__u32 len;
-	__u32 prot;
-	__u32 flags;
-	__u64 offset; /* 64 bits */
-	__u32 fd;
-};
-
-asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg)
-{
-	int error = -EFAULT;
-	struct file * file = NULL;
-	struct mmap_arg_struct64 a;
-	unsigned long pgoff;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-
-	if ((long)a.offset & ~PAGE_MASK)
-		return -EINVAL;
-
-	pgoff = a.offset >> PAGE_SHIFT;
-	if ((a.offset >> PAGE_SHIFT) != pgoff)
-		return -EINVAL;
-
-	if (!(a.flags & MAP_ANONYMOUS)) {
-		error = -EBADF;
-		file = fget(a.fd);
-		if (!file)
-			goto out;
-	}
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
-out:
-	return error;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      pgoff >> (PAGE_SHIFT - 12));
 }
-#endif
 
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index 8cb5d73a0e35..b5969db0ca10 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -26,39 +26,6 @@
 #include <asm/traps.h>
 #include <asm/unistd.h>
 
-/* common code for old and new mmaps */
-static inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
 /*
  * Perform the select(nd, in, out, ex, tv) and mmap() system
  * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
@@ -87,57 +54,11 @@ asmlinkage int old_mmap(struct mmap_arg_struct *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
-#if 0 /* DAVIDM - do we want this */
-struct mmap_arg_struct64 {
-	__u32 addr;
-	__u32 len;
-	__u32 prot;
-	__u32 flags;
-	__u64 offset; /* 64 bits */
-	__u32 fd;
-};
-
-asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg)
-{
-	int error = -EFAULT;
-	struct file * file = NULL;
-	struct mmap_arg_struct64 a;
-	unsigned long pgoff;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-
-	if ((long)a.offset & ~PAGE_MASK)
-		return -EINVAL;
-
-	pgoff = a.offset >> PAGE_SHIFT;
-	if ((a.offset >> PAGE_SHIFT) != pgoff)
-		return -EINVAL;
-
-	if (!(a.flags & MAP_ANONYMOUS)) {
-		error = -EBADF;
-		file = fget(a.fd);
-		if (!file)
-			goto out;
-	}
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			       a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
-#endif
 
 struct sel_arg_struct {
 	unsigned long n;
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index 4eb67faac633..2d69881eda6a 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -206,7 +206,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* streams2 */
 	.long SYMBOL_NAME(sys_vfork)            /* 190 */
 	.long SYMBOL_NAME(sys_getrlimit)
-	.long SYMBOL_NAME(sys_mmap2)
+	.long SYMBOL_NAME(sys_mmap_pgoff)
 	.long SYMBOL_NAME(sys_truncate64)
 	.long SYMBOL_NAME(sys_ftruncate64)
 	.long SYMBOL_NAME(sys_stat64)		/* 195 */
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 92ed83f34036..ae384a2974c2 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -185,39 +185,6 @@ int ia64_mmap_check(unsigned long addr, unsigned long len,
 	return 0;
 }
 
-static inline unsigned long
-do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
-{
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return -EBADF;
-
-		if (!file->f_op || !file->f_op->mmap) {
-			addr = -ENODEV;
-			goto out;
-		}
-	}
-
-	/* Careful about overflows.. */
-	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE) {
-		addr = -EINVAL;
-		goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-out:	if (file)
-		fput(file);
-	return addr;
-}
-
 /*
  * mmap2() is like mmap() except that the offset is expressed in units
  * of PAGE_SIZE (instead of bytes).  This allows to mmap2() (pieces
@@ -226,7 +193,7 @@ out:	if (file)
 asmlinkage unsigned long
 sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
 {
-	addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 	if (!IS_ERR((void *) addr))
 		force_successful_syscall_return();
 	return addr;
@@ -238,7 +205,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo
 	if (offset_in_page(off) != 0)
 		return -EINVAL;
 
-	addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 	if (!IS_ERR((void *) addr))
 		force_successful_syscall_return();
 	return addr;
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index 305ac852bbed..d3c865c5a6ba 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -76,30 +76,6 @@ asmlinkage int sys_tas(int __user *addr)
 	return oldval;
 }
 
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index aa3bf4cfab37..60536e271233 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -191,7 +191,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall		/* streams2 */
 	.long sys_vfork			/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2
+	.long sys_mmap_pgoff
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64		/* 195 */
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 7deb402bfc75..218f441de667 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -29,37 +29,16 @@
 #include <asm/page.h>
 #include <asm/unistd.h>
 
-/* common code for old and new mmaps */
-static inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags,
 	unsigned long fd, unsigned long pgoff)
 {
-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
+	/*
+	 * This is wrong for sun3 - there PAGE_SIZE is 8Kb,
+	 * so we need to shift the argument down by 1; m68k mmap64(3)
+	 * (in libc) expects the last argument of mmap2 in 4Kb units.
+	 */
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
 /*
@@ -90,57 +69,11 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
-#if 0
-struct mmap_arg_struct64 {
-	__u32 addr;
-	__u32 len;
-	__u32 prot;
-	__u32 flags;
-	__u64 offset; /* 64 bits */
-	__u32 fd;
-};
-
-asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg)
-{
-	int error = -EFAULT;
-	struct file * file = NULL;
-	struct mmap_arg_struct64 a;
-	unsigned long pgoff;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-
-	if ((long)a.offset & ~PAGE_MASK)
-		return -EINVAL;
-
-	pgoff = a.offset >> PAGE_SHIFT;
-	if ((a.offset >> PAGE_SHIFT) != pgoff)
-		return -EINVAL;
-
-	if (!(a.flags & MAP_ANONYMOUS)) {
-		error = -EBADF;
-		file = fget(a.fd);
-		if (!file)
-			goto out;
-	}
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			       a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
-#endif
 
 struct sel_arg_struct {
 	unsigned long n;
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index efdd090778a3..b67cbc735a9b 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -27,39 +27,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd.h>
 
-/* common code for old and new mmaps */
-static inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
 /*
  * Perform the select(nd, in, out, ex, tv) and mmap() system
  * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
@@ -88,9 +55,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+				a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 23535cc415ae..486837efa3d7 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -210,7 +210,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* streams2 */
 	.long sys_vfork		/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2
+	.long sys_mmap_pgoff
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64	/* 195 */
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index 07cabed4b947..9f3c205fb75b 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -62,46 +62,14 @@ out:
 	return error;
 }
 
-asmlinkage long
-sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	struct file *file = NULL;
-	int ret = -EBADF;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file) {
-			printk(KERN_INFO "no fd in mmap\r\n");
-			goto out;
-		}
-	}
-
-	down_write(&current->mm->mmap_sem);
-	ret = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
-out:
-	return ret;
-}
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, off_t pgoff)
 {
-	int err = -EINVAL;
-
-	if (pgoff & ~PAGE_MASK) {
-		printk(KERN_INFO "no pagemask in mmap\r\n");
-		goto out;
-	}
+	if (pgoff & ~PAGE_MASK)
+		return -EINVAL;
 
-	err = sys_mmap2(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT);
-out:
-	return err;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT);
 }
 
 /*
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index c1ab1dc10898..b96f365ea6b1 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -196,7 +196,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall		/* reserved for streams2 */
 	.long sys_vfork		/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2			/* mmap2 */
+	.long sys_mmap_pgoff		/* mmap2 */
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64		/* 195 */
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 1a2793efdc4e..f042563c924f 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -67,28 +67,13 @@ SYSCALL_DEFINE6(32_mmap2, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags, unsigned long, fd,
 	unsigned long, pgoff)
 {
-	struct file * file = NULL;
 	unsigned long error;
 
 	error = -EINVAL;
 	if (pgoff & (~PAGE_MASK >> 12))
 		goto out;
-	pgoff >>= PAGE_SHIFT-12;
-
-	if (!(flags & MAP_ANONYMOUS)) {
-		error = -EBADF;
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
-
+	error = sys_mmap_pgoff(addr, len, prot, flags, fd,
+			       pgoff >> (PAGE_SHIFT-12));
 out:
 	return error;
 }
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index fe0d79805603..c25b2e7dcb7b 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -129,31 +129,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	}
 }
 
-/* common code for old and new mmaps */
-static inline unsigned long
-do_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
-        unsigned long flags, unsigned long fd, unsigned long pgoff)
-{
-	unsigned long error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags, unsigned long,
 	fd, off_t, offset)
@@ -164,7 +139,7 @@ SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
 	if (offset & ~PAGE_MASK)
 		goto out;
 
-	result = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+	result = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
 
 out:
 	return result;
@@ -177,7 +152,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len,
 	if (pgoff & (~PAGE_MASK >> 12))
 		return -EINVAL;
 
-	return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12));
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12));
 }
 
 save_static_function(sys_fork);
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index a94e7ea3faa6..c9ee6c009d79 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -578,7 +578,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* reserved for streams2 */
 	.long sys_vfork		/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2
+	.long sys_mmap_pgoff
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64	/* 195 */
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
index ec4100dfcb7d..17cc6ce04e84 100644
--- a/arch/mn10300/kernel/sys_mn10300.c
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -23,42 +23,13 @@
 
 #include <asm/uaccess.h>
 
-/*
- * memory mapping syscall
- */
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
-{
-	struct file *file = NULL;
-	long error = -EINVAL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	error = -EBADF;
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 asmlinkage long old_mmap(unsigned long addr, unsigned long len,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long fd, unsigned long offset)
 {
 	if (offset & ~PAGE_MASK)
 		return -EINVAL;
-	return sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
 }
 
 struct sel_arg_struct {
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 71b31957c8f1..9147391afb03 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -110,37 +110,14 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	return addr;
 }
 
-static unsigned long do_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags, unsigned long fd,
-	unsigned long pgoff)
-{
-	struct file * file = NULL;
-	unsigned long error = -EBADF;
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file != NULL)
-		fput(file);
-out:
-	return error;
-}
-
 asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags, unsigned long fd,
 	unsigned long pgoff)
 {
 	/* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE
 	   we have. */
-	return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12));
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      pgoff >> (PAGE_SHIFT - 12));
 }
 
 asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
@@ -148,7 +125,8 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
 		unsigned long offset)
 {
 	if (!(offset & ~PAGE_MASK)) {
-		return do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+		return sys_mmap_pgoff(addr, len, prot, flags, fd,
+					offset >> PAGE_SHIFT);
 	} else {
 		return -EINVAL;
 	}
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index c04832c4a02e..3370e62e43d4 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -140,7 +140,6 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long off, int shift)
 {
-	struct file * file = NULL;
 	unsigned long ret = -EINVAL;
 
 	if (!arch_validate_prot(prot))
@@ -151,20 +150,8 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len,
 			goto out;
 		off >>= shift;
 	}
-		
-	ret = -EBADF;
-	if (!(flags & MAP_ANONYMOUS)) {
-		if (!(file = fget(fd)))
-			goto out;
-	}
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-	down_write(&current->mm->mmap_sem);
-	ret = do_mmap_pgoff(file, addr, len, prot, flags, off);
-	up_write(&current->mm->mmap_sem);
-	if (file)
-		fput(file);
+	ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off);
 out:
 	return ret;
 }
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 11556aa6bf17..22c9e557bb22 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -624,33 +624,6 @@ struct mmap_arg_struct_emu31 {
 	u32	offset;
 };
 
-/* common code for old and new mmaps */
-static inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	struct file * file = NULL;
-	unsigned long error = -EBADF;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:    
-	return error;
-}
-
-
 asmlinkage unsigned long
 old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
 {
@@ -664,7 +637,8 @@ old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); 
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			       a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
@@ -677,7 +651,7 @@ sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg)
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		goto out;
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
 out:
 	return error;
 }
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index e9d94f61d500..86a74c9c9e63 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -32,32 +32,6 @@
 #include <asm/uaccess.h>
 #include "entry.h"
 
-/* common code for old and new mmaps */
-static inline long do_mmap2(
-	unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	long error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 /*
  * Perform the select(nd, in, out, ex, tv) and mmap() system
  * calls. Linux for S/390 isn't able to handle more than 5
@@ -81,7 +55,7 @@ SYSCALL_DEFINE1(mmap2, struct mmap_arg_struct __user *, arg)
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		goto out;
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
 out:
 	return error;
 }
@@ -98,7 +72,7 @@ SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct __user *, arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
 out:
 	return error;
 }
diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c
index 001249469866..3d6a67dd628c 100644
--- a/arch/score/kernel/sys_score.c
+++ b/arch/score/kernel/sys_score.c
@@ -36,34 +36,15 @@ asmlinkage long
 sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
 	  unsigned long flags, unsigned long fd, unsigned long pgoff)
 {
-	int error = -EBADF;
-	struct file *file = NULL;
-
-	if (pgoff & (~PAGE_MASK >> 12))
-		return -EINVAL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return error;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-
-	return error;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
 asmlinkage long
 sys_mmap(unsigned long addr, unsigned long len, unsigned long prot,
 	unsigned long flags, unsigned long fd, off_t pgoff)
 {
-	return sys_mmap2(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT);
+	/* where's the alignment check? */
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT);
 }
 
 asmlinkage long
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 8aa5d1ceaf14..71399cde03b5 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -28,37 +28,13 @@
 #include <asm/cacheflush.h>
 #include <asm/cachectl.h>
 
-static inline long
-do_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
-	 unsigned long flags, int fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 asmlinkage int old_mmap(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags,
 	int fd, unsigned long off)
 {
 	if (off & ~PAGE_MASK)
 		return -EINVAL;
-	return do_mmap2(addr, len, prot, flags, fd, off>>PAGE_SHIFT);
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, off>>PAGE_SHIFT);
 }
 
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
@@ -74,7 +50,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 
 	pgoff >>= PAGE_SHIFT - 12;
 
-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
 /*
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 10c43bea32c7..36f6f26d9cec 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -234,31 +234,6 @@ int sparc_mmap_check(unsigned long addr, unsigned long len)
 }
 
 /* Linux version of mmap */
-static unsigned long do_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags, unsigned long fd,
-	unsigned long pgoff)
-{
-	struct file * file = NULL;
-	unsigned long retval = -EBADF;
-
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	len = PAGE_ALIGN(len);
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return retval;
-}
 
 asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags, unsigned long fd,
@@ -266,14 +241,16 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len,
 {
 	/* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE
 	   we have. */
-	return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12));
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      pgoff >> (PAGE_SHIFT - 12));
 }
 
 asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
 	unsigned long prot, unsigned long flags, unsigned long fd,
 	unsigned long off)
 {
-	return do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	/* no alignment check? */
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 }
 
 long sparc_remap_file_pages(unsigned long start, unsigned long size,
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index d498b32c75f6..8f9cd58497de 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -572,23 +572,13 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags, unsigned long, fd,
 		unsigned long, off)
 {
-	struct file * file = NULL;
-	unsigned long retval = -EBADF;
+	unsigned long retval = -EINVAL;
 
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	len = PAGE_ALIGN(len);
-
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(file, addr, len, prot, flags, off);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
+	if ((off + PAGE_ALIGN(len)) < off)
+		goto out;
+	if (off & ~PAGE_MASK)
+		goto out;
+	retval = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 out:
 	return retval;
 }
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a4625c7b2bf9..cccab850c27e 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -8,6 +8,7 @@
 #include "linux/mm.h"
 #include "linux/sched.h"
 #include "linux/utsname.h"
+#include "linux/syscalls.h"
 #include "asm/current.h"
 #include "asm/mman.h"
 #include "asm/uaccess.h"
@@ -37,31 +38,6 @@ long sys_vfork(void)
 	return ret;
 }
 
-/* common code for old and new mmaps */
-long sys_mmap2(unsigned long addr, unsigned long len,
-	       unsigned long prot, unsigned long flags,
-	       unsigned long fd, unsigned long pgoff)
-{
-	long error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
- out:
-	return error;
-}
-
 long old_mmap(unsigned long addr, unsigned long len,
 	      unsigned long prot, unsigned long flags,
 	      unsigned long fd, unsigned long offset)
@@ -70,7 +46,7 @@ long old_mmap(unsigned long addr, unsigned long len,
 	if (offset & ~PAGE_MASK)
 		goto out;
 
-	err = sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+	err = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
  out:
 	return err;
 }
diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h
index 905698197e35..e7787679e317 100644
--- a/arch/um/sys-i386/shared/sysdep/syscalls.h
+++ b/arch/um/sys-i386/shared/sysdep/syscalls.h
@@ -20,7 +20,3 @@ extern syscall_handler_t *sys_call_table[];
 #define EXECUTE_SYSCALL(syscall, regs) \
 	((long (*)(struct syscall_args)) \
 	 (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
-
-extern long sys_mmap2(unsigned long addr, unsigned long len,
-		      unsigned long prot, unsigned long flags,
-		      unsigned long fd, unsigned long pgoff);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 4eefdca9832b..53147ad85b96 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -696,7 +696,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall		/* streams2 */
 	.quad stub32_vfork            /* 190 */
 	.quad compat_sys_getrlimit
-	.quad sys32_mmap2
+	.quad sys_mmap_pgoff
 	.quad sys32_truncate64
 	.quad sys32_ftruncate64
 	.quad sys32_stat64		/* 195 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index df82c0e48ded..422572c77923 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -155,9 +155,6 @@ struct mmap_arg_struct {
 asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
 {
 	struct mmap_arg_struct a;
-	struct file *file = NULL;
-	unsigned long retval;
-	struct mm_struct *mm ;
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		return -EFAULT;
@@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		return -EINVAL;
 
-	if (!(a.flags & MAP_ANONYMOUS)) {
-		file = fget(a.fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	mm = current->mm;
-	down_write(&mm->mmap_sem);
-	retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags,
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
 			       a.offset>>PAGE_SHIFT);
-	if (file)
-		fput(file);
-
-	up_write(&mm->mmap_sem);
-
-	return retval;
 }
 
 asmlinkage long sys32_mprotect(unsigned long start, size_t len,
@@ -483,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
-			    unsigned long prot, unsigned long flags,
-			    unsigned long fd, unsigned long pgoff)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long error;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	down_write(&mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&mm->mmap_sem);
-
-	if (file)
-		fput(file);
-	return error;
-}
-
 asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
 {
 	char *arch = "x86_64";
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 9af9decb38c3..4a5a089e1c62 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -57,9 +57,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
-			    unsigned long, unsigned long, unsigned long);
-
 struct oldold_utsname;
 struct old_utsname;
 asmlinkage long sys32_olduname(struct oldold_utsname __user *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 372b76edd63f..1bb6e395881c 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -55,8 +55,6 @@ struct sel_arg_struct;
 struct oldold_utsname;
 struct old_utsname;
 
-asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
-			  unsigned long, unsigned long, unsigned long);
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 1884a8d12bfa..dee1ff7cba58 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,31 +24,6 @@
 
 #include <asm/syscalls.h>
 
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-	struct mm_struct *mm = current->mm;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 /*
  * Perform the select(nd, in, out, ex, tv) and mmap() system
  * calls. Linux/i386 didn't use to be able to handle more than
@@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+	err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
 			a.fd, a.offset >> PAGE_SHIFT);
 out:
 	return err;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 45e00eb09c3a..8aa2057efd12 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, fd, unsigned long, off)
 {
 	long error;
-	struct file *file;
-
 	error = -EINVAL;
 	if (off & ~PAGE_MASK)
 		goto out;
 
-	error = -EBADF;
-	file = NULL;
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
+	error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 out:
 	return error;
 }
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 70c2125d55b9..15228b5d3eb7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -191,7 +191,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* reserved for streams2 */
 	.long ptregs_vfork	/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2
+	.long sys_mmap_pgoff
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64	/* 195 */
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 05cebf8f62b1..4352dbe1186a 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -13,8 +13,6 @@ struct sigaction;
 asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*);
 asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*);
 asmlinkage long xtensa_pipe(int __user *);
-asmlinkage long xtensa_mmap2(unsigned long, unsigned long, unsigned long,
-    			     unsigned long, unsigned long, unsigned long);
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);
 asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 4e55dc763021..fbf318b3af3e 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -189,7 +189,7 @@ __SYSCALL( 79, sys_fremovexattr, 2)
 /* File Map / Shared Memory Operations */
 
 #define __NR_mmap2 				 80
-__SYSCALL( 80, xtensa_mmap2, 6)
+__SYSCALL( 80, sys_mmap_pgoff, 6)
 #define __NR_munmap 				 81
 __SYSCALL( 81, sys_munmap, 2)
 #define __NR_mprotect 				 82
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index ac15ecbdf919..1e67bab775c1 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -57,31 +57,6 @@ asmlinkage long xtensa_pipe(int __user *userfds)
 	return error;
 }
 
-
-asmlinkage long xtensa_mmap2(unsigned long addr, unsigned long len,
-   			     unsigned long prot, unsigned long flags,
-			     unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file * file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
 {
 	unsigned long ret;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bc70c5810fec..939a61507ac5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -834,4 +834,8 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 asmlinkage long sys_perf_event_open(
 		struct perf_event_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
+
+asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			unsigned long fd, unsigned long pgoff);
 #endif
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..3bf81b294ae8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,6 +4,10 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
@@ -268,6 +272,31 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file * file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-- 
cgit v1.2.3


From 03889384cee7a198a79447c1ea6aca2c8e54d155 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 11 Dec 2009 09:48:22 -0500
Subject: tracing: Add trace_dump_stack()

I've been asked a few times about how to find out what is calling
some location in the kernel. One way is to use dynamic function tracing
and implement the func_stack_trace. But this only finds out who is
calling a particular function. It does not tell you who is calling
that function and entering a specific if conditional.

I have myself implemented a quick version of trace_dump_stack() for
this purpose a few times, and just needed it now. This is when I realized
that this would be a good tool to have in the kernel like trace_printk().

Using trace_dump_stack() is similar to dump_stack() except that it
writes to the trace buffer instead and can be used in critical locations.

For example:

@@ -5485,8 +5485,12 @@ need_resched_nonpreemptible:
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
-		else
+		else {
 			deactivate_task(rq, prev, 1);
+			trace_printk("Deactivating task %s:%d\n",
+				     prev->comm, prev->pid);
+			trace_dump_stack();
+		}
 		switch_count = &prev->nvcsw;
 	}

Produces:

           <...>-3249  [001]   296.105269: schedule: Deactivating task ntpd:3249
           <...>-3249  [001]   296.105270: <stack trace>
 => schedule
 => schedule_hrtimeout_range
 => poll_schedule_timeout
 => do_select
 => core_sys_select
 => sys_select
 => system_call_fastpath

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h |  2 ++
 kernel/trace/trace.c   | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fa4c590cf12..5ad4199fb073 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -492,6 +492,8 @@ extern int
 __trace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
+extern void trace_dump_stack(void);
+
 /*
  * The double __builtin_constant_p is because gcc will give us an error
  * if we try to allocate the static variable to fmt if it is not a
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88bd9ae2a9ed..f531301b7a3b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1151,6 +1151,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 	__ftrace_trace_stack(tr->buffer, flags, skip, pc);
 }
 
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+	unsigned long flags;
+
+	if (tracing_disabled || tracing_selftest_running)
+		return 0;
+
+	local_save_flags(flags);
+
+	/* skipping 3 traces, seems to get us at the caller of this function */
+	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
-- 
cgit v1.2.3


From 073120cc28ad9f6003452c8bb9d15a87b1820201 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 28 Oct 2009 19:51:17 +0100
Subject: Driver Core: devtmpfs: use sys_mount()

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/devtmpfs.c | 9 ++-------
 include/linux/device.h  | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 1cf498fd2b52..880a203b6688 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -332,9 +332,8 @@ out:
  * If configured, or requested by the commandline, devtmpfs will be
  * auto-mounted after the kernel mounted the root filesystem.
  */
-int devtmpfs_mount(const char *mountpoint)
+int devtmpfs_mount(const char *mntdir)
 {
-	struct path path;
 	int err;
 
 	if (!dev_mount)
@@ -343,15 +342,11 @@ int devtmpfs_mount(const char *mountpoint)
 	if (!dev_mnt)
 		return 0;
 
-	err = kern_path(mountpoint, LOOKUP_FOLLOW, &path);
-	if (err)
-		return err;
-	err = do_add_mount(dev_mnt, &path, 0, NULL);
+	err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL);
 	if (err)
 		printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
 	else
 		printk(KERN_INFO "devtmpfs: mounted\n");
-	path_put(&path);
 	return err;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 2ea3e4921812..2a73d9bcbc9c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -558,7 +558,7 @@ extern void wait_for_device_probe(void);
 #ifdef CONFIG_DEVTMPFS
 extern int devtmpfs_create_node(struct device *dev);
 extern int devtmpfs_delete_node(struct device *dev);
-extern int devtmpfs_mount(const char *mountpoint);
+extern int devtmpfs_mount(const char *mntdir);
 #else
 static inline int devtmpfs_create_node(struct device *dev) { return 0; }
 static inline int devtmpfs_delete_node(struct device *dev) { return 0; }
-- 
cgit v1.2.3


From 9ebfbd45f9d4ee9cd72529cf99e5f300eb398e67 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 29 Oct 2009 12:36:02 +0100
Subject: firmware_class: make request_firmware_nowait more useful

Unfortunately, one cannot hold on to the struct firmware
that request_firmware_nowait() hands off, which is needed
in some cases. Allow this by requiring the callback to
free it (via release_firmware).

Additionally, give it a gfp_t parameter -- all the current
users call it from a GFP_KERNEL context so the GFP_ATOMIC
isn't necessary. This also marks an API break which is
useful in a sense, although that is obviously not the
primary purpose of this change.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Cc: Pavel Roskin <proski@gnu.org>
Cc: Abhay Salunke <abhay_salunke@dell.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/firmware_class.c               | 14 ++++++--------
 drivers/firmware/dell_rbu.c                 |  9 +++++++--
 drivers/serial/ucc_uart.c                   |  8 +++++---
 drivers/staging/comedi/drivers/usbdux.c     |  5 ++++-
 drivers/staging/comedi/drivers/usbduxfast.c |  5 ++++-
 drivers/usb/atm/ueagle-atm.c                |  7 ++++---
 include/linux/firmware.h                    |  5 +++--
 7 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 7376367bcb80..a95024166b66 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -601,12 +601,9 @@ request_firmware_work_func(void *arg)
 	}
 	ret = _request_firmware(&fw, fw_work->name, fw_work->device,
 		fw_work->uevent);
-	if (ret < 0)
-		fw_work->cont(NULL, fw_work->context);
-	else {
-		fw_work->cont(fw, fw_work->context);
-		release_firmware(fw);
-	}
+
+	fw_work->cont(fw, fw_work->context);
+
 	module_put(fw_work->module);
 	kfree(fw_work);
 	return ret;
@@ -619,6 +616,7 @@ request_firmware_work_func(void *arg)
  *	is non-zero else the firmware copy must be done manually.
  * @name: name of firmware file
  * @device: device for which firmware is being loaded
+ * @gfp: allocation flags
  * @context: will be passed over to @cont, and
  *	@fw may be %NULL if firmware request fails.
  * @cont: function will be called asynchronously when the firmware
@@ -631,12 +629,12 @@ request_firmware_work_func(void *arg)
 int
 request_firmware_nowait(
 	struct module *module, int uevent,
-	const char *name, struct device *device, void *context,
+	const char *name, struct device *device, gfp_t gfp, void *context,
 	void (*cont)(const struct firmware *fw, void *context))
 {
 	struct task_struct *task;
 	struct firmware_work *fw_work = kmalloc(sizeof (struct firmware_work),
-						GFP_ATOMIC);
+						gfp);
 
 	if (!fw_work)
 		return -ENOMEM;
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index b4704e150b28..b3a0cf57442e 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -544,9 +544,12 @@ static void callbackfn_rbu(const struct firmware *fw, void *context)
 {
 	rbu_data.entry_created = 0;
 
-	if (!fw || !fw->size)
+	if (!fw)
 		return;
 
+	if (!fw->size)
+		goto out;
+
 	spin_lock(&rbu_data.lock);
 	if (!strcmp(image_type, "mono")) {
 		if (!img_update_realloc(fw->size))
@@ -568,6 +571,8 @@ static void callbackfn_rbu(const struct firmware *fw, void *context)
 	} else
 		pr_debug("invalid image type specified.\n");
 	spin_unlock(&rbu_data.lock);
+ out:
+	release_firmware(fw);
 }
 
 static ssize_t read_rbu_image_type(struct kobject *kobj,
@@ -615,7 +620,7 @@ static ssize_t write_rbu_image_type(struct kobject *kobj,
 			spin_unlock(&rbu_data.lock);
 			req_firm_rc = request_firmware_nowait(THIS_MODULE,
 				FW_ACTION_NOHOTPLUG, "dell_rbu",
-				&rbu_device->dev, &context,
+				&rbu_device->dev, GFP_KERNEL, &context,
 				callbackfn_rbu);
 			if (req_firm_rc) {
 				printk(KERN_ERR
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 46de564aaea0..465f2fae1025 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -1179,16 +1179,18 @@ static void uart_firmware_cont(const struct firmware *fw, void *context)
 
 	if (firmware->header.length != fw->size) {
 		dev_err(dev, "invalid firmware\n");
-		return;
+		goto out;
 	}
 
 	ret = qe_upload_firmware(firmware);
 	if (ret) {
 		dev_err(dev, "could not load firmware\n");
-		return;
+		goto out;
 	}
 
 	firmware_loaded = 1;
+ out:
+	release_firmware(fw);
 }
 
 static int ucc_uart_probe(struct of_device *ofdev,
@@ -1247,7 +1249,7 @@ static int ucc_uart_probe(struct of_device *ofdev,
 			 */
 			ret = request_firmware_nowait(THIS_MODULE,
 				FW_ACTION_HOTPLUG, filename, &ofdev->dev,
-				&ofdev->dev, uart_firmware_cont);
+				GFP_KERNEL, &ofdev->dev, uart_firmware_cont);
 			if (ret) {
 				dev_err(&ofdev->dev,
 					"could not load firmware %s\n",
diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c
index cca4e869f0ec..dfcd12bec86b 100644
--- a/drivers/staging/comedi/drivers/usbdux.c
+++ b/drivers/staging/comedi/drivers/usbdux.c
@@ -2327,9 +2327,11 @@ static void usbdux_firmware_request_complete_handler(const struct firmware *fw,
 	if (ret) {
 		dev_err(&usbdev->dev,
 			"Could not upload firmware (err=%d)\n", ret);
-		return;
+		goto out;
 	}
 	comedi_usb_auto_config(usbdev, BOARDNAME);
+ out:
+	release_firmware(fw);
 }
 
 /* allocate memory for the urbs and initialise them */
@@ -2580,6 +2582,7 @@ static int usbduxsub_probe(struct usb_interface *uinterf,
 				      FW_ACTION_HOTPLUG,
 				      "usbdux_firmware.bin",
 				      &udev->dev,
+				      GFP_KERNEL,
 				      usbduxsub + index,
 				      usbdux_firmware_request_complete_handler);
 
diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c
index d143222579c2..2e675cce7dbf 100644
--- a/drivers/staging/comedi/drivers/usbduxfast.c
+++ b/drivers/staging/comedi/drivers/usbduxfast.c
@@ -1451,10 +1451,12 @@ static void usbduxfast_firmware_request_complete_handler(const struct firmware
 	if (ret) {
 		dev_err(&usbdev->dev,
 			"Could not upload firmware (err=%d)\n", ret);
-		return;
+		goto out;
 	}
 
 	comedi_usb_auto_config(usbdev, BOARDNAME);
+ out:
+	release_firmware(fw);
 }
 
 /*
@@ -1569,6 +1571,7 @@ static int usbduxfastsub_probe(struct usb_interface *uinterf,
 				      FW_ACTION_HOTPLUG,
 				      "usbduxfast_firmware.bin",
 				      &udev->dev,
+				      GFP_KERNEL,
 				      usbduxfastsub + index,
 				      usbduxfast_firmware_request_complete_handler);
 
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index bba4d3eabe0f..c5395246886d 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -667,12 +667,12 @@ static void uea_upload_pre_firmware(const struct firmware *fw_entry, void *conte
 	else
 		uea_info(usb, "firmware uploaded\n");
 
-	uea_leaves(usb);
-	return;
+	goto err;
 
 err_fw_corrupted:
 	uea_err(usb, "firmware is corrupted\n");
 err:
+	release_firmware(fw_entry);
 	uea_leaves(usb);
 }
 
@@ -705,7 +705,8 @@ static int uea_load_firmware(struct usb_device *usb, unsigned int ver)
 		break;
 	}
 
-	ret = request_firmware_nowait(THIS_MODULE, 1, fw_name, &usb->dev, usb, uea_upload_pre_firmware);
+	ret = request_firmware_nowait(THIS_MODULE, 1, fw_name, &usb->dev,
+				      GFP_KERNEL, usb, uea_upload_pre_firmware);
 	if (ret)
 		uea_err(usb, "firmware %s is not available\n", fw_name);
 	else
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index d31544628436..043811f0d277 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/gfp.h>
 
 #define FW_ACTION_NOHOTPLUG 0
 #define FW_ACTION_HOTPLUG 1
@@ -38,7 +39,7 @@ int request_firmware(const struct firmware **fw, const char *name,
 		     struct device *device);
 int request_firmware_nowait(
 	struct module *module, int uevent,
-	const char *name, struct device *device, void *context,
+	const char *name, struct device *device, gfp_t gfp, void *context,
 	void (*cont)(const struct firmware *fw, void *context));
 
 void release_firmware(const struct firmware *fw);
@@ -51,7 +52,7 @@ static inline int request_firmware(const struct firmware **fw,
 }
 static inline int request_firmware_nowait(
 	struct module *module, int uevent,
-	const char *name, struct device *device, void *context,
+	const char *name, struct device *device, gfp_t gfp, void *context,
 	void (*cont)(const struct firmware *fw, void *context))
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 832b6af198aefe6034310e124594cc8b833c0ef9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Nov 2009 16:08:56 -0800
Subject: sysfs: Propagate renames to the vfs on demand

By teaching sysfs_revalidate to hide a dentry for
a sysfs_dirent if the sysfs_dirent has been renamed,
and by teaching sysfs_lookup to return the original
dentry if the sysfs dirent has been renamed.  I can
show the results of renames correctly without having to
update the dcache during the directory rename.

This massively simplifies the rename logic allowing a lot
of weird sysfs special cases to be removed along with
a lot of now unnecesary helper code.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/namei.c            |  22 -------
 fs/sysfs/dir.c        | 158 ++++++++++----------------------------------------
 fs/sysfs/inode.c      |  12 ----
 fs/sysfs/sysfs.h      |   1 -
 include/linux/namei.h |   1 -
 5 files changed, 32 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e9..d3c190c35fcc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1279,28 +1279,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 	return __lookup_hash(&this, base, NULL);
 }
 
-/**
- * lookup_one_noperm - bad hack for sysfs
- * @name:	pathname component to lookup
- * @base:	base directory to lookup from
- *
- * This is a variant of lookup_one_len that doesn't perform any permission
- * checks.   It's a horrible hack to work around the braindead sysfs
- * architecture and should not be used anywhere else.
- *
- * DON'T USE THIS FUNCTION EVER, thanks.
- */
-struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
-{
-	int err;
-	struct qstr this;
-
-	err = __lookup_one_len(name, &this, base, strlen(name));
-	if (err)
-		return ERR_PTR(err);
-	return __lookup_hash(&this, base, NULL);
-}
-
 int user_path_at(int dfd, const char __user *name, unsigned flags,
 		 struct path *path)
 {
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f3af45e47eaa..97954c69ff0e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -25,7 +25,6 @@
 #include "sysfs.h"
 
 DEFINE_MUTEX(sysfs_mutex);
-DEFINE_MUTEX(sysfs_rename_mutex);
 DEFINE_SPINLOCK(sysfs_assoc_lock);
 
 static DEFINE_SPINLOCK(sysfs_ino_lock);
@@ -84,46 +83,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 	}
 }
 
-/**
- *	sysfs_get_dentry - get dentry for the given sysfs_dirent
- *	@sd: sysfs_dirent of interest
- *
- *	Get dentry for @sd.  Dentry is looked up if currently not
- *	present.  This function descends from the root looking up
- *	dentry for each step.
- *
- *	LOCKING:
- *	mutex_lock(sysfs_rename_mutex)
- *
- *	RETURNS:
- *	Pointer to found dentry on success, ERR_PTR() value on error.
- */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
-{
-	struct dentry *dentry = dget(sysfs_sb->s_root);
-
-	while (dentry->d_fsdata != sd) {
-		struct sysfs_dirent *cur;
-		struct dentry *parent;
-
-		/* find the first ancestor which hasn't been looked up */
-		cur = sd;
-		while (cur->s_parent != dentry->d_fsdata)
-			cur = cur->s_parent;
-
-		/* look it up */
-		parent = dentry;
-		mutex_lock(&parent->d_inode->i_mutex);
-		dentry = lookup_one_noperm(cur->s_name, parent);
-		mutex_unlock(&parent->d_inode->i_mutex);
-		dput(parent);
-
-		if (IS_ERR(dentry))
-			break;
-	}
-	return dentry;
-}
-
 /**
  *	sysfs_get_active - get an active reference to sysfs_dirent
  *	@sd: sysfs_dirent to get an active reference to
@@ -315,6 +274,14 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (sd->s_flags & SYSFS_FLAG_REMOVED)
 		goto out_bad;
 
+	/* The sysfs dirent has been moved? */
+	if (dentry->d_parent->d_fsdata != sd->s_parent)
+		goto out_bad;
+
+	/* The sysfs dirent has been renamed */
+	if (strcmp(dentry->d_name.name, sd->s_name) != 0)
+		goto out_bad;
+
 	mutex_unlock(&sysfs_mutex);
 out_valid:
 	return 1;
@@ -322,6 +289,12 @@ out_bad:
 	/* Remove the dentry from the dcache hashes.
 	 * If this is a deleted dentry we use d_drop instead of d_delete
 	 * so sysfs doesn't need to cope with negative dentries.
+	 *
+	 * If this is a dentry that has simply been renamed we
+	 * use d_drop to remove it from the dcache lookup on its
+	 * old parent.  If this dentry persists later when a lookup
+	 * is performed at its new name the dentry will be readded
+	 * to the dcache hashes.
 	 */
 	is_dir = (sysfs_type(sd) == SYSFS_DIR);
 	mutex_unlock(&sysfs_mutex);
@@ -705,10 +678,15 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* instantiate and hash dentry */
-	dentry->d_op = &sysfs_dentry_ops;
-	dentry->d_fsdata = sysfs_get(sd);
-	d_instantiate(dentry, inode);
-	d_rehash(dentry);
+	ret = d_find_alias(inode);
+	if (!ret) {
+		dentry->d_op = &sysfs_dentry_ops;
+		dentry->d_fsdata = sysfs_get(sd);
+		d_add(dentry, inode);
+	} else {
+		d_move(ret, dentry);
+		iput(inode);
+	}
 
  out_unlock:
 	mutex_unlock(&sysfs_mutex);
@@ -785,62 +763,32 @@ void sysfs_remove_dir(struct kobject * kobj)
 int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 {
 	struct sysfs_dirent *sd = kobj->sd;
-	struct dentry *parent = NULL;
-	struct dentry *old_dentry = NULL, *new_dentry = NULL;
 	const char *dup_name = NULL;
 	int error;
 
-	mutex_lock(&sysfs_rename_mutex);
+	mutex_lock(&sysfs_mutex);
 
 	error = 0;
 	if (strcmp(sd->s_name, new_name) == 0)
 		goto out;	/* nothing to rename */
 
-	/* get the original dentry */
-	old_dentry = sysfs_get_dentry(sd);
-	if (IS_ERR(old_dentry)) {
-		error = PTR_ERR(old_dentry);
-		old_dentry = NULL;
-		goto out;
-	}
-
-	parent = old_dentry->d_parent;
-
-	/* lock parent and get dentry for new name */
-	mutex_lock(&parent->d_inode->i_mutex);
-	mutex_lock(&sysfs_mutex);
-
 	error = -EEXIST;
 	if (sysfs_find_dirent(sd->s_parent, new_name))
-		goto out_unlock;
-
-	error = -ENOMEM;
-	new_dentry = d_alloc_name(parent, new_name);
-	if (!new_dentry)
-		goto out_unlock;
+		goto out;
 
 	/* rename sysfs_dirent */
 	error = -ENOMEM;
 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
 	if (!new_name)
-		goto out_unlock;
+		goto out;
 
 	dup_name = sd->s_name;
 	sd->s_name = new_name;
 
-	/* rename */
-	d_add(new_dentry, NULL);
-	d_move(old_dentry, new_dentry);
-
 	error = 0;
- out_unlock:
+ out:
 	mutex_unlock(&sysfs_mutex);
-	mutex_unlock(&parent->d_inode->i_mutex);
 	kfree(dup_name);
-	dput(old_dentry);
-	dput(new_dentry);
- out:
-	mutex_unlock(&sysfs_rename_mutex);
 	return error;
 }
 
@@ -848,12 +796,11 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
 	struct sysfs_dirent *sd = kobj->sd;
 	struct sysfs_dirent *new_parent_sd;
-	struct dentry *old_parent, *new_parent = NULL;
-	struct dentry *old_dentry = NULL, *new_dentry = NULL;
 	int error;
 
-	mutex_lock(&sysfs_rename_mutex);
 	BUG_ON(!sd->s_parent);
+
+	mutex_lock(&sysfs_mutex);
 	new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ?
 		new_parent_kobj->sd : &sysfs_root;
 
@@ -861,61 +808,20 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 	if (sd->s_parent == new_parent_sd)
 		goto out;	/* nothing to move */
 
-	/* get dentries */
-	old_dentry = sysfs_get_dentry(sd);
-	if (IS_ERR(old_dentry)) {
-		error = PTR_ERR(old_dentry);
-		old_dentry = NULL;
-		goto out;
-	}
-	old_parent = old_dentry->d_parent;
-
-	new_parent = sysfs_get_dentry(new_parent_sd);
-	if (IS_ERR(new_parent)) {
-		error = PTR_ERR(new_parent);
-		new_parent = NULL;
-		goto out;
-	}
-
-again:
-	mutex_lock(&old_parent->d_inode->i_mutex);
-	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
-		mutex_unlock(&old_parent->d_inode->i_mutex);
-		goto again;
-	}
-	mutex_lock(&sysfs_mutex);
-
 	error = -EEXIST;
 	if (sysfs_find_dirent(new_parent_sd, sd->s_name))
-		goto out_unlock;
-
-	error = -ENOMEM;
-	new_dentry = d_alloc_name(new_parent, sd->s_name);
-	if (!new_dentry)
-		goto out_unlock;
-
-	error = 0;
-	d_add(new_dentry, NULL);
-	d_move(old_dentry, new_dentry);
+		goto out;
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);
 	sysfs_get(new_parent_sd);
-	drop_nlink(old_parent->d_inode);
 	sysfs_put(sd->s_parent);
 	sd->s_parent = new_parent_sd;
-	inc_nlink(new_parent->d_inode);
 	sysfs_link_sibling(sd);
 
- out_unlock:
+	error = 0;
+out:
 	mutex_unlock(&sysfs_mutex);
-	mutex_unlock(&new_parent->d_inode->i_mutex);
-	mutex_unlock(&old_parent->d_inode->i_mutex);
- out:
-	dput(new_parent);
-	dput(old_dentry);
-	dput(new_dentry);
-	mutex_unlock(&sysfs_rename_mutex);
 	return error;
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 1ffd5559926f..9f783d4e4b51 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -205,17 +205,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 	inode->i_ctime = iattr->ia_ctime;
 }
 
-
-/*
- * sysfs has a different i_mutex lock order behavior for i_mutex than other
- * filesystems; sysfs i_mutex is called in many places with subsystem locks
- * held. At the same time, many of the VFS locking rules do not apply to
- * sysfs at all (cross directory rename for example). To untangle this mess
- * (which gives false positives in lockdep), we're giving sysfs inodes their
- * own class for i_mutex.
- */
-static struct lock_class_key sysfs_inode_imutex_key;
-
 static int sysfs_count_nlink(struct sysfs_dirent *sd)
 {
 	struct sysfs_dirent *child;
@@ -268,7 +257,6 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 	inode->i_mapping->a_ops = &sysfs_aops;
 	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
 	inode->i_op = &sysfs_inode_operations;
-	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
 
 	set_default_inode_attr(inode, sd->s_mode);
 	sysfs_refresh_inode(sd, inode);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 90b35012abb2..98a15bf1efe1 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -103,7 +103,6 @@ extern struct kmem_cache *sysfs_dir_cachep;
  * dir.c
  */
 extern struct mutex sysfs_mutex;
-extern struct mutex sysfs_rename_mutex;
 extern spinlock_t sysfs_assoc_lock;
 
 extern const struct file_operations sysfs_dir_operations;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index ec0f607b364a..028946750289 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -76,7 +76,6 @@ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
 extern void release_open_intent(struct nameidata *);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
-extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
 extern int follow_down(struct path *);
 extern int follow_up(struct path *);
-- 
cgit v1.2.3


From c60e0504c8e4fa14179d0687d80ef25148dd6dd4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 27 Nov 2009 17:38:51 +0900
Subject: Driver Core: Early platform driver buffer

Add early_platform_init_buffer() support and update the
early platform driver code to allow passing parameters
to the driver on the kernel command line.

early_platform_init_buffer() simply allows early platform
drivers to provide a pointer and length to a memory area
where the remaining part of the kernel command line option
will be stored.

Needed to pass baud rate and other serial port options
to the reworked early serial console code on SuperH.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 29 ++++++++++++++++++++++-------
 include/linux/platform_device.h | 20 +++++++++++++++-----
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4fa954b07ac4..9d2ee25deaf5 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1000,7 +1000,7 @@ static __initdata LIST_HEAD(early_platform_device_list);
 int __init early_platform_driver_register(struct early_platform_driver *epdrv,
 					  char *buf)
 {
-	unsigned long index;
+	char *tmp;
 	int n;
 
 	/* Simply add the driver to the end of the global list.
@@ -1019,13 +1019,28 @@ int __init early_platform_driver_register(struct early_platform_driver *epdrv,
 	if (buf && !strncmp(buf, epdrv->pdrv->driver.name, n)) {
 		list_move(&epdrv->list, &early_platform_driver_list);
 
-		if (!strcmp(buf, epdrv->pdrv->driver.name))
+		/* Allow passing parameters after device name */
+		if (buf[n] == '\0' || buf[n] == ',')
 			epdrv->requested_id = -1;
-		else if (buf[n] == '.' && strict_strtoul(&buf[n + 1], 10,
-							 &index) == 0)
-			epdrv->requested_id = index;
-		else
-			epdrv->requested_id = EARLY_PLATFORM_ID_ERROR;
+		else {
+			epdrv->requested_id = simple_strtoul(&buf[n + 1],
+							     &tmp, 10);
+
+			if (buf[n] != '.' || (tmp == &buf[n + 1])) {
+				epdrv->requested_id = EARLY_PLATFORM_ID_ERROR;
+				n = 0;
+			} else
+				n += strcspn(&buf[n + 1], ",") + 1;
+		}
+
+		if (buf[n] == ',')
+			n++;
+
+		if (epdrv->bufsize) {
+			memcpy(epdrv->buffer, &buf[n],
+			       min_t(int, epdrv->bufsize, strlen(&buf[n]) + 1));
+			epdrv->buffer[epdrv->bufsize - 1] = '\0';
+		}
 	}
 
 	return 0;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 3c6675c2444b..71ff887ca44e 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -83,6 +83,8 @@ struct early_platform_driver {
 	struct platform_driver *pdrv;
 	struct list_head list;
 	int requested_id;
+	char *buffer;
+	int bufsize;
 };
 
 #define EARLY_PLATFORM_ID_UNSET -2
@@ -102,21 +104,29 @@ extern int early_platform_driver_probe(char *class_str,
 				       int nr_probe, int user_only);
 extern void early_platform_cleanup(void);
 
+#define early_platform_init(class_string, platdrv)		\
+	early_platform_init_buffer(class_string, platdrv, NULL, 0)
 
 #ifndef MODULE
-#define early_platform_init(class_string, platform_driver)		\
+#define early_platform_init_buffer(class_string, platdrv, buf, bufsiz)	\
 static __initdata struct early_platform_driver early_driver = {		\
 	.class_str = class_string,					\
-	.pdrv = platform_driver,					\
+	.buffer = buf,							\
+	.bufsize = bufsiz,						\
+	.pdrv = platdrv,						\
 	.requested_id = EARLY_PLATFORM_ID_UNSET,			\
 };									\
-static int __init early_platform_driver_setup_func(char *buf)		\
+static int __init early_platform_driver_setup_func(char *buffer)	\
 {									\
-	return early_platform_driver_register(&early_driver, buf);	\
+	return early_platform_driver_register(&early_driver, buffer);	\
 }									\
 early_param(class_string, early_platform_driver_setup_func)
 #else /* MODULE */
-#define early_platform_init(class_string, platform_driver)
+#define early_platform_init_buffer(class_string, platdrv, buf, bufsiz)	\
+static inline char *early_platform_driver_setup_func(void)		\
+{									\
+	return bufsiz ? buf : NULL;					\
+}
 #endif /* MODULE */
 
 #endif /* _PLATFORM_DEVICE_H_ */
-- 
cgit v1.2.3


From 4c1bd3d7a7d114dabd58f62f386ac4bfd268be1f Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 24 Aug 2009 14:44:30 +0100
Subject: USB: make urb scatter-gather support more generic

The WHCI HCD will also support urbs with scatter-gather lists.  Add a
usb_bus field to indicated how many sg list elements are supported by
the HCD.  Use this to decide whether to pass the scatter-list to the HCD
or not.

Make the usb-storage driver use this new field.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/message.c  |  8 +-------
 drivers/usb/host/xhci-pci.c |  2 ++
 drivers/usb/storage/usb.c   | 10 ++++++++++
 include/linux/usb.h         |  1 +
 4 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index e80f1af438c8..8d874cad6581 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -393,13 +393,7 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	if (io->entries <= 0)
 		return io->entries;
 
-	/* If we're running on an xHCI host controller, queue the whole scatter
-	 * gather list with one call to urb_enqueue().  This is only for bulk,
-	 * as that endpoint type does not care how the data gets broken up
-	 * across frames.
-	 */
-	if (usb_pipebulk(pipe) &&
-			bus_to_hcd(dev->bus)->driver->flags & HCD_USB3) {
+	if (dev->bus->sg_tablesize > 0) {
 		io->urbs = kmalloc(sizeof *io->urbs, mem_flags);
 		use_sg = true;
 	} else {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 06595ec27bb7..e097008d6fb1 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -54,6 +54,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
 	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
 	int			retval;
 
+	hcd->self.sg_tablesize = TRBS_PER_SEGMENT - 1;
+
 	xhci->cap_regs = hcd->regs;
 	xhci->op_regs = hcd->regs +
 		HC_LENGTH(xhci_readl(xhci, &xhci->cap_regs->hc_capbase));
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 01e43a13a6bf..1599d86154c4 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -843,6 +843,15 @@ static int usb_stor_scan_thread(void * __us)
 	complete_and_exit(&us->scanning_done, 0);
 }
 
+static unsigned int usb_stor_sg_tablesize(struct usb_interface *intf)
+{
+	struct usb_device *usb_dev = interface_to_usbdev(intf);
+
+	if (usb_dev->bus->sg_tablesize) {
+		return usb_dev->bus->sg_tablesize;
+	}
+	return SG_ALL;
+}
 
 /* First part of general USB mass-storage probing */
 int usb_stor_probe1(struct us_data **pus,
@@ -871,6 +880,7 @@ int usb_stor_probe1(struct us_data **pus,
 	 * Allow 16-byte CDBs and thus > 2TB
 	 */
 	host->max_cmd_len = 16;
+	host->sg_tablesize = usb_stor_sg_tablesize(intf);
 	*pus = us = host_to_us(host);
 	memset(us, 0, sizeof(struct us_data));
 	mutex_init(&(us->dev_mutex));
diff --git a/include/linux/usb.h b/include/linux/usb.h
index a34fa89f1474..6e91ee4f5b81 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -331,6 +331,7 @@ struct usb_bus {
 	u8 otg_port;			/* 0, or number of OTG/HNP port */
 	unsigned is_b_host:1;		/* true during some HNP roleswitches */
 	unsigned b_hnp_enable:1;	/* OTG: did A-Host enable HNP? */
+	unsigned sg_tablesize;		/* 0 or largest number of sg list entries */
 
 	int devnum_next;		/* Next open device number in
 					 * round-robin allocation */
-- 
cgit v1.2.3


From 5242658d1b97771d658991cf29be32bcf81d5859 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 21 Oct 2009 00:03:38 +0200
Subject: USB gadget: Handle endpoint requests at the function level

Control requests targeted at an endpoint (that is sent to EP0 but
specifying the target endpoint address in wIndex) are dispatched to the
current configuration's setup callback, requiring all gadget drivers to
dispatch the requests to the correct function driver.

To avoid this, record which endpoints are used by each function in the
composite driver SET CONFIGURATION handler and dispatch requests
targeted at endpoints to the correct function.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/composite.c | 54 +++++++++++++++++++++++++++++++++++-------
 include/linux/usb/composite.h  |  1 +
 2 files changed, 47 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index d05397ec8a18..8498f1a114d5 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -373,6 +373,8 @@ static void reset_config(struct usb_composite_dev *cdev)
 	list_for_each_entry(f, &cdev->config->functions, list) {
 		if (f->disable)
 			f->disable(f);
+
+		bitmap_zero(f->endpoints, 32);
 	}
 	cdev->config = NULL;
 }
@@ -418,10 +420,35 @@ static int set_config(struct usb_composite_dev *cdev,
 	/* Initialize all interfaces by setting them to altsetting zero. */
 	for (tmp = 0; tmp < MAX_CONFIG_INTERFACES; tmp++) {
 		struct usb_function	*f = c->interface[tmp];
+		struct usb_descriptor_header **descriptors;
 
 		if (!f)
 			break;
 
+		/*
+		 * Record which endpoints are used by the function. This is used
+		 * to dispatch control requests targeted at that endpoint to the
+		 * function's setup callback instead of the current
+		 * configuration's setup callback.
+		 */
+		if (gadget->speed == USB_SPEED_HIGH)
+			descriptors = f->hs_descriptors;
+		else
+			descriptors = f->descriptors;
+
+		for (; *descriptors; ++descriptors) {
+			struct usb_endpoint_descriptor *ep;
+			int addr;
+
+			if ((*descriptors)->bDescriptorType != USB_DT_ENDPOINT)
+				continue;
+
+			ep = (struct usb_endpoint_descriptor *)*descriptors;
+			addr = ((ep->bEndpointAddress & 0x80) >> 3)
+			     |  (ep->bEndpointAddress & 0x0f);
+			set_bit(addr, f->endpoints);
+		}
+
 		result = f->set_alt(f, tmp, 0);
 		if (result < 0) {
 			DBG(cdev, "interface %d (%s/%p) alt 0 --> %d\n",
@@ -688,6 +715,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 	u16				w_value = le16_to_cpu(ctrl->wValue);
 	u16				w_length = le16_to_cpu(ctrl->wLength);
 	struct usb_function		*f = NULL;
+	u8				endp;
 
 	/* partial re-init of the response message; the function or the
 	 * gadget might need to intercept e.g. a control-OUT completion
@@ -800,23 +828,33 @@ unknown:
 			ctrl->bRequestType, ctrl->bRequest,
 			w_value, w_index, w_length);
 
-		/* functions always handle their interfaces ... punt other
-		 * recipients (endpoint, other, WUSB, ...) to the current
+		/* functions always handle their interfaces and endpoints...
+		 * punt other recipients (other, WUSB, ...) to the current
 		 * configuration code.
 		 *
 		 * REVISIT it could make sense to let the composite device
 		 * take such requests too, if that's ever needed:  to work
 		 * in config 0, etc.
 		 */
-		if ((ctrl->bRequestType & USB_RECIP_MASK)
-				== USB_RECIP_INTERFACE) {
+		switch (ctrl->bRequestType & USB_RECIP_MASK) {
+		case USB_RECIP_INTERFACE:
 			f = cdev->config->interface[intf];
-			if (f && f->setup)
-				value = f->setup(f, ctrl);
-			else
+			break;
+
+		case USB_RECIP_ENDPOINT:
+			endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f);
+			list_for_each_entry(f, &cdev->config->functions, list) {
+				if (test_bit(endp, f->endpoints))
+					break;
+			}
+			if (&f->list == &cdev->config->functions)
 				f = NULL;
+			break;
 		}
-		if (value < 0 && !f) {
+
+		if (f && f->setup)
+			value = f->setup(f, ctrl);
+		else {
 			struct usb_configuration	*c;
 
 			c = cdev->config;
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 4f6bb3d2160e..738ea1a691cb 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -127,6 +127,7 @@ struct usb_function {
 	/* private: */
 	/* internals */
 	struct list_head		list;
+	DECLARE_BITMAP(endpoints, 32);
 };
 
 int usb_add_function(struct usb_configuration *, struct usb_function *);
-- 
cgit v1.2.3


From 91c8a5a9985d5bf9c55f6f82f183f57b050b2a3a Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 15 Oct 2009 17:09:34 +0300
Subject: USB OTG: add support for ulpi connected external transceivers

This adds support for OTG transceivers directly connected to the ULPI
interface. In particular, the following details are added

- a struct for low level io functions (read/write)
- a priv field to be used as 'viewport' by low level access functions
- an (*init) and (*shutdown) callbacks, along with static inline helpers
- a (*set_vbus) callback to switch the port power on and off
- a flags field for per-transceiver settings
- some defines for the flags bitmask to configure platform specific
  details

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Heikki Krogerus <ext-heikki.krogerus@nokia.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/otg.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 2443c0e7a80c..52bb917641f0 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -33,6 +33,23 @@ enum usb_otg_state {
 	OTG_STATE_A_VBUS_ERR,
 };
 
+#define USB_OTG_PULLUP_ID		(1 << 0)
+#define USB_OTG_PULLDOWN_DP		(1 << 1)
+#define USB_OTG_PULLDOWN_DM		(1 << 2)
+#define USB_OTG_EXT_VBUS_INDICATOR	(1 << 3)
+#define USB_OTG_DRV_VBUS		(1 << 4)
+#define USB_OTG_DRV_VBUS_EXT		(1 << 5)
+
+struct otg_transceiver;
+
+/* for transceivers connected thru an ULPI interface, the user must
+ * provide access ops
+ */
+struct otg_io_access_ops {
+	int (*read)(struct otg_transceiver *otg, u32 reg);
+	int (*write)(struct otg_transceiver *otg, u32 val, u32 reg);
+};
+
 /*
  * the otg driver needs to interact with both device side and host side
  * usb controllers.  it decides which controller is active at a given
@@ -42,6 +59,7 @@ enum usb_otg_state {
 struct otg_transceiver {
 	struct device		*dev;
 	const char		*label;
+	unsigned int		 flags;
 
 	u8			default_a;
 	enum usb_otg_state	state;
@@ -49,10 +67,17 @@ struct otg_transceiver {
 	struct usb_bus		*host;
 	struct usb_gadget	*gadget;
 
+	struct otg_io_access_ops	*io_ops;
+	void __iomem			*io_priv;
+
 	/* to pass extra port status to the root hub */
 	u16			port_status;
 	u16			port_change;
 
+	/* initialize/shutdown the OTG controller */
+	int	(*init)(struct otg_transceiver *otg);
+	void	(*shutdown)(struct otg_transceiver *otg);
+
 	/* bind/unbind the host controller */
 	int	(*set_host)(struct otg_transceiver *otg,
 				struct usb_bus *host);
@@ -65,6 +90,10 @@ struct otg_transceiver {
 	int	(*set_power)(struct otg_transceiver *otg,
 				unsigned mA);
 
+	/* effective for A-peripheral, ignored for B devices */
+	int	(*set_vbus)(struct otg_transceiver *otg,
+				bool enabled);
+
 	/* for non-OTG B devices: set transceiver into suspend mode */
 	int	(*set_suspend)(struct otg_transceiver *otg,
 				int suspend);
@@ -85,6 +114,38 @@ extern int otg_set_transceiver(struct otg_transceiver *);
 extern void usb_nop_xceiv_register(void);
 extern void usb_nop_xceiv_unregister(void);
 
+/* helpers for direct access thru low-level io interface */
+static inline int otg_io_read(struct otg_transceiver *otg, u32 reg)
+{
+	if (otg->io_ops && otg->io_ops->read)
+		return otg->io_ops->read(otg, reg);
+
+	return -EINVAL;
+}
+
+static inline int otg_io_write(struct otg_transceiver *otg, u32 reg, u32 val)
+{
+	if (otg->io_ops && otg->io_ops->write)
+		return otg->io_ops->write(otg, reg, val);
+
+	return -EINVAL;
+}
+
+static inline int
+otg_init(struct otg_transceiver *otg)
+{
+	if (otg->init)
+		return otg->init(otg);
+
+	return 0;
+}
+
+static inline void
+otg_shutdown(struct otg_transceiver *otg)
+{
+	if (otg->shutdown)
+		otg->shutdown(otg);
+}
 
 /* for usb host and peripheral controller drivers */
 extern struct otg_transceiver *otg_get_transceiver(void);
@@ -97,6 +158,12 @@ otg_start_hnp(struct otg_transceiver *otg)
 	return otg->start_hnp(otg);
 }
 
+/* Context: can sleep */
+static inline int
+otg_set_vbus(struct otg_transceiver *otg, bool enabled)
+{
+	return otg->set_vbus(otg, enabled);
+}
 
 /* for HCDs */
 static inline int
@@ -105,7 +172,6 @@ otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
 	return otg->set_host(otg, host);
 }
 
-
 /* for usb peripheral controller drivers */
 
 /* Context: can sleep */
-- 
cgit v1.2.3


From 2d57a95f09cf71c4c642e5be15f8b700d17ee90c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 15 Oct 2009 17:09:35 +0300
Subject: USB OTG: Add generic driver for ULPI OTG transceiver

This adds a minimal generic driver for ULPI connected transceivers,
using the OTG framework functions recently introduced.

The driver got a table to match the ULPI chips, which currently only has
one entry for NXP's ISP 1504 transceiver.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Heikki Krogerus <ext-heikki.krogerus@nokia.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/Makefile     |   2 +
 drivers/usb/otg/Kconfig  |   9 ++++
 drivers/usb/otg/Makefile |   1 +
 drivers/usb/otg/ulpi.c   | 136 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/ulpi.h |   7 +++
 5 files changed, 155 insertions(+)
 create mode 100644 drivers/usb/otg/ulpi.c
 create mode 100644 include/linux/usb/ulpi.h

(limited to 'include/linux')

diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index be3c9b80bc9f..473aa1a20de9 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -44,3 +44,5 @@ obj-y				+= early/
 
 obj-$(CONFIG_USB_ATM)		+= atm/
 obj-$(CONFIG_USB_SPEEDTOUCH)	+= atm/
+
+obj-$(CONFIG_USB_ULPI)		+= otg/
diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index aa884d072f0b..de56b3d743d7 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -41,6 +41,15 @@ config ISP1301_OMAP
 	  This driver can also be built as a module.  If so, the module
 	  will be called isp1301_omap.
 
+config USB_ULPI
+	bool "Generic ULPI Transceiver Driver"
+	depends on ARM
+	help
+	  Enable this to support ULPI connected USB OTG transceivers which
+	  are likely found on embedded boards.
+
+	  The only chip currently supported is NXP's ISP1504
+
 config TWL4030_USB
 	tristate "TWL4030 USB Transceiver Driver"
 	depends on TWL4030_CORE && REGULATOR_TWL4030
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index 208167856529..aeb49a8ec412 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
+obj-$(CONFIG_USB_ULPI)		+= ulpi.o
 
 ccflags-$(CONFIG_USB_DEBUG)	+= -DDEBUG
 ccflags-$(CONFIG_USB_GADGET_DEBUG) += -DDEBUG
diff --git a/drivers/usb/otg/ulpi.c b/drivers/usb/otg/ulpi.c
new file mode 100644
index 000000000000..896527456b7e
--- /dev/null
+++ b/drivers/usb/otg/ulpi.c
@@ -0,0 +1,136 @@
+/*
+ * Generic ULPI USB transceiver support
+ *
+ * Copyright (C) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * Based on sources from
+ *
+ *   Sascha Hauer <s.hauer@pengutronix.de>
+ *   Freescale Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/usb.h>
+#include <linux/usb/otg.h>
+#include <linux/usb/ulpi.h>
+
+/* ULPI register addresses */
+#define ULPI_VID_LOW         0x00    /* Vendor ID low */
+#define ULPI_VID_HIGH        0x01    /* Vendor ID high */
+#define ULPI_PID_LOW         0x02    /* Product ID low */
+#define ULPI_PID_HIGH        0x03    /* Product ID high */
+#define ULPI_ITFCTL          0x07    /* Interface Control */
+#define ULPI_OTGCTL          0x0A    /* OTG Control */
+
+/* add to above register address to access Set/Clear functions */
+#define ULPI_REG_SET         0x01
+#define ULPI_REG_CLEAR       0x02
+
+/* ULPI OTG Control Register bits */
+#define ID_PULL_UP              (1 << 0)        /* enable ID Pull Up */
+#define DP_PULL_DOWN            (1 << 1)        /* enable DP Pull Down */
+#define DM_PULL_DOWN            (1 << 2)        /* enable DM Pull Down */
+#define DISCHRG_VBUS            (1 << 3)        /* Discharge Vbus */
+#define CHRG_VBUS               (1 << 4)        /* Charge Vbus */
+#define DRV_VBUS                (1 << 5)        /* Drive Vbus */
+#define DRV_VBUS_EXT            (1 << 6)        /* Drive Vbus external */
+#define USE_EXT_VBUS_IND        (1 << 7)        /* Use ext. Vbus indicator */
+
+#define ULPI_ID(vendor, product) (((vendor) << 16) | (product))
+
+#define TR_FLAG(flags, a, b)	(((flags) & a) ? b : 0)
+
+/* ULPI hardcoded IDs, used for probing */
+static unsigned int ulpi_ids[] = {
+	ULPI_ID(0x04cc, 0x1504),	/* NXP ISP1504 */
+};
+
+static int ulpi_set_flags(struct otg_transceiver *otg)
+{
+	unsigned int flags = 0;
+
+	if (otg->flags & USB_OTG_PULLUP_ID)
+		flags |= ID_PULL_UP;
+
+	if (otg->flags & USB_OTG_PULLDOWN_DM)
+		flags |= DM_PULL_DOWN;
+
+	if (otg->flags & USB_OTG_PULLDOWN_DP)
+		flags |= DP_PULL_DOWN;
+
+	if (otg->flags & USB_OTG_EXT_VBUS_INDICATOR)
+		flags |= USE_EXT_VBUS_IND;
+
+	return otg_io_write(otg, flags, ULPI_OTGCTL + ULPI_REG_SET);
+}
+
+static int ulpi_init(struct otg_transceiver *otg)
+{
+	int i, vid, pid;
+
+	vid = (otg_io_read(otg, ULPI_VID_HIGH) << 8) |
+	       otg_io_read(otg, ULPI_VID_LOW);
+	pid = (otg_io_read(otg, ULPI_PID_HIGH) << 8) |
+	       otg_io_read(otg, ULPI_PID_LOW);
+
+	pr_info("ULPI transceiver vendor/product ID 0x%04x/0x%04x\n", vid, pid);
+
+	for (i = 0; i < ARRAY_SIZE(ulpi_ids); i++)
+		if (ulpi_ids[i] == ULPI_ID(vid, pid))
+			return ulpi_set_flags(otg);
+
+	pr_err("ULPI ID does not match any known transceiver.\n");
+	return -ENODEV;
+}
+
+static int ulpi_set_vbus(struct otg_transceiver *otg, bool on)
+{
+	unsigned int flags = otg_io_read(otg, ULPI_OTGCTL);
+
+	flags &= ~(DRV_VBUS | DRV_VBUS_EXT);
+
+	if (on) {
+		if (otg->flags & USB_OTG_DRV_VBUS)
+			flags |= DRV_VBUS;
+
+		if (otg->flags & USB_OTG_DRV_VBUS_EXT)
+			flags |= DRV_VBUS_EXT;
+	}
+
+	return otg_io_write(otg, flags, ULPI_OTGCTL + ULPI_REG_SET);
+}
+
+struct otg_transceiver *
+otg_ulpi_create(struct otg_io_access_ops *ops,
+		unsigned int flags)
+{
+	struct otg_transceiver *otg;
+
+	otg = kzalloc(sizeof(*otg), GFP_KERNEL);
+	if (!otg)
+		return NULL;
+
+	otg->label	= "ULPI";
+	otg->flags	= flags;
+	otg->io_ops	= ops;
+	otg->init	= ulpi_init;
+	otg->set_vbus	= ulpi_set_vbus;
+
+	return otg;
+}
+EXPORT_SYMBOL_GPL(otg_ulpi_create);
+
diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h
new file mode 100644
index 000000000000..20675c6ebc4d
--- /dev/null
+++ b/include/linux/usb/ulpi.h
@@ -0,0 +1,7 @@
+#ifndef __LINUX_USB_ULPI_H
+#define __LINUX_USB_ULPI_H
+
+struct otg_transceiver *otg_ulpi_create(struct otg_io_access_ops *ops,
+					unsigned int flags);
+
+#endif /* __LINUX_USB_ULPI_H */
-- 
cgit v1.2.3


From fb34d53752d5bec5acc73422e462a9c68aeeaa2a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 13 Nov 2009 11:53:59 -0500
Subject: USB: remove the auto_pm flag

This patch (as1302) removes the auto_pm flag from struct usb_device.
The flag's only purpose was to distinguish between autosuspends and
external suspends, but that information is now available in the
pm_message_t argument passed to suspend methods.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 9 +++++----
 drivers/bluetooth/btusb.c              | 2 +-
 drivers/hid/usbhid/hid-core.c          | 8 ++++----
 drivers/net/wimax/i2400m/usb.c         | 7 ++-----
 drivers/usb/core/driver.c              | 4 ----
 drivers/usb/serial/option.c            | 2 +-
 drivers/usb/serial/sierra.c            | 2 +-
 include/linux/usb.h                    | 2 --
 8 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index ad642615ad4c..8817368203d6 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -423,15 +423,16 @@ an URB had completed too recently.
 
 External suspend calls should never be allowed to fail in this way,
 only autosuspend calls.  The driver can tell them apart by checking
-udev->auto_pm; this flag will be set to 1 for internal PM events
-(autosuspend or autoresume) and 0 for external PM events.
+the PM_EVENT_AUTO bit in the message.event argument to the suspend
+method; this bit will be set for internal PM events (autosuspend) and
+clear for external PM events.
 
 Many of the ingredients in the autosuspend framework are oriented
 towards interfaces: The usb_interface structure contains the
 pm_usage_cnt field, and the usb_autopm_* routines take an interface
 pointer as their argument.  But somewhat confusingly, a few of the
-pieces (usb_mark_last_busy() and udev->auto_pm) use the usb_device
-structure instead.  Drivers need to keep this straight; they can call
+pieces (i.e., usb_mark_last_busy()) use the usb_device structure
+instead.  Drivers need to keep this straight; they can call
 interface_to_usbdev() to find the device structure for a given
 interface.
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 44bc8bbabf54..4d2905996751 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1066,7 +1066,7 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message)
 		return 0;
 
 	spin_lock_irq(&data->txlock);
-	if (!(interface_to_usbdev(intf)->auto_pm && data->tx_in_flight)) {
+	if (!((message.event & PM_EVENT_AUTO) && data->tx_in_flight)) {
 		set_bit(BTUSB_SUSPENDING, &data->flags);
 		spin_unlock_irq(&data->txlock);
 	} else {
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 0258289f3b3e..e2997a8d5e1b 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1253,10 +1253,9 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct hid_device *hid = usb_get_intfdata(intf);
 	struct usbhid_device *usbhid = hid->driver_data;
-	struct usb_device *udev = interface_to_usbdev(intf);
 	int status;
 
-	if (udev->auto_pm) {
+	if (message.event & PM_EVENT_AUTO) {
 		spin_lock_irq(&usbhid->lock);	/* Sync with error handler */
 		if (!test_bit(HID_RESET_PENDING, &usbhid->iofl)
 		    && !test_bit(HID_CLEAR_HALT, &usbhid->iofl)
@@ -1281,7 +1280,7 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 			return -EIO;
 	}
 
-	if (!ignoreled && udev->auto_pm) {
+	if (!ignoreled && (message.event & PM_EVENT_AUTO)) {
 		spin_lock_irq(&usbhid->lock);
 		if (test_bit(HID_LED_ON, &usbhid->iofl)) {
 			spin_unlock_irq(&usbhid->lock);
@@ -1294,7 +1293,8 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 	hid_cancel_delayed_stuff(usbhid);
 	hid_cease_io(usbhid);
 
-	if (udev->auto_pm && test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) {
+	if ((message.event & PM_EVENT_AUTO) &&
+			test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) {
 		/* lost race against keypresses */
 		status = hid_start_in(hid);
 		if (status < 0)
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index 47e84ef355c5..3b48681f8a0d 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -579,7 +579,7 @@ void i2400mu_disconnect(struct usb_interface *iface)
  *
  *    As well, the device might refuse going to sleep for whichever
  *    reason. In this case we just fail. For system suspend/hibernate,
- *    we *can't* fail. We look at usb_dev->auto_pm to see if the
+ *    we *can't* fail. We check PM_EVENT_AUTO to see if the
  *    suspend call comes from the USB stack or from the system and act
  *    in consequence.
  *
@@ -591,14 +591,11 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg)
 	int result = 0;
 	struct device *dev = &iface->dev;
 	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-#ifdef CONFIG_PM
-	struct usb_device *usb_dev = i2400mu->usb_dev;
-#endif
 	unsigned is_autosuspend = 0;
 	struct i2400m *i2400m = &i2400mu->i2400m;
 
 #ifdef CONFIG_PM
-	if (usb_dev->auto_pm > 0)
+	if (pm_msg.event & PM_EVENT_AUTO)
 		is_autosuspend = 1;
 #endif
 
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 4f864472c5c4..8016a296010e 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1341,7 +1341,6 @@ static int usb_autopm_do_device(struct usb_device *udev, int inc_usage_cnt)
 	int	status = 0;
 
 	usb_pm_lock(udev);
-	udev->auto_pm = 1;
 	udev->pm_usage_cnt += inc_usage_cnt;
 	WARN_ON(udev->pm_usage_cnt < 0);
 	if (inc_usage_cnt)
@@ -1473,7 +1472,6 @@ static int usb_autopm_do_interface(struct usb_interface *intf,
 	if (intf->condition == USB_INTERFACE_UNBOUND)
 		status = -ENODEV;
 	else {
-		udev->auto_pm = 1;
 		atomic_add(inc_usage_cnt, &intf->pm_usage_cnt);
 		udev->last_busy = jiffies;
 		if (inc_usage_cnt >= 0 &&
@@ -1707,7 +1705,6 @@ int usb_external_suspend_device(struct usb_device *udev, pm_message_t msg)
 
 	do_unbind_rebind(udev, DO_UNBIND);
 	usb_pm_lock(udev);
-	udev->auto_pm = 0;
 	status = usb_suspend_both(udev, msg);
 	usb_pm_unlock(udev);
 	return status;
@@ -1730,7 +1727,6 @@ int usb_external_resume_device(struct usb_device *udev, pm_message_t msg)
 	int	status;
 
 	usb_pm_lock(udev);
-	udev->auto_pm = 0;
 	status = usb_resume_both(udev, msg);
 	udev->last_busy = jiffies;
 	usb_pm_unlock(udev);
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 0d46bbec44b7..8751ec79a159 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1313,7 +1313,7 @@ static int option_suspend(struct usb_serial *serial, pm_message_t message)
 
 	dbg("%s entered", __func__);
 
-	if (serial->dev->auto_pm) {
+	if (message.event & PM_EVENT_AUTO) {
 		spin_lock_irq(&intfdata->susp_lock);
 		b = intfdata->in_flight;
 		spin_unlock_irq(&intfdata->susp_lock);
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index c5c41aed106d..ac1b6449fb6a 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -1005,7 +1005,7 @@ static int sierra_suspend(struct usb_serial *serial, pm_message_t message)
 	struct sierra_intf_private *intfdata;
 	int b;
 
-	if (serial->dev->auto_pm) {
+	if (message.event & PM_EVENT_AUTO) {
 		intfdata = serial->private;
 		spin_lock_irq(&intfdata->susp_lock);
 		b = intfdata->in_flight;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 6e91ee4f5b81..4b6f6db544ee 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -429,7 +429,6 @@ struct usb_tt;
  * @last_busy: time of last use
  * @autosuspend_delay: in jiffies
  * @connect_time: time device was first connected
- * @auto_pm: autosuspend/resume in progress
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
  * @autosuspend_disabled: autosuspend disabled by the user
@@ -514,7 +513,6 @@ struct usb_device {
 	int autosuspend_delay;
 	unsigned long connect_time;
 
-	unsigned auto_pm:1;
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
 	unsigned autosuspend_disabled:1;
-- 
cgit v1.2.3


From 8e4ceb38eb5bbaef22fc00abe9bc11e26bea2ab5 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 7 Dec 2009 13:01:37 -0500
Subject: USB: prepare for changover to Runtime PM framework

This patch (as1303) revises the USB Power Management infrastructure to
make it compatible with the new driver-model Runtime PM framework:

	Drivers are no longer allowed to access intf->pm_usage_cnt
	directly; the PM framework manages its own usage counters.

	usb_autopm_set_interface() is eliminated, because it directly
	sets intf->pm_usage_cnt.

	usb_autopm_enable() and usb_autopm_disable() are eliminated,
	because they call usb_autopm_set_interface().

	usb_autopm_get_interface_no_resume() and
	usb_autopm_put_interface_no_suspend() are added.  They
	correspond to pm_runtime_get_noresume() and
	pm_runtime_put_noidle() in the PM framework.

	The power/level attribute no longer accepts "suspend", only
	"on" and "auto".  The PM framework doesn't allow devices to be
	forced into a suspended mode.

The hub driver contains the only code that violates the new
guidelines.  It is updated to use the new interface routines instead.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 60 ++++++++++++++++------------------
 drivers/usb/core/driver.c              | 31 ------------------
 drivers/usb/core/hub.c                 | 45 +++++++++++++++++--------
 drivers/usb/core/sysfs.c               | 25 +++-----------
 include/linux/usb.h                    | 26 ++++++---------
 5 files changed, 74 insertions(+), 113 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index 8817368203d6..c7c1dc2f8017 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -2,7 +2,7 @@
 
 		 Alan Stern <stern@rowland.harvard.edu>
 
-			    October 5, 2007
+			    November 10, 2009
 
 
@@ -123,9 +123,9 @@ relevant attribute files are: wakeup, level, and autosuspend.
 
 	power/level
 
-		This file contains one of three words: "on", "auto",
-		or "suspend".  You can write those words to the file
-		to change the device's setting.
+		This file contains one of two words: "on" or "auto".
+		You can write those words to the file to change the
+		device's setting.
 
 		"on" means that the device should be resumed and
 		autosuspend is not allowed.  (Of course, system
@@ -134,10 +134,10 @@ relevant attribute files are: wakeup, level, and autosuspend.
 		"auto" is the normal state in which the kernel is
 		allowed to autosuspend and autoresume the device.
 
-		"suspend" means that the device should remain
-		suspended, and autoresume is not allowed.  (But remote
-		wakeup may still be allowed, since it is controlled
-		separately by the power/wakeup attribute.)
+		(In kernels up to 2.6.32, you could also specify
+		"suspend", meaning that the device should remain
+		suspended and autoresume was not allowed.  This
+		setting is no longer supported.)
 
 	power/autosuspend
 
@@ -313,13 +313,14 @@ three of the methods listed above.  In addition, a driver indicates
 that it supports autosuspend by setting the .supports_autosuspend flag
 in its usb_driver structure.  It is then responsible for informing the
 USB core whenever one of its interfaces becomes busy or idle.  The
-driver does so by calling these five functions:
+driver does so by calling these six functions:
 
 	int  usb_autopm_get_interface(struct usb_interface *intf);
 	void usb_autopm_put_interface(struct usb_interface *intf);
-	int  usb_autopm_set_interface(struct usb_interface *intf);
 	int  usb_autopm_get_interface_async(struct usb_interface *intf);
 	void usb_autopm_put_interface_async(struct usb_interface *intf);
+	void usb_autopm_get_interface_no_resume(struct usb_interface *intf);
+	void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
 
 The functions work by maintaining a counter in the usb_interface
 structure.  When intf->pm_usage_count is > 0 then the interface is
@@ -331,11 +332,13 @@ considered to be idle, and the kernel may autosuspend the device.
 associated with the device itself rather than any of its interfaces.
 This field is used only by the USB core.)
 
-The driver owns intf->pm_usage_count; it can modify the value however
-and whenever it likes.  A nice aspect of the non-async usb_autopm_*
-routines is that the changes they make are protected by the usb_device
-structure's PM mutex (udev->pm_mutex); however drivers may change
-pm_usage_count without holding the mutex.  Drivers using the async
+Drivers must not modify intf->pm_usage_count directly; its value
+should be changed only be using the functions listed above.  Drivers
+are responsible for insuring that the overall change to pm_usage_count
+during their lifetime balances out to 0 (it may be necessary for the
+disconnect method to call usb_autopm_put_interface() one or more times
+to fulfill this requirement).  The first two routines use the PM mutex
+in struct usb_device for mutual exclusion; drivers using the async
 routines are responsible for their own synchronization and mutual
 exclusion.
 
@@ -347,11 +350,6 @@ exclusion.
 	attempts an autosuspend if the new value is <= 0 and the
 	device isn't suspended.
 
-	usb_autopm_set_interface() leaves pm_usage_count alone.
-	It attempts an autoresume if the value is > 0 and the device
-	is suspended, and it attempts an autosuspend if the value is
-	<= 0 and the device isn't suspended.
-
 	usb_autopm_get_interface_async() and
 	usb_autopm_put_interface_async() do almost the same things as
 	their non-async counterparts.  The differences are: they do
@@ -360,13 +358,11 @@ exclusion.
 	such as an URB's completion handler, but when they return the
 	device will not generally not yet be in the desired state.
 
-There also are a couple of utility routines drivers can use:
-
-	usb_autopm_enable() sets pm_usage_cnt to 0 and then calls
-	usb_autopm_set_interface(), which will attempt an autosuspend.
-
-	usb_autopm_disable() sets pm_usage_cnt to 1 and then calls
-	usb_autopm_set_interface(), which will attempt an autoresume.
+	usb_autopm_get_interface_no_resume() and
+	usb_autopm_put_interface_no_suspend() merely increment or
+	decrement the pm_usage_count value; they do not attempt to
+	carry out an autoresume or an autosuspend.  Hence they can be
+	called in an atomic context.
 
 The conventional usage pattern is that a driver calls
 usb_autopm_get_interface() in its open routine and
@@ -400,11 +396,11 @@ though, setting this flag won't cause the kernel to autoresume it.
 Normally a driver would set this flag in its probe method, at which
 time the device is guaranteed not to be autosuspended.)
 
-The usb_autopm_* routines have to run in a sleepable process context;
-they must not be called from an interrupt handler or while holding a
-spinlock.  In fact, the entire autosuspend mechanism is not well geared
-toward interrupt-driven operation.  However there is one thing a
-driver can do in an interrupt handler:
+The synchronous usb_autopm_* routines have to run in a sleepable
+process context; they must not be called from an interrupt handler or
+while holding a spinlock.  In fact, the entire autosuspend mechanism
+is not well geared toward interrupt-driven operation.  However there
+is one thing a driver can do in an interrupt handler:
 
 	usb_mark_last_busy(struct usb_device *udev);
 
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 8016a296010e..7a05bab73960 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -948,8 +948,6 @@ static int usb_resume_device(struct usb_device *udev, pm_message_t msg)
 
  done:
 	dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status);
-	if (status == 0)
-		udev->autoresume_disabled = 0;
 	return status;
 }
 
@@ -1280,11 +1278,6 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg)
 
 	/* Propagate the resume up the tree, if necessary */
 	if (udev->state == USB_STATE_SUSPENDED) {
-		if ((msg.event & PM_EVENT_AUTO) &&
-				udev->autoresume_disabled) {
-			status = -EPERM;
-			goto done;
-		}
 		if (parent) {
 			status = usb_autoresume_device(parent);
 			if (status == 0) {
@@ -1638,8 +1631,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf)
 
 	if (intf->condition == USB_INTERFACE_UNBOUND)
 		status = -ENODEV;
-	else if (udev->autoresume_disabled)
-		status = -EPERM;
 	else {
 		atomic_inc(&intf->pm_usage_cnt);
 		if (atomic_read(&intf->pm_usage_cnt) > 0 &&
@@ -1652,28 +1643,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async);
 
-/**
- * usb_autopm_set_interface - set a USB interface's autosuspend state
- * @intf: the usb_interface whose state should be set
- *
- * This routine sets the autosuspend state of @intf's device according
- * to @intf's usage counter, which the caller must have set previously.
- * If the counter is <= 0, the device is autosuspended (if it isn't
- * already suspended and if nothing else prevents the autosuspend).  If
- * the counter is > 0, the device is autoresumed (if it isn't already
- * awake).
- */
-int usb_autopm_set_interface(struct usb_interface *intf)
-{
-	int	status;
-
-	status = usb_autopm_do_interface(intf, 0);
-	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, atomic_read(&intf->pm_usage_cnt));
-	return status;
-}
-EXPORT_SYMBOL_GPL(usb_autopm_set_interface);
-
 #else
 
 void usb_autosuspend_work(struct work_struct *work)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 5413d712cae0..b38fd6730e2a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -71,6 +71,7 @@ struct usb_hub {
 
 	unsigned		mA_per_port;	/* current for each child */
 
+	unsigned		init_done:1;
 	unsigned		limited_power:1;
 	unsigned		quiescing:1;
 	unsigned		disconnected:1;
@@ -375,12 +376,13 @@ static void kick_khubd(struct usb_hub *hub)
 {
 	unsigned long	flags;
 
-	/* Suppress autosuspend until khubd runs */
-	atomic_set(&to_usb_interface(hub->intfdev)->pm_usage_cnt, 1);
-
 	spin_lock_irqsave(&hub_event_lock, flags);
 	if (!hub->disconnected && list_empty(&hub->event_list)) {
 		list_add_tail(&hub->event_list, &hub_event_list);
+
+		/* Suppress autosuspend until khubd runs */
+		usb_autopm_get_interface_no_resume(
+				to_usb_interface(hub->intfdev));
 		wake_up(&khubd_wait);
 	}
 	spin_unlock_irqrestore(&hub_event_lock, flags);
@@ -665,7 +667,7 @@ int usb_remove_device(struct usb_device *udev)
 }
 
 enum hub_activation_type {
-	HUB_INIT, HUB_INIT2, HUB_INIT3,
+	HUB_INIT, HUB_INIT2, HUB_INIT3,		/* INITs must come first */
 	HUB_POST_RESET, HUB_RESUME, HUB_RESET_RESUME,
 };
 
@@ -710,8 +712,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 					msecs_to_jiffies(delay));
 
 			/* Suppress autosuspend until init is done */
-			atomic_set(&to_usb_interface(hub->intfdev)->
-					pm_usage_cnt, 1);
+			usb_autopm_get_interface_no_resume(
+					to_usb_interface(hub->intfdev));
 			return;		/* Continues at init2: below */
 		} else {
 			hub_power_on(hub, true);
@@ -818,6 +820,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 	}
  init3:
 	hub->quiescing = 0;
+	hub->init_done = 1;
 
 	status = usb_submit_urb(hub->urb, GFP_NOIO);
 	if (status < 0)
@@ -827,6 +830,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
 	/* Scan all ports that need attention */
 	kick_khubd(hub);
+
+	/* Allow autosuspend if it was suppressed */
+	if (type <= HUB_INIT3)
+		usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
 }
 
 /* Implement the continuations for the delays above */
@@ -854,6 +861,11 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
 	int i;
 
 	cancel_delayed_work_sync(&hub->init_work);
+	if (!hub->init_done) {
+		hub->init_done = 1;
+		usb_autopm_put_interface_no_suspend(
+				to_usb_interface(hub->intfdev));
+	}
 
 	/* khubd and related activity won't re-trigger */
 	hub->quiescing = 1;
@@ -1176,7 +1188,10 @@ static void hub_disconnect(struct usb_interface *intf)
 
 	/* Take the hub off the event list and don't let it be added again */
 	spin_lock_irq(&hub_event_lock);
-	list_del_init(&hub->event_list);
+	if (!list_empty(&hub->event_list)) {
+		list_del_init(&hub->event_list);
+		usb_autopm_put_interface_no_suspend(intf);
+	}
 	hub->disconnected = 1;
 	spin_unlock_irq(&hub_event_lock);
 
@@ -3235,7 +3250,7 @@ static void hub_events(void)
 		 * disconnected while waiting for the lock to succeed. */
 		usb_lock_device(hdev);
 		if (unlikely(hub->disconnected))
-			goto loop;
+			goto loop2;
 
 		/* If the hub has died, clean up after it */
 		if (hdev->state == USB_STATE_NOTATTACHED) {
@@ -3384,11 +3399,15 @@ static void hub_events(void)
 			}
 		}
 
-loop_autopm:
-		/* Allow autosuspend if we're not going to run again */
-		if (list_empty(&hub->event_list))
-			usb_autopm_enable(intf);
-loop:
+ loop_autopm:
+		/* Balance the usb_autopm_get_interface() above */
+		usb_autopm_put_interface_no_suspend(intf);
+ loop:
+		/* Balance the usb_autopm_get_interface_no_resume() in
+		 * kick_khubd() and allow autosuspend.
+		 */
+		usb_autopm_put_interface(intf);
+ loop2:
 		usb_unlock_device(hdev);
 		kref_put(&hub->kref, hub_release);
 
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index ae763974be25..15477008b631 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -327,7 +327,6 @@ static DEVICE_ATTR(autosuspend, S_IRUGO | S_IWUSR,
 
 static const char on_string[] = "on";
 static const char auto_string[] = "auto";
-static const char suspend_string[] = "suspend";
 
 static ssize_t
 show_level(struct device *dev, struct device_attribute *attr, char *buf)
@@ -335,13 +334,8 @@ show_level(struct device *dev, struct device_attribute *attr, char *buf)
 	struct usb_device *udev = to_usb_device(dev);
 	const char *p = auto_string;
 
-	if (udev->state == USB_STATE_SUSPENDED) {
-		if (udev->autoresume_disabled)
-			p = suspend_string;
-	} else {
-		if (udev->autosuspend_disabled)
-			p = on_string;
-	}
+	if (udev->state != USB_STATE_SUSPENDED && udev->autosuspend_disabled)
+		p = on_string;
 	return sprintf(buf, "%s\n", p);
 }
 
@@ -353,7 +347,7 @@ set_level(struct device *dev, struct device_attribute *attr,
 	int len = count;
 	char *cp;
 	int rc = 0;
-	int old_autosuspend_disabled, old_autoresume_disabled;
+	int old_autosuspend_disabled;
 
 	cp = memchr(buf, '\n', count);
 	if (cp)
@@ -361,7 +355,6 @@ set_level(struct device *dev, struct device_attribute *attr,
 
 	usb_lock_device(udev);
 	old_autosuspend_disabled = udev->autosuspend_disabled;
-	old_autoresume_disabled = udev->autoresume_disabled;
 
 	/* Setting the flags without calling usb_pm_lock is a subject to
 	 * races, but who cares...
@@ -369,28 +362,18 @@ set_level(struct device *dev, struct device_attribute *attr,
 	if (len == sizeof on_string - 1 &&
 			strncmp(buf, on_string, len) == 0) {
 		udev->autosuspend_disabled = 1;
-		udev->autoresume_disabled = 0;
 		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
 
 	} else if (len == sizeof auto_string - 1 &&
 			strncmp(buf, auto_string, len) == 0) {
 		udev->autosuspend_disabled = 0;
-		udev->autoresume_disabled = 0;
 		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
 
-	} else if (len == sizeof suspend_string - 1 &&
-			strncmp(buf, suspend_string, len) == 0) {
-		udev->autosuspend_disabled = 0;
-		udev->autoresume_disabled = 1;
-		rc = usb_external_suspend_device(udev, PMSG_USER_SUSPEND);
-
 	} else
 		rc = -EINVAL;
 
-	if (rc) {
+	if (rc)
 		udev->autosuspend_disabled = old_autosuspend_disabled;
-		udev->autoresume_disabled = old_autoresume_disabled;
-	}
 	usb_unlock_device(udev);
 	return (rc < 0 ? rc : count);
 }
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4b6f6db544ee..6af3581e1114 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -432,7 +432,6 @@ struct usb_tt;
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
  * @autosuspend_disabled: autosuspend disabled by the user
- * @autoresume_disabled: autoresume disabled by the user
  * @skip_sys_resume: skip the next system resume
  * @wusb_dev: if this is a Wireless USB device, link to the WUSB
  *	specific data for the device.
@@ -516,7 +515,6 @@ struct usb_device {
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
 	unsigned autosuspend_disabled:1;
-	unsigned autoresume_disabled:1;
 	unsigned skip_sys_resume:1;
 #endif
 	struct wusb_dev *wusb_dev;
@@ -542,22 +540,20 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
 /* USB autosuspend and autoresume */
 #ifdef CONFIG_USB_SUSPEND
-extern int usb_autopm_set_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface_async(struct usb_interface *intf);
 extern void usb_autopm_put_interface_async(struct usb_interface *intf);
 
-static inline void usb_autopm_enable(struct usb_interface *intf)
+static inline void usb_autopm_get_interface_no_resume(
+		struct usb_interface *intf)
 {
-	atomic_set(&intf->pm_usage_cnt, 0);
-	usb_autopm_set_interface(intf);
+	atomic_inc(&intf->pm_usage_cnt);
 }
-
-static inline void usb_autopm_disable(struct usb_interface *intf)
+static inline void usb_autopm_put_interface_no_suspend(
+		struct usb_interface *intf)
 {
-	atomic_set(&intf->pm_usage_cnt, 1);
-	usb_autopm_set_interface(intf);
+	atomic_dec(&intf->pm_usage_cnt);
 }
 
 static inline void usb_mark_last_busy(struct usb_device *udev)
@@ -567,12 +563,8 @@ static inline void usb_mark_last_busy(struct usb_device *udev)
 
 #else
 
-static inline int usb_autopm_set_interface(struct usb_interface *intf)
-{ return 0; }
-
 static inline int usb_autopm_get_interface(struct usb_interface *intf)
 { return 0; }
-
 static inline int usb_autopm_get_interface_async(struct usb_interface *intf)
 { return 0; }
 
@@ -580,9 +572,11 @@ static inline void usb_autopm_put_interface(struct usb_interface *intf)
 { }
 static inline void usb_autopm_put_interface_async(struct usb_interface *intf)
 { }
-static inline void usb_autopm_enable(struct usb_interface *intf)
+static inline void usb_autopm_get_interface_no_resume(
+		struct usb_interface *intf)
 { }
-static inline void usb_autopm_disable(struct usb_interface *intf)
+static inline void usb_autopm_put_interface_no_suspend(
+		struct usb_interface *intf)
 { }
 static inline void usb_mark_last_busy(struct usb_device *udev)
 { }
-- 
cgit v1.2.3


From a0bb108112a872c0b0c4b3ef4974f95fb75b155d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 7 Dec 2009 16:39:16 -0500
Subject: USB: usb-storage: add BAD_SENSE flag

This patch (as1311) fixes a problem in usb-storage: Some devices are
pretty broken when it comes to reporting sense data.  The information
they send back indicates that they have more than 18 bytes of sense
data available, but when the system asks for more than 18 they fail or
hang.  The symptom is that probing fails with multiple resets.

The patch adds a new BAD_SENSE flag to indicate that usb-storage
should never ask for more than 18 bytes of sense data.  The flag can
be set in an unusual_devs entry or via the "quirks=" module parameter,
and it is set automatically whenever a REQUEST SENSE command for more
than 18 bytes fails or times out.

An unusual_devs entry is added for the Agfa photo frame, which uses a
Prolific chip having this bug.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Daniel Kukula <daniel.kuku@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/kernel-parameters.txt |  2 ++
 drivers/usb/storage/transport.c     | 17 +++++++++++++----
 drivers/usb/storage/unusual_devs.h  |  7 +++++++
 drivers/usb/storage/usb.c           |  3 +++
 include/linux/usb_usual.h           |  4 +++-
 5 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 777dc8a32df8..3f886e298f62 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2663,6 +2663,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			to a common usb-storage quirk flag as follows:
 				a = SANE_SENSE (collect more than 18 bytes
 					of sense data);
+				b = BAD_SENSE (don't collect more than 18
+					bytes of sense data);
 				c = FIX_CAPACITY (decrease the reported
 					device capacity by one sector);
 				h = CAPACITY_HEURISTICS (decrease the
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 589f6b4404f0..cc313d16d727 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -666,10 +666,11 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 	 * to wait for at least one CHECK_CONDITION to determine
 	 * SANE_SENSE support
 	 */
-	if ((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) &&
+	if (unlikely((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) &&
 	    result == USB_STOR_TRANSPORT_GOOD &&
 	    !(us->fflags & US_FL_SANE_SENSE) &&
-	    !(srb->cmnd[2] & 0x20)) {
+	    !(us->fflags & US_FL_BAD_SENSE) &&
+	    !(srb->cmnd[2] & 0x20))) {
 		US_DEBUGP("-- SAT supported, increasing auto-sense\n");
 		us->fflags |= US_FL_SANE_SENSE;
 	}
@@ -718,6 +719,12 @@ Retry_Sense:
 		if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) {
 			US_DEBUGP("-- auto-sense aborted\n");
 			srb->result = DID_ABORT << 16;
+
+			/* If SANE_SENSE caused this problem, disable it */
+			if (sense_size != US_SENSE_SIZE) {
+				us->fflags &= ~US_FL_SANE_SENSE;
+				us->fflags |= US_FL_BAD_SENSE;
+			}
 			goto Handle_Errors;
 		}
 
@@ -727,10 +734,11 @@ Retry_Sense:
 		 * (small) sense request. This fixes some USB GSM modems
 		 */
 		if (temp_result == USB_STOR_TRANSPORT_FAILED &&
-		    (us->fflags & US_FL_SANE_SENSE) &&
-		    sense_size != US_SENSE_SIZE) {
+				sense_size != US_SENSE_SIZE) {
 			US_DEBUGP("-- auto-sense failure, retry small sense\n");
 			sense_size = US_SENSE_SIZE;
+			us->fflags &= ~US_FL_SANE_SENSE;
+			us->fflags |= US_FL_BAD_SENSE;
 			goto Retry_Sense;
 		}
 
@@ -754,6 +762,7 @@ Retry_Sense:
 		 */
 		if (srb->sense_buffer[7] > (US_SENSE_SIZE - 8) &&
 		    !(us->fflags & US_FL_SANE_SENSE) &&
+		    !(us->fflags & US_FL_BAD_SENSE) &&
 		    (srb->sense_buffer[0] & 0x7C) == 0x70) {
 			US_DEBUGP("-- SANE_SENSE support enabled\n");
 			us->fflags |= US_FL_SANE_SENSE;
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index d4f034ebaa8a..64a0a2c27e12 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -818,6 +818,13 @@ UNUSUAL_DEV( 0x066f, 0x8000, 0x0001, 0x0001,
 		US_SC_DEVICE, US_PR_DEVICE, NULL,
 		US_FL_FIX_CAPACITY ),
 
+/* Reported by Daniel Kukula <daniel.kuku@gmail.com> */
+UNUSUAL_DEV( 0x067b, 0x1063, 0x0100, 0x0100,
+		"Prolific Technology, Inc.",
+		"Prolific Storage Gadget",
+		US_SC_DEVICE, US_PR_DEVICE, NULL,
+		US_FL_BAD_SENSE ),
+
 /* Reported by Rogerio Brito <rbrito@ime.usp.br> */
 UNUSUAL_DEV( 0x067b, 0x2317, 0x0001, 0x001,
 		"Prolific Technology, Inc.",
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 1599d86154c4..f5c0264caa33 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -463,6 +463,9 @@ static void adjust_quirks(struct us_data *us)
 		case 'a':
 			f |= US_FL_SANE_SENSE;
 			break;
+		case 'b':
+			f |= US_FL_BAD_SENSE;
+			break;
 		case 'c':
 			f |= US_FL_FIX_CAPACITY;
 			break;
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 3d15fb9bc116..a4b947e470a5 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -56,7 +56,9 @@
 	US_FLAG(SANE_SENSE,     0x00008000)			\
 		/* Sane Sense (> 18 bytes) */			\
 	US_FLAG(CAPACITY_OK,	0x00010000)			\
-		/* READ CAPACITY response is correct */
+		/* READ CAPACITY response is correct */		\
+	US_FLAG(BAD_SENSE,	0x00020000)			\
+		/* Bad Sense (never more than 18 bytes) */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From 91017f9cf5fcfb601b8d583c896ac7de7d200c57 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 3 Dec 2009 09:44:34 -0800
Subject: USB: Refactor code to find alternate interface settings.

Refactor out the code to find alternate interface settings into
usb_find_alt_setting().  Print a debugging message and return null if the
alt setting is not found.

While we're at it, correct a bug in the refactored code.  The interfaces
in the configuration's interface cache are not necessarily in numerical
order, so we can't just use the interface number as an array index.  Loop
through the interface caches, looking for the correct interface.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 12 ++----------
 drivers/usb/core/usb.c | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/usb.h    |  4 ++++
 3 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index daac0427bfd5..fc235b02ff27 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1606,7 +1606,6 @@ int usb_hcd_check_bandwidth(struct usb_device *udev,
 		struct usb_interface *new_intf)
 {
 	int num_intfs, i, j;
-	struct usb_interface_cache *intf_cache;
 	struct usb_host_interface *alt = NULL;
 	int ret = 0;
 	struct usb_hcd *hcd;
@@ -1654,15 +1653,8 @@ int usb_hcd_check_bandwidth(struct usb_device *udev,
 			}
 		}
 		for (i = 0; i < num_intfs; ++i) {
-
-			/* Dig the endpoints for alt setting 0 out of the
-			 * interface cache for this interface
-			 */
-			intf_cache = new_config->intf_cache[i];
-			for (j = 0; j < intf_cache->num_altsetting; j++) {
-				if (intf_cache->altsetting[j].desc.bAlternateSetting == 0)
-					alt = &intf_cache->altsetting[j];
-			}
+			/* Set up endpoints for alternate interface setting 0 */
+			alt = usb_find_alt_setting(new_config, i, 0);
 			if (!alt) {
 				printk(KERN_DEBUG "Did not find alt setting 0 for intf %d\n", i);
 				continue;
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index d1e9440799de..99e54586a545 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -63,6 +63,43 @@ MODULE_PARM_DESC(autosuspend, "default autosuspend delay");
 #endif
 
 
+/**
+ * usb_find_alt_setting() - Given a configuration, find the alternate setting
+ * for the given interface.
+ * @config - the configuration to search (not necessarily the current config).
+ * @iface_num - interface number to search in
+ * @alt_num - alternate interface setting number to search for.
+ *
+ * Search the configuration's interface cache for the given alt setting.
+ */
+struct usb_host_interface *usb_find_alt_setting(
+		struct usb_host_config *config,
+		unsigned int iface_num,
+		unsigned int alt_num)
+{
+	struct usb_interface_cache *intf_cache = NULL;
+	int i;
+
+	for (i = 0; i < config->desc.bNumInterfaces; i++) {
+		if (config->intf_cache[i]->altsetting[0].desc.bInterfaceNumber
+				== iface_num) {
+			intf_cache = config->intf_cache[i];
+			break;
+		}
+	}
+	if (!intf_cache)
+		return NULL;
+	for (i = 0; i < intf_cache->num_altsetting; i++)
+		if (intf_cache->altsetting[i].desc.bAlternateSetting == alt_num)
+			return &intf_cache->altsetting[i];
+
+	printk(KERN_DEBUG "Did not find alt setting %u for intf %u, "
+			"config %u\n", alt_num, iface_num,
+			config->desc.bConfigurationValue);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(usb_find_alt_setting);
+
 /**
  * usb_ifnum_to_if - get the interface object with a given interface number
  * @dev: the device whose current configuration is considered
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 6af3581e1114..e101a2d04d75 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -619,6 +619,10 @@ extern struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev,
 		unsigned ifnum);
 extern struct usb_host_interface *usb_altnum_to_altsetting(
 		const struct usb_interface *intf, unsigned int altnum);
+extern struct usb_host_interface *usb_find_alt_setting(
+		struct usb_host_config *config,
+		unsigned int iface_num,
+		unsigned int alt_num);
 
 
 /**
-- 
cgit v1.2.3


From f53a2ade0bb9f2a81f473e6469155172a96b7c38 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 9 Oct 2009 12:56:41 +0100
Subject: tty: esp: remove broken driver

The ESP driver has been marked broken for years. It's an old ISA device
that clearly nobody cares about any more. Remove it

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/serial/hayes-esp.txt |  154 ---
 drivers/char/Kconfig               |   13 -
 drivers/char/Makefile              |    1 -
 drivers/char/esp.c                 | 2533 ------------------------------------
 include/linux/Kbuild               |    1 -
 include/linux/hayesesp.h           |  114 --
 6 files changed, 2816 deletions(-)
 delete mode 100644 Documentation/serial/hayes-esp.txt
 delete mode 100644 drivers/char/esp.c
 delete mode 100644 include/linux/hayesesp.h

(limited to 'include/linux')

diff --git a/Documentation/serial/hayes-esp.txt b/Documentation/serial/hayes-esp.txt
deleted file mode 100644
index 09b5d5856758..000000000000
--- a/Documentation/serial/hayes-esp.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-HAYES ESP DRIVER VERSION 2.1
-
-A big thanks to the people at Hayes, especially Alan Adamson.  Their support
-has enabled me to provide enhancements to the driver.
-
-Please report your experiences with this driver to me (arobinso@nyx.net).  I
-am looking for both positive and negative feedback.
-
-*** IMPORTANT CHANGES FOR 2.1 ***
-Support for PIO mode.  Five situations will cause PIO mode to be used:
-1) A multiport card is detected.  PIO mode will always be used.  (8 port cards
-do not support DMA).
-2) The DMA channel is set to an invalid value (anything other than 1 or 3).
-3) The DMA buffer/channel could not be allocated.  The port will revert to PIO
-mode until it is reopened.
-4) Less than a specified number of bytes need to be transferred to/from the
-FIFOs.  PIO mode will be used for that transfer only.
-5) A port needs to do a DMA transfer and another port is already using the
-DMA channel.  PIO mode will be used for that transfer only.
-
-Since the Hayes ESP seems to conflict with other cards (notably sound cards)
-when using DMA, DMA is turned off by default.  To use DMA, it must be turned
-on explicitly, either with the "dma=" option described below or with
-setserial.  A multiport card can be forced into DMA mode by using setserial;
-however, most multiport cards don't support DMA.
-
-The latest version of setserial allows the enhanced configuration of the ESP
-card to be viewed and modified.
-***
-
-This package contains the files needed to compile a module to support the Hayes
-ESP card.  The drivers are basically a modified version of the serial drivers.
-
-Features:
-
-- Uses the enhanced mode of the ESP card, allowing a wider range of
-  interrupts and features than compatibility mode
-- Uses DMA and 16 bit PIO mode to transfer data to and from the ESP's FIFOs,
-  reducing CPU load
-- Supports primary and secondary ports
-
-
-If the driver is compiled as a module, the IRQs to use can be specified by
-using the irq= option.  The format is:
-
-irq=[0x100],[0x140],[0x180],[0x200],[0x240],[0x280],[0x300],[0x380]
-
-The address in brackets is the base address of the card.  The IRQ of
-nonexistent cards can be set to 0.  If an IRQ of a card that does exist is set
-to 0, the driver will attempt to guess at the correct IRQ.  For example, to set
-the IRQ of the card at address 0x300 to 12, the insmod command would be:
-
-insmod esp irq=0,0,0,0,0,0,12,0
-
-The custom divisor can be set by using the divisor= option.  The format is the
-same as for the irq= option.  Each divisor value is a series of hex digits,
-with each digit representing the divisor to use for a corresponding port.  The
-divisor value is constructed RIGHT TO LEFT.  Specifying a nonzero divisor value
-will automatically set the spd_cust flag.  To calculate the divisor to use for
-a certain baud rate, divide the port's base baud (generally 921600) by the
-desired rate.  For example, to set the divisor of the primary port at 0x300 to
-4 and the divisor of the secondary port at 0x308 to 8, the insmod command would
-be:
-
-insmod esp divisor=0,0,0,0,0,0,0x84,0
-
-The dma= option can be used to set the DMA channel.  The channel can be either
-1 or 3.  Specifying any other value will force the driver to use PIO mode.
-For example, to set the DMA channel to 3, the insmod command would be:
-
-insmod esp dma=3
-
-The rx_trigger= and tx_trigger= options can be used to set the FIFO trigger
-levels.  They specify when the ESP card should send an interrupt.  Larger
-values will decrease the number of interrupts; however, a value too high may
-result in data loss.  Valid values are 1 through 1023, with 768 being the
-default.  For example, to set the receive trigger level to 512 bytes and the
-transmit trigger level to 700 bytes, the insmod command would be:
-
-insmod esp rx_trigger=512 tx_trigger=700
-
-The flow_off= and flow_on= options can be used to set the hardware flow off/
-flow on levels.  The flow on level must be lower than the flow off level, and
-the flow off level should be higher than rx_trigger.  Valid values are 1
-through 1023, with 1016 being the default flow off level and 944 being the
-default flow on level.  For example, to set the flow off level to 1000 bytes
-and the flow on level to 935 bytes, the insmod command would be:
-
-insmod esp flow_off=1000 flow_on=935
-
-The rx_timeout= option can be used to set the receive timeout value.  This
-value indicates how long after receiving the last character that the ESP card
-should wait before signalling an interrupt.  Valid values are 0 though 255,
-with 128 being the default.  A value too high will increase latency, and a
-value too low will cause unnecessary interrupts.  For example, to set the
-receive timeout to 255, the insmod command would be:
-
-insmod esp rx_timeout=255
-
-The pio_threshold= option sets the threshold (in number of characters) for
-using PIO mode instead of DMA mode.  For example, if this value is 32,
-transfers of 32 bytes or less will always use PIO mode.
-
-insmod esp pio_threshold=32
-
-Multiple options can be listed on the insmod command line by separating each
-option with a space.  For example:
-
-insmod esp dma=3 trigger=512
-
-The esp module can be automatically loaded when needed.  To cause this to
-happen, add the following lines to /etc/modprobe.conf (replacing the last line
-with options for your configuration):
-
-alias char-major-57 esp
-alias char-major-58 esp
-options esp irq=0,0,0,0,0,0,3,0 divisor=0,0,0,0,0,0,0x4,0
-
-You may also need to run 'depmod -a'.
-
-Devices must be created manually.  To create the devices, note the output from
-the module after it is inserted.  The output will appear in the location where
-kernel messages usually appear (usually /var/adm/messages).  Create two devices
-for each 'tty' mentioned, one with major of 57 and the other with major of 58.
-The minor number should be the same as the tty number reported.  The commands
-would be (replace ? with the tty number):
-
-mknod /dev/ttyP? c 57 ?
-mknod /dev/cup? c 58 ?
-
-For example, if the following line appears:
-
-Oct 24 18:17:23 techno kernel: ttyP8 at 0x0140 (irq = 3) is an ESP primary port
-
-...two devices should be created:
-
-mknod /dev/ttyP8 c 57 8
-mknod /dev/cup8 c 58 8
-
-You may need to set the permissions on the devices:
-
-chmod 666 /dev/ttyP*
-chmod 666 /dev/cup*
-
-The ESP module and the serial module should not conflict (they can be used at
-the same time).  After the ESP module has been loaded the ports on the ESP card
-will no longer be accessible by the serial driver.
-
-If I/O errors are experienced when accessing the port, check for IRQ and DMA
-conflicts ('cat /proc/interrupts' and 'cat /proc/dma' for a list of IRQs and
-DMAs currently in use).
-
-Enjoy!
-Andrew J. Robinson <arobinso@nyx.net>
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 6aad99ec4e0f..6f31c9472100 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -201,19 +201,6 @@ config DIGIEPCA
 	  To compile this driver as a module, choose M here: the
 	  module will be called epca.
 
-config ESPSERIAL
-	tristate "Hayes ESP serial port support"
-	depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API && BROKEN
-	help
-	  This is a driver which supports Hayes ESP serial ports.  Both single
-	  port cards and multiport cards are supported.  Make sure to read
-	  <file:Documentation/hayes-esp.txt>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called esp.
-
-	  If unsure, say N.
-
 config MOXA_INTELLIO
 	tristate "Moxa Intellio support"
 	depends on SERIAL_NONSTANDARD && (ISA || EISA || PCI)
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 19a79dd79eee..f957edf7e45d 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -18,7 +18,6 @@ obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)	+= vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
 obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
-obj-$(CONFIG_ESPSERIAL)		+= esp.o
 obj-$(CONFIG_MVME147_SCC)	+= generic_serial.o vme_scc.o
 obj-$(CONFIG_MVME162_SCC)	+= generic_serial.o vme_scc.o
 obj-$(CONFIG_BVME6000_SCC)	+= generic_serial.o vme_scc.o
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
deleted file mode 100644
index b19d43cd9542..000000000000
--- a/drivers/char/esp.c
+++ /dev/null
@@ -1,2533 +0,0 @@
-/*
- *  esp.c - driver for Hayes ESP serial cards
- *
- *  --- Notices from serial.c, upon which this driver is based ---
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  Extensively rewritten by Theodore Ts'o, 8/16/92 -- 9/14/92.  Now
- *  much more extensible to support other serial cards based on the
- *  16450/16550A UART's.  Added support for the AST FourPort and the
- *  Accent Async board.
- *
- *  set_serial_info fixed to set the flags, custom divisor, and uart
- * 	type fields.  Fix suggested by Michael K. Johnson 12/12/92.
- *
- *  11/95: TIOCMIWAIT, TIOCGICOUNT by Angelo Haritsis <ah@doc.ic.ac.uk>
- *
- *  03/96: Modularised by Angelo Haritsis <ah@doc.ic.ac.uk>
- *
- *  rs_set_termios fixed to look also for changes of the input
- *      flags INPCK, BRKINT, PARMRK, IGNPAR and IGNBRK.
- *                                            Bernd Anhäupl 05/17/96.
- *
- * --- End of notices from serial.c ---
- *
- * Support for the ESP serial card by Andrew J. Robinson
- *     <arobinso@nyx.net> (Card detection routine taken from a patch
- *     by Dennis J. Boylan).  Patches to allow use with 2.1.x contributed
- *     by Chris Faylor.
- *
- * Most recent changes: (Andrew J. Robinson)
- *   Support for PIO mode.  This allows the driver to work properly with
- *     multiport cards.
- *
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br> -
- * several cleanups, use module_init/module_exit, etc
- *
- * This module exports the following rs232 io functions:
- *
- *	int espserial_init(void);
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial.h>
-#include <linux/serialP.h>
-#include <linux/serial_reg.h>
-#include <linux/major.h>
-#include <linux/string.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-
-#include <asm/system.h>
-#include <linux/io.h>
-
-#include <asm/dma.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include <linux/hayesesp.h>
-
-#define NR_PORTS 64	/* maximum number of ports */
-#define NR_PRIMARY 8	/* maximum number of primary ports */
-#define REGION_SIZE 8   /* size of io region to request */
-
-/* The following variables can be set by giving module options */
-static int irq[NR_PRIMARY];	/* IRQ for each base port */
-static unsigned int divisor[NR_PRIMARY]; /* custom divisor for each port */
-static unsigned int dma = ESP_DMA_CHANNEL; /* DMA channel */
-static unsigned int rx_trigger = ESP_RX_TRIGGER;
-static unsigned int tx_trigger = ESP_TX_TRIGGER;
-static unsigned int flow_off = ESP_FLOW_OFF;
-static unsigned int flow_on = ESP_FLOW_ON;
-static unsigned int rx_timeout = ESP_RX_TMOUT;
-static unsigned int pio_threshold = ESP_PIO_THRESHOLD;
-
-MODULE_LICENSE("GPL");
-
-module_param_array(irq, int, NULL, 0);
-module_param_array(divisor, uint, NULL, 0);
-module_param(dma, uint, 0);
-module_param(rx_trigger, uint, 0);
-module_param(tx_trigger, uint, 0);
-module_param(flow_off, uint, 0);
-module_param(flow_on, uint, 0);
-module_param(rx_timeout, uint, 0);
-module_param(pio_threshold, uint, 0);
-
-/* END */
-
-static char *dma_buffer;
-static int dma_bytes;
-static struct esp_pio_buffer *free_pio_buf;
-
-#define DMA_BUFFER_SZ 1024
-
-#define WAKEUP_CHARS 1024
-
-static char serial_name[] __initdata = "ESP serial driver";
-static char serial_version[] __initdata = "2.2";
-
-static struct tty_driver *esp_driver;
-
-/*
- * Serial driver configuration section.  Here are the various options:
- *
- * SERIAL_PARANOIA_CHECK
- * 		Check the magic number for the esp_structure where
- * 		ever possible.
- */
-
-#undef SERIAL_PARANOIA_CHECK
-#define SERIAL_DO_RESTART
-
-#undef SERIAL_DEBUG_INTR
-#undef SERIAL_DEBUG_OPEN
-#undef SERIAL_DEBUG_FLOW
-
-#if defined(MODULE) && defined(SERIAL_DEBUG_MCOUNT)
-#define DBG_CNT(s) printk(KERN_DEBUG "(%s): [%x] refc=%d, serc=%d, ttyc=%d -> %s\n", \
-				tty->name, info->port.flags, \
-				serial_driver.refcount, \
-				info->port.count, tty->count, s)
-#else
-#define DBG_CNT(s)
-#endif
-
-static struct esp_struct *ports;
-
-static void change_speed(struct esp_struct *info);
-static void rs_wait_until_sent(struct tty_struct *, int);
-
-/*
- * The ESP card has a clock rate of 14.7456 MHz (that is, 2**ESPC_SCALE
- * times the normal 1.8432 Mhz clock of most serial boards).
- */
-#define BASE_BAUD ((1843200 / 16) * (1 << ESPC_SCALE))
-
-/* Standard COM flags (except for COM4, because of the 8514 problem) */
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-
-static inline int serial_paranoia_check(struct esp_struct *info,
-					char *name, const char *routine)
-{
-#ifdef SERIAL_PARANOIA_CHECK
-	static const char badmagic[] = KERN_WARNING
-		"Warning: bad magic number for serial struct (%s) in %s\n";
-	static const char badinfo[] = KERN_WARNING
-		"Warning: null esp_struct for (%s) in %s\n";
-
-	if (!info) {
-		printk(badinfo, name, routine);
-		return 1;
-	}
-	if (info->magic != ESP_MAGIC) {
-		printk(badmagic, name, routine);
-		return 1;
-	}
-#endif
-	return 0;
-}
-
-static inline unsigned int serial_in(struct esp_struct *info, int offset)
-{
-	return inb(info->io_port + offset);
-}
-
-static inline void serial_out(struct esp_struct *info, int offset,
-			      unsigned char value)
-{
-	outb(value, info->io_port+offset);
-}
-
-/*
- * ------------------------------------------------------------
- * rs_stop() and rs_start()
- *
- * This routines are called before setting or resetting tty->stopped.
- * They enable or disable transmitter interrupts, as necessary.
- * ------------------------------------------------------------
- */
-static void rs_stop(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_stop"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-	if (info->IER & UART_IER_THRI) {
-		info->IER &= ~UART_IER_THRI;
-		serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-		serial_out(info, UART_ESI_CMD2, info->IER);
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-static void rs_start(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_start"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-	if (info->xmit_cnt && info->xmit_buf && !(info->IER & UART_IER_THRI)) {
-		info->IER |= UART_IER_THRI;
-		serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-		serial_out(info, UART_ESI_CMD2, info->IER);
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-/*
- * ----------------------------------------------------------------------
- *
- * Here starts the interrupt handling routines.  All of the following
- * subroutines are declared as inline and are folded into
- * rs_interrupt().  They were separated out for readability's sake.
- *
- * Note: rs_interrupt() is a "fast" interrupt, which means that it
- * runs with interrupts turned off.  People who may want to modify
- * rs_interrupt() should try to keep the interrupt handler as fast as
- * possible.  After you are done making modifications, it is not a bad
- * idea to do:
- *
- * gcc -S -DKERNEL -Wall -Wstrict-prototypes -O6 -fomit-frame-pointer serial.c
- *
- * and look at the resulting assemble code in serial.s.
- *
- * 				- Ted Ts'o (tytso@mit.edu), 7-Mar-93
- * -----------------------------------------------------------------------
- */
-
-static DEFINE_SPINLOCK(pio_lock);
-
-static inline struct esp_pio_buffer *get_pio_buffer(void)
-{
-	struct esp_pio_buffer *buf;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pio_lock, flags);
-	if (free_pio_buf) {
-		buf = free_pio_buf;
-		free_pio_buf = buf->next;
-	} else {
-		buf = kmalloc(sizeof(struct esp_pio_buffer), GFP_ATOMIC);
-	}
-	spin_unlock_irqrestore(&pio_lock, flags);
-	return buf;
-}
-
-static inline void release_pio_buffer(struct esp_pio_buffer *buf)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&pio_lock, flags);
-	buf->next = free_pio_buf;
-	free_pio_buf = buf;
-	spin_unlock_irqrestore(&pio_lock, flags);
-}
-
-static inline void receive_chars_pio(struct esp_struct *info, int num_bytes)
-{
-	struct tty_struct *tty = info->port.tty;
-	int i;
-	struct esp_pio_buffer *pio_buf;
-	struct esp_pio_buffer *err_buf;
-	unsigned char status_mask;
-
-	pio_buf = get_pio_buffer();
-
-	if (!pio_buf)
-		return;
-
-	err_buf = get_pio_buffer();
-
-	if (!err_buf) {
-		release_pio_buffer(pio_buf);
-		return;
-	}
-
-	status_mask = (info->read_status_mask >> 2) & 0x07;
-
-	for (i = 0; i < num_bytes - 1; i += 2) {
-		*((unsigned short *)(pio_buf->data + i)) =
-			inw(info->io_port + UART_ESI_RX);
-		err_buf->data[i] = serial_in(info, UART_ESI_RWS);
-		err_buf->data[i + 1] = (err_buf->data[i] >> 3) & status_mask;
-		err_buf->data[i] &= status_mask;
-	}
-
-	if (num_bytes & 0x0001) {
-		pio_buf->data[num_bytes - 1] = serial_in(info, UART_ESI_RX);
-		err_buf->data[num_bytes - 1] =
-			(serial_in(info, UART_ESI_RWS) >> 3) & status_mask;
-	}
-
-	/* make sure everything is still ok since interrupts were enabled */
-	tty = info->port.tty;
-
-	if (!tty) {
-		release_pio_buffer(pio_buf);
-		release_pio_buffer(err_buf);
-		info->stat_flags &= ~ESP_STAT_RX_TIMEOUT;
-		return;
-	}
-
-	status_mask = (info->ignore_status_mask >> 2) & 0x07;
-
-	for (i = 0; i < num_bytes; i++) {
-		if (!(err_buf->data[i] & status_mask)) {
-			int flag = 0;
-
-			if (err_buf->data[i] & 0x04) {
-				flag = TTY_BREAK;
-				if (info->port.flags & ASYNC_SAK)
-					do_SAK(tty);
-			} else if (err_buf->data[i] & 0x02)
-				flag = TTY_FRAME;
-			else if (err_buf->data[i] & 0x01)
-				flag = TTY_PARITY;
-			tty_insert_flip_char(tty, pio_buf->data[i], flag);
-		}
-	}
-
-	tty_schedule_flip(tty);
-
-	info->stat_flags &= ~ESP_STAT_RX_TIMEOUT;
-	release_pio_buffer(pio_buf);
-	release_pio_buffer(err_buf);
-}
-
-static void program_isa_dma(int dma, int dir, unsigned long addr, int len)
-{
-	unsigned long flags;
-
-	flags = claim_dma_lock();
-	disable_dma(dma);
-	clear_dma_ff(dma);
-	set_dma_mode(dma, dir);
-	set_dma_addr(dma, addr);
-	set_dma_count(dma, len);
-	enable_dma(dma);
-	release_dma_lock(flags);
-}
-
-static void receive_chars_dma(struct esp_struct *info, int num_bytes)
-{
-	info->stat_flags &= ~ESP_STAT_RX_TIMEOUT;
-	dma_bytes = num_bytes;
-	info->stat_flags |= ESP_STAT_DMA_RX;
-
-	program_isa_dma(dma, DMA_MODE_READ, isa_virt_to_bus(dma_buffer),
-								dma_bytes);
-	serial_out(info, UART_ESI_CMD1, ESI_START_DMA_RX);
-}
-
-static inline void receive_chars_dma_done(struct esp_struct *info,
-					    int status)
-{
-	struct tty_struct *tty = info->port.tty;
-	int num_bytes;
-	unsigned long flags;
-
-	flags = claim_dma_lock();
-	disable_dma(dma);
-	clear_dma_ff(dma);
-
-	info->stat_flags &= ~ESP_STAT_DMA_RX;
-	num_bytes = dma_bytes - get_dma_residue(dma);
-	release_dma_lock(flags);
-
-	info->icount.rx += num_bytes;
-
-	if (num_bytes > 0) {
-		tty_insert_flip_string(tty, dma_buffer, num_bytes - 1);
-
-		status &= (0x1c & info->read_status_mask);
-
-		/* Is the status significant or do we throw the last byte ? */
-		if (!(status & info->ignore_status_mask)) {
-			int statflag = 0;
-
-			if (status & 0x10) {
-				statflag = TTY_BREAK;
-				(info->icount.brk)++;
-				if (info->port.flags & ASYNC_SAK)
-					do_SAK(tty);
-			} else if (status & 0x08) {
-				statflag = TTY_FRAME;
-				info->icount.frame++;
-			} else if (status & 0x04) {
-				statflag = TTY_PARITY;
-				info->icount.parity++;
-			}
-			tty_insert_flip_char(tty, dma_buffer[num_bytes - 1],
-								statflag);
-		}
-		tty_schedule_flip(tty);
-	}
-
-	if (dma_bytes != num_bytes) {
-		num_bytes = dma_bytes - num_bytes;
-		dma_bytes = 0;
-		receive_chars_dma(info, num_bytes);
-	} else
-		dma_bytes = 0;
-}
-
-/* Caller must hold info->lock */
-
-static inline void transmit_chars_pio(struct esp_struct *info,
-					int space_avail)
-{
-	int i;
-	struct esp_pio_buffer *pio_buf;
-
-	pio_buf = get_pio_buffer();
-
-	if (!pio_buf)
-		return;
-
-	while (space_avail && info->xmit_cnt) {
-		if (info->xmit_tail + space_avail <= ESP_XMIT_SIZE) {
-			memcpy(pio_buf->data,
-			       &(info->xmit_buf[info->xmit_tail]),
-			       space_avail);
-		} else {
-			i = ESP_XMIT_SIZE - info->xmit_tail;
-			memcpy(pio_buf->data,
-			       &(info->xmit_buf[info->xmit_tail]), i);
-			memcpy(&(pio_buf->data[i]), info->xmit_buf,
-			       space_avail - i);
-		}
-
-		info->xmit_cnt -= space_avail;
-		info->xmit_tail = (info->xmit_tail + space_avail) &
-			(ESP_XMIT_SIZE - 1);
-
-		for (i = 0; i < space_avail - 1; i += 2) {
-			outw(*((unsigned short *)(pio_buf->data + i)),
-			     info->io_port + UART_ESI_TX);
-		}
-
-		if (space_avail & 0x0001)
-			serial_out(info, UART_ESI_TX,
-				   pio_buf->data[space_avail - 1]);
-
-		if (info->xmit_cnt) {
-			serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-			serial_out(info, UART_ESI_CMD1, ESI_GET_TX_AVAIL);
-			space_avail = serial_in(info, UART_ESI_STAT1) << 8;
-			space_avail |= serial_in(info, UART_ESI_STAT2);
-
-			if (space_avail > info->xmit_cnt)
-				space_avail = info->xmit_cnt;
-		}
-	}
-
-	if (info->xmit_cnt < WAKEUP_CHARS) {
-		if (info->port.tty)
-			tty_wakeup(info->port.tty);
-
-#ifdef SERIAL_DEBUG_INTR
-		printk("THRE...");
-#endif
-
-		if (info->xmit_cnt <= 0) {
-			info->IER &= ~UART_IER_THRI;
-			serial_out(info, UART_ESI_CMD1,
-				   ESI_SET_SRV_MASK);
-			serial_out(info, UART_ESI_CMD2, info->IER);
-		}
-	}
-
-	release_pio_buffer(pio_buf);
-}
-
-/* Caller must hold info->lock */
-static inline void transmit_chars_dma(struct esp_struct *info, int num_bytes)
-{
-	dma_bytes = num_bytes;
-
-	if (info->xmit_tail + dma_bytes <= ESP_XMIT_SIZE) {
-		memcpy(dma_buffer, &(info->xmit_buf[info->xmit_tail]),
-		       dma_bytes);
-	} else {
-		int i = ESP_XMIT_SIZE - info->xmit_tail;
-		memcpy(dma_buffer, &(info->xmit_buf[info->xmit_tail]),
-			i);
-		memcpy(&(dma_buffer[i]), info->xmit_buf, dma_bytes - i);
-	}
-
-	info->xmit_cnt -= dma_bytes;
-	info->xmit_tail = (info->xmit_tail + dma_bytes) & (ESP_XMIT_SIZE - 1);
-
-	if (info->xmit_cnt < WAKEUP_CHARS) {
-		if (info->port.tty)
-			tty_wakeup(info->port.tty);
-
-#ifdef SERIAL_DEBUG_INTR
-		printk("THRE...");
-#endif
-
-		if (info->xmit_cnt <= 0) {
-			info->IER &= ~UART_IER_THRI;
-			serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-			serial_out(info, UART_ESI_CMD2, info->IER);
-		}
-	}
-
-	info->stat_flags |= ESP_STAT_DMA_TX;
-
-	program_isa_dma(dma, DMA_MODE_WRITE, isa_virt_to_bus(dma_buffer),
-								dma_bytes);
-	serial_out(info, UART_ESI_CMD1, ESI_START_DMA_TX);
-}
-
-static inline void transmit_chars_dma_done(struct esp_struct *info)
-{
-	int num_bytes;
-	unsigned long flags;
-
-	flags = claim_dma_lock();
-	disable_dma(dma);
-	clear_dma_ff(dma);
-
-	num_bytes = dma_bytes - get_dma_residue(dma);
-	info->icount.tx += dma_bytes;
-	release_dma_lock(flags);
-
-	if (dma_bytes != num_bytes) {
-		dma_bytes -= num_bytes;
-		memmove(dma_buffer, dma_buffer + num_bytes, dma_bytes);
-
-		program_isa_dma(dma, DMA_MODE_WRITE,
-				isa_virt_to_bus(dma_buffer), dma_bytes);
-
-		serial_out(info, UART_ESI_CMD1, ESI_START_DMA_TX);
-	} else {
-		dma_bytes = 0;
-		info->stat_flags &= ~ESP_STAT_DMA_TX;
-	}
-}
-
-static void check_modem_status(struct esp_struct *info)
-{
-	int	status;
-
-	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-	status = serial_in(info, UART_ESI_STAT2);
-
-	if (status & UART_MSR_ANY_DELTA) {
-		/* update input line counters */
-		if (status & UART_MSR_TERI)
-			info->icount.rng++;
-		if (status & UART_MSR_DDSR)
-			info->icount.dsr++;
-		if (status & UART_MSR_DDCD)
-			info->icount.dcd++;
-		if (status & UART_MSR_DCTS)
-			info->icount.cts++;
-		wake_up_interruptible(&info->port.delta_msr_wait);
-	}
-
-	if ((info->port.flags & ASYNC_CHECK_CD) && (status & UART_MSR_DDCD)) {
-#if (defined(SERIAL_DEBUG_OPEN) || defined(SERIAL_DEBUG_INTR))
-		printk("ttys%d CD now %s...", info->line,
-		       (status & UART_MSR_DCD) ? "on" : "off");
-#endif
-		if (status & UART_MSR_DCD)
-			wake_up_interruptible(&info->port.open_wait);
-		else {
-#ifdef SERIAL_DEBUG_OPEN
-			printk("scheduling hangup...");
-#endif
-			tty_hangup(info->port.tty);
-		}
-	}
-}
-
-/*
- * This is the serial driver's interrupt routine
- */
-static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
-{
-	struct esp_struct *info;
-	unsigned err_status;
-	unsigned int scratch;
-
-#ifdef SERIAL_DEBUG_INTR
-	printk("rs_interrupt_single(%d)...", irq);
-#endif
-	info = (struct esp_struct *)dev_id;
-	err_status = 0;
-	scratch = serial_in(info, UART_ESI_SID);
-
-	spin_lock(&info->lock);
-
-	if (!info->port.tty) {
-		spin_unlock(&info->lock);
-		return IRQ_NONE;
-	}
-
-	if (scratch & 0x04) { /* error */
-		serial_out(info, UART_ESI_CMD1, ESI_GET_ERR_STAT);
-		err_status = serial_in(info, UART_ESI_STAT1);
-		serial_in(info, UART_ESI_STAT2);
-
-		if (err_status & 0x01)
-			info->stat_flags |= ESP_STAT_RX_TIMEOUT;
-
-		if (err_status & 0x20) /* UART status */
-			check_modem_status(info);
-
-		if (err_status & 0x80) /* Start break */
-			wake_up_interruptible(&info->break_wait);
-	}
-
-	if ((scratch & 0x88) || /* DMA completed or timed out */
-	    (err_status & 0x1c) /* receive error */) {
-		if (info->stat_flags & ESP_STAT_DMA_RX)
-			receive_chars_dma_done(info, err_status);
-		else if (info->stat_flags & ESP_STAT_DMA_TX)
-			transmit_chars_dma_done(info);
-	}
-
-	if (!(info->stat_flags & (ESP_STAT_DMA_RX | ESP_STAT_DMA_TX)) &&
-	    ((scratch & 0x01) || (info->stat_flags & ESP_STAT_RX_TIMEOUT)) &&
-	    (info->IER & UART_IER_RDI)) {
-		int num_bytes;
-
-		serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-		serial_out(info, UART_ESI_CMD1, ESI_GET_RX_AVAIL);
-		num_bytes = serial_in(info, UART_ESI_STAT1) << 8;
-		num_bytes |= serial_in(info, UART_ESI_STAT2);
-
-		num_bytes = tty_buffer_request_room(info->port.tty, num_bytes);
-
-		if (num_bytes) {
-			if (dma_bytes ||
-			    (info->stat_flags & ESP_STAT_USE_PIO) ||
-			    (num_bytes <= info->config.pio_threshold))
-				receive_chars_pio(info, num_bytes);
-			else
-				receive_chars_dma(info, num_bytes);
-		}
-	}
-
-	if (!(info->stat_flags & (ESP_STAT_DMA_RX | ESP_STAT_DMA_TX)) &&
-	    (scratch & 0x02) && (info->IER & UART_IER_THRI)) {
-		if ((info->xmit_cnt <= 0) || info->port.tty->stopped) {
-			info->IER &= ~UART_IER_THRI;
-			serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-			serial_out(info, UART_ESI_CMD2, info->IER);
-		} else {
-			int num_bytes;
-
-			serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-			serial_out(info, UART_ESI_CMD1, ESI_GET_TX_AVAIL);
-			num_bytes = serial_in(info, UART_ESI_STAT1) << 8;
-			num_bytes |= serial_in(info, UART_ESI_STAT2);
-
-			if (num_bytes > info->xmit_cnt)
-				num_bytes = info->xmit_cnt;
-
-			if (num_bytes) {
-				if (dma_bytes ||
-				    (info->stat_flags & ESP_STAT_USE_PIO) ||
-				    (num_bytes <= info->config.pio_threshold))
-					transmit_chars_pio(info, num_bytes);
-				else
-					transmit_chars_dma(info, num_bytes);
-			}
-		}
-	}
-
-	info->last_active = jiffies;
-
-#ifdef SERIAL_DEBUG_INTR
-	printk("end.\n");
-#endif
-	spin_unlock(&info->lock);
-	return IRQ_HANDLED;
-}
-
-/*
- * -------------------------------------------------------------------
- * Here ends the serial interrupt routines.
- * -------------------------------------------------------------------
- */
-
-/*
- * ---------------------------------------------------------------
- * Low level utility subroutines for the serial driver:  routines to
- * figure out the appropriate timeout for an interrupt chain, routines
- * to initialize and startup a serial port, and routines to shutdown a
- * serial port.  Useful stuff like that.
- *
- * Caller should hold lock
- * ---------------------------------------------------------------
- */
-
-static void esp_basic_init(struct esp_struct *info)
-{
-	/* put ESPC in enhanced mode */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_MODE);
-
-	if (info->stat_flags & ESP_STAT_NEVER_DMA)
-		serial_out(info, UART_ESI_CMD2, 0x01);
-	else
-		serial_out(info, UART_ESI_CMD2, 0x31);
-
-	/* disable interrupts for now */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-	serial_out(info, UART_ESI_CMD2, 0x00);
-
-	/* set interrupt and DMA channel */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_IRQ);
-
-	if (info->stat_flags & ESP_STAT_NEVER_DMA)
-		serial_out(info, UART_ESI_CMD2, 0x01);
-	else
-		serial_out(info, UART_ESI_CMD2, (dma << 4) | 0x01);
-
-	serial_out(info, UART_ESI_CMD1, ESI_SET_ENH_IRQ);
-
-	if (info->line % 8)	/* secondary port */
-		serial_out(info, UART_ESI_CMD2, 0x0d);	/* shared */
-	else if (info->irq == 9)
-		serial_out(info, UART_ESI_CMD2, 0x02);
-	else
-		serial_out(info, UART_ESI_CMD2, info->irq);
-
-	/* set error status mask (check this) */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_ERR_MASK);
-
-	if (info->stat_flags & ESP_STAT_NEVER_DMA)
-		serial_out(info, UART_ESI_CMD2, 0xa1);
-	else
-		serial_out(info, UART_ESI_CMD2, 0xbd);
-
-	serial_out(info, UART_ESI_CMD2, 0x00);
-
-	/* set DMA timeout */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_DMA_TMOUT);
-	serial_out(info, UART_ESI_CMD2, 0xff);
-
-	/* set FIFO trigger levels */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_TRIGGER);
-	serial_out(info, UART_ESI_CMD2, info->config.rx_trigger >> 8);
-	serial_out(info, UART_ESI_CMD2, info->config.rx_trigger);
-	serial_out(info, UART_ESI_CMD2, info->config.tx_trigger >> 8);
-	serial_out(info, UART_ESI_CMD2, info->config.tx_trigger);
-
-	/* Set clock scaling and wait states */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_PRESCALAR);
-	serial_out(info, UART_ESI_CMD2, 0x04 | ESPC_SCALE);
-
-	/* set reinterrupt pacing */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_REINTR);
-	serial_out(info, UART_ESI_CMD2, 0xff);
-}
-
-static int startup(struct esp_struct *info)
-{
-	unsigned long flags;
-	int	retval = 0;
-	unsigned int num_chars;
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	if (info->port.flags & ASYNC_INITIALIZED)
-		goto out;
-
-	if (!info->xmit_buf) {
-		info->xmit_buf = (unsigned char *)get_zeroed_page(GFP_ATOMIC);
-		retval = -ENOMEM;
-		if (!info->xmit_buf)
-			goto out;
-	}
-
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "starting up ttys%d (irq %d)...",
-						info->line, info->irq);
-#endif
-
-	/* Flush the RX buffer.  Using the ESI flush command may cause */
-	/* wild interrupts, so read all the data instead. */
-
-	serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-	serial_out(info, UART_ESI_CMD1, ESI_GET_RX_AVAIL);
-	num_chars = serial_in(info, UART_ESI_STAT1) << 8;
-	num_chars |= serial_in(info, UART_ESI_STAT2);
-
-	while (num_chars > 1) {
-		inw(info->io_port + UART_ESI_RX);
-		num_chars -= 2;
-	}
-
-	if (num_chars)
-		serial_in(info, UART_ESI_RX);
-
-	/* set receive character timeout */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_RX_TIMEOUT);
-	serial_out(info, UART_ESI_CMD2, info->config.rx_timeout);
-
-	/* clear all flags except the "never DMA" flag */
-	info->stat_flags &= ESP_STAT_NEVER_DMA;
-
-	if (info->stat_flags & ESP_STAT_NEVER_DMA)
-		info->stat_flags |= ESP_STAT_USE_PIO;
-
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	/*
-	 * Allocate the IRQ
-	 */
-
-	retval = request_irq(info->irq, rs_interrupt_single, IRQF_SHARED,
-			     "esp serial", info);
-
-	if (retval) {
-		if (capable(CAP_SYS_ADMIN)) {
-			if (info->port.tty)
-				set_bit(TTY_IO_ERROR,
-					&info->port.tty->flags);
-			retval = 0;
-		}
-		goto out_unlocked;
-	}
-
-	if (!(info->stat_flags & ESP_STAT_USE_PIO) && !dma_buffer) {
-		dma_buffer = (char *)__get_dma_pages(
-			GFP_KERNEL, get_order(DMA_BUFFER_SZ));
-
-		/* use PIO mode if DMA buf/chan cannot be allocated */
-		if (!dma_buffer)
-			info->stat_flags |= ESP_STAT_USE_PIO;
-		else if (request_dma(dma, "esp serial")) {
-			free_pages((unsigned long)dma_buffer,
-				   get_order(DMA_BUFFER_SZ));
-			dma_buffer = NULL;
-			info->stat_flags |= ESP_STAT_USE_PIO;
-		}
-
-	}
-
-	info->MCR = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2;
-
-	spin_lock_irqsave(&info->lock, flags);
-	serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-	serial_out(info, UART_ESI_CMD2, UART_MCR);
-	serial_out(info, UART_ESI_CMD2, info->MCR);
-
-	/*
-	 * Finally, enable interrupts
-	 */
-	/* info->IER = UART_IER_MSI | UART_IER_RLSI | UART_IER_RDI; */
-	info->IER = UART_IER_RLSI | UART_IER_RDI | UART_IER_DMA_TMOUT |
-			UART_IER_DMA_TC;
-	serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-	serial_out(info, UART_ESI_CMD2, info->IER);
-
-	if (info->port.tty)
-		clear_bit(TTY_IO_ERROR, &info->port.tty->flags);
-	info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	/*
-	 * Set up the tty->alt_speed kludge
-	 */
-	if (info->port.tty) {
-		if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
-			info->port.tty->alt_speed = 57600;
-		if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
-			info->port.tty->alt_speed = 115200;
-		if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
-			info->port.tty->alt_speed = 230400;
-		if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
-			info->port.tty->alt_speed = 460800;
-	}
-
-	/*
-	 * set the speed of the serial port
-	 */
-	change_speed(info);
-	info->port.flags |= ASYNC_INITIALIZED;
-	return 0;
-
-out:
-	spin_unlock_irqrestore(&info->lock, flags);
-out_unlocked:
-	return retval;
-}
-
-/*
- * This routine will shutdown a serial port; interrupts are disabled, and
- * DTR is dropped if the hangup on close termio flag is on.
- */
-static void shutdown(struct esp_struct *info)
-{
-	unsigned long	flags, f;
-
-	if (!(info->port.flags & ASYNC_INITIALIZED))
-		return;
-
-#ifdef SERIAL_DEBUG_OPEN
-	printk("Shutting down serial port %d (irq %d)....", info->line,
-	       info->irq);
-#endif
-
-	spin_lock_irqsave(&info->lock, flags);
-	/*
-	 * clear delta_msr_wait queue to avoid mem leaks: we may free the irq
-	 * here so the queue might never be waken up
-	 */
-	wake_up_interruptible(&info->port.delta_msr_wait);
-	wake_up_interruptible(&info->break_wait);
-
-	/* stop a DMA transfer on the port being closed */
-	/* DMA lock is higher priority always */
-	if (info->stat_flags & (ESP_STAT_DMA_RX | ESP_STAT_DMA_TX)) {
-		f = claim_dma_lock();
-		disable_dma(dma);
-		clear_dma_ff(dma);
-		release_dma_lock(f);
-
-		dma_bytes = 0;
-	}
-
-	/*
-	 * Free the IRQ
-	 */
-	free_irq(info->irq, info);
-
-	if (dma_buffer) {
-		struct esp_struct *current_port = ports;
-
-		while (current_port) {
-			if ((current_port != info) &&
-			    (current_port->port.flags & ASYNC_INITIALIZED))
-				break;
-
-			current_port = current_port->next_port;
-		}
-
-		if (!current_port) {
-			free_dma(dma);
-			free_pages((unsigned long)dma_buffer,
-				   get_order(DMA_BUFFER_SZ));
-			dma_buffer = NULL;
-		}
-	}
-
-	if (info->xmit_buf) {
-		free_page((unsigned long) info->xmit_buf);
-		info->xmit_buf = NULL;
-	}
-
-	info->IER = 0;
-	serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-	serial_out(info, UART_ESI_CMD2, 0x00);
-
-	if (!info->port.tty || (info->port.tty->termios->c_cflag & HUPCL))
-		info->MCR &= ~(UART_MCR_DTR|UART_MCR_RTS);
-
-	info->MCR &= ~UART_MCR_OUT2;
-	serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-	serial_out(info, UART_ESI_CMD2, UART_MCR);
-	serial_out(info, UART_ESI_CMD2, info->MCR);
-
-	if (info->port.tty)
-		set_bit(TTY_IO_ERROR, &info->port.tty->flags);
-
-	info->port.flags &= ~ASYNC_INITIALIZED;
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-/*
- * This routine is called to set the UART divisor registers to match
- * the specified baud rate for a serial port.
- */
-static void change_speed(struct esp_struct *info)
-{
-	unsigned short port;
-	int	quot = 0;
-	unsigned cflag, cval;
-	int	baud, bits;
-	unsigned char flow1 = 0, flow2 = 0;
-	unsigned long flags;
-
-	if (!info->port.tty || !info->port.tty->termios)
-		return;
-	cflag = info->port.tty->termios->c_cflag;
-	port = info->io_port;
-
-	/* byte size and parity */
-	switch (cflag & CSIZE) {
-	case CS5: cval = 0x00; bits = 7; break;
-	case CS6: cval = 0x01; bits = 8; break;
-	case CS7: cval = 0x02; bits = 9; break;
-	case CS8: cval = 0x03; bits = 10; break;
-	default:  cval = 0x00; bits = 7; break;
-	}
-	if (cflag & CSTOPB) {
-		cval |= 0x04;
-		bits++;
-	}
-	if (cflag & PARENB) {
-		cval |= UART_LCR_PARITY;
-		bits++;
-	}
-	if (!(cflag & PARODD))
-		cval |= UART_LCR_EPAR;
-#ifdef CMSPAR
-	if (cflag & CMSPAR)
-		cval |= UART_LCR_SPAR;
-#endif
-	baud = tty_get_baud_rate(info->port.tty);
-	if (baud == 38400 &&
-		((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST))
-		quot = info->custom_divisor;
-	else {
-		if (baud == 134) /* Special case since 134 is really 134.5 */
-			quot = (2*BASE_BAUD / 269);
-		else if (baud)
-			quot = BASE_BAUD / baud;
-	}
-	/* If the quotient is ever zero, default to 9600 bps */
-	if (!quot)
-		quot = BASE_BAUD / 9600;
-
-	if (baud) {
-		/* Actual rate */
-		baud = BASE_BAUD/quot;
-		tty_encode_baud_rate(info->port.tty, baud, baud);
-	}
-	info->timeout = ((1024 * HZ * bits * quot) / BASE_BAUD) + (HZ / 50);
-
-	/* CTS flow control flag and modem status interrupts */
-	/* info->IER &= ~UART_IER_MSI; */
-	if (cflag & CRTSCTS) {
-		info->port.flags |= ASYNC_CTS_FLOW;
-		/* info->IER |= UART_IER_MSI; */
-		flow1 = 0x04;
-		flow2 = 0x10;
-	} else
-		info->port.flags &= ~ASYNC_CTS_FLOW;
-	if (cflag & CLOCAL)
-		info->port.flags &= ~ASYNC_CHECK_CD;
-	else
-		info->port.flags |= ASYNC_CHECK_CD;
-
-	/*
-	 * Set up parity check flag
-	 */
-	info->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR;
-	if (I_INPCK(info->port.tty))
-		info->read_status_mask |= UART_LSR_FE | UART_LSR_PE;
-	if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
-		info->read_status_mask |= UART_LSR_BI;
-
-	info->ignore_status_mask = 0;
-#if 0
-	/* This should be safe, but for some broken bits of hardware... */
-	if (I_IGNPAR(info->port.tty)) {
-		info->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE;
-		info->read_status_mask |= UART_LSR_PE | UART_LSR_FE;
-	}
-#endif
-	if (I_IGNBRK(info->port.tty)) {
-		info->ignore_status_mask |= UART_LSR_BI;
-		info->read_status_mask |= UART_LSR_BI;
-		/*
-		 * If we're ignore parity and break indicators, ignore
-		 * overruns too.  (For real raw support).
-		 */
-		if (I_IGNPAR(info->port.tty)) {
-			info->ignore_status_mask |= UART_LSR_OE | \
-				UART_LSR_PE | UART_LSR_FE;
-			info->read_status_mask |= UART_LSR_OE | \
-				UART_LSR_PE | UART_LSR_FE;
-		}
-	}
-
-	if (I_IXOFF(info->port.tty))
-		flow1 |= 0x81;
-
-	spin_lock_irqsave(&info->lock, flags);
-	/* set baud */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_BAUD);
-	serial_out(info, UART_ESI_CMD2, quot >> 8);
-	serial_out(info, UART_ESI_CMD2, quot & 0xff);
-
-	/* set data bits, parity, etc. */
-	serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-	serial_out(info, UART_ESI_CMD2, UART_LCR);
-	serial_out(info, UART_ESI_CMD2, cval);
-
-	/* Enable flow control */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_FLOW_CNTL);
-	serial_out(info, UART_ESI_CMD2, flow1);
-	serial_out(info, UART_ESI_CMD2, flow2);
-
-	/* set flow control characters (XON/XOFF only) */
-	if (I_IXOFF(info->port.tty)) {
-		serial_out(info, UART_ESI_CMD1, ESI_SET_FLOW_CHARS);
-		serial_out(info, UART_ESI_CMD2, START_CHAR(info->port.tty));
-		serial_out(info, UART_ESI_CMD2, STOP_CHAR(info->port.tty));
-		serial_out(info, UART_ESI_CMD2, 0x10);
-		serial_out(info, UART_ESI_CMD2, 0x21);
-		switch (cflag & CSIZE) {
-		case CS5:
-			serial_out(info, UART_ESI_CMD2, 0x1f);
-			break;
-		case CS6:
-			serial_out(info, UART_ESI_CMD2, 0x3f);
-			break;
-		case CS7:
-		case CS8:
-			serial_out(info, UART_ESI_CMD2, 0x7f);
-			break;
-		default:
-			serial_out(info, UART_ESI_CMD2, 0xff);
-			break;
-		}
-	}
-
-	/* Set high/low water */
-	serial_out(info, UART_ESI_CMD1, ESI_SET_FLOW_LVL);
-	serial_out(info, UART_ESI_CMD2, info->config.flow_off >> 8);
-	serial_out(info, UART_ESI_CMD2, info->config.flow_off);
-	serial_out(info, UART_ESI_CMD2, info->config.flow_on >> 8);
-	serial_out(info, UART_ESI_CMD2, info->config.flow_on);
-
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-static int rs_put_char(struct tty_struct *tty, unsigned char ch)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-	int ret = 0;
-
-	if (serial_paranoia_check(info, tty->name, "rs_put_char"))
-		return 0;
-
-	if (!info->xmit_buf)
-		return 0;
-
-	spin_lock_irqsave(&info->lock, flags);
-	if (info->xmit_cnt < ESP_XMIT_SIZE - 1) {
-		info->xmit_buf[info->xmit_head++] = ch;
-		info->xmit_head &= ESP_XMIT_SIZE-1;
-		info->xmit_cnt++;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-	return ret;
-}
-
-static void rs_flush_chars(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_flush_chars"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	if (info->xmit_cnt <= 0 || tty->stopped || !info->xmit_buf)
-		goto out;
-
-	if (!(info->IER & UART_IER_THRI)) {
-		info->IER |= UART_IER_THRI;
-		serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-		serial_out(info, UART_ESI_CMD2, info->IER);
-	}
-out:
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-static int rs_write(struct tty_struct *tty,
-		    const unsigned char *buf, int count)
-{
-	int	c, t, ret = 0;
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_write"))
-		return 0;
-
-	if (!info->xmit_buf)
-		return 0;
-
-	while (1) {
-		/* Thanks to R. Wolff for suggesting how to do this with */
-		/* interrupts enabled */
-
-		c = count;
-		t = ESP_XMIT_SIZE - info->xmit_cnt - 1;
-
-		if (t < c)
-			c = t;
-
-		t = ESP_XMIT_SIZE - info->xmit_head;
-
-		if (t < c)
-			c = t;
-
-		if (c <= 0)
-			break;
-
-		memcpy(info->xmit_buf + info->xmit_head, buf, c);
-
-		info->xmit_head = (info->xmit_head + c) & (ESP_XMIT_SIZE-1);
-		info->xmit_cnt += c;
-		buf += c;
-		count -= c;
-		ret += c;
-	}
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	if (info->xmit_cnt && !tty->stopped && !(info->IER & UART_IER_THRI)) {
-		info->IER |= UART_IER_THRI;
-		serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-		serial_out(info, UART_ESI_CMD2, info->IER);
-	}
-
-	spin_unlock_irqrestore(&info->lock, flags);
-	return ret;
-}
-
-static int rs_write_room(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	int	ret;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_write_room"))
-		return 0;
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	ret = ESP_XMIT_SIZE - info->xmit_cnt - 1;
-	if (ret < 0)
-		ret = 0;
-	spin_unlock_irqrestore(&info->lock, flags);
-	return ret;
-}
-
-static int rs_chars_in_buffer(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-
-	if (serial_paranoia_check(info, tty->name, "rs_chars_in_buffer"))
-		return 0;
-	return info->xmit_cnt;
-}
-
-static void rs_flush_buffer(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_flush_buffer"))
-		return;
-	spin_lock_irqsave(&info->lock, flags);
-	info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
-	spin_unlock_irqrestore(&info->lock, flags);
-	tty_wakeup(tty);
-}
-
-/*
- * ------------------------------------------------------------
- * rs_throttle()
- *
- * This routine is called by the upper-layer tty layer to signal that
- * incoming characters should be throttled.
- * ------------------------------------------------------------
- */
-static void rs_throttle(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-#ifdef SERIAL_DEBUG_THROTTLE
-	char	buf[64];
-
-	printk("throttle %s: %d....\n", tty_name(tty, buf),
-						tty_chars_in_buffer(tty));
-#endif
-
-	if (serial_paranoia_check(info, tty->name, "rs_throttle"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-	info->IER &= ~UART_IER_RDI;
-	serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-	serial_out(info, UART_ESI_CMD2, info->IER);
-	serial_out(info, UART_ESI_CMD1, ESI_SET_RX_TIMEOUT);
-	serial_out(info, UART_ESI_CMD2, 0x00);
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-static void rs_unthrottle(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-#ifdef SERIAL_DEBUG_THROTTLE
-	char	buf[64];
-
-	printk(KERN_DEBUG "unthrottle %s: %d....\n", tty_name(tty, buf),
-	       tty_chars_in_buffer(tty));
-#endif
-
-	if (serial_paranoia_check(info, tty->name, "rs_unthrottle"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-	info->IER |= UART_IER_RDI;
-	serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-	serial_out(info, UART_ESI_CMD2, info->IER);
-	serial_out(info, UART_ESI_CMD1, ESI_SET_RX_TIMEOUT);
-	serial_out(info, UART_ESI_CMD2, info->config.rx_timeout);
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-/*
- * ------------------------------------------------------------
- * rs_ioctl() and friends
- * ------------------------------------------------------------
- */
-
-static int get_serial_info(struct esp_struct *info,
-			   struct serial_struct __user *retinfo)
-{
-	struct serial_struct tmp;
-
-	lock_kernel();
-	memset(&tmp, 0, sizeof(tmp));
-	tmp.type = PORT_16550A;
-	tmp.line = info->line;
-	tmp.port = info->io_port;
-	tmp.irq = info->irq;
-	tmp.flags = info->port.flags;
-	tmp.xmit_fifo_size = 1024;
-	tmp.baud_base = BASE_BAUD;
-	tmp.close_delay = info->close_delay;
-	tmp.closing_wait = info->closing_wait;
-	tmp.custom_divisor = info->custom_divisor;
-	tmp.hub6 = 0;
-	unlock_kernel();
-	if (copy_to_user(retinfo, &tmp, sizeof(*retinfo)))
-		return -EFAULT;
-	return 0;
-}
-
-static int get_esp_config(struct esp_struct *info,
-			  struct hayes_esp_config __user *retinfo)
-{
-	struct hayes_esp_config tmp;
-
-	if (!retinfo)
-		return -EFAULT;
-
-	memset(&tmp, 0, sizeof(tmp));
-	lock_kernel();
-	tmp.rx_timeout = info->config.rx_timeout;
-	tmp.rx_trigger = info->config.rx_trigger;
-	tmp.tx_trigger = info->config.tx_trigger;
-	tmp.flow_off = info->config.flow_off;
-	tmp.flow_on = info->config.flow_on;
-	tmp.pio_threshold = info->config.pio_threshold;
-	tmp.dma_channel = (info->stat_flags & ESP_STAT_NEVER_DMA ? 0 : dma);
-	unlock_kernel();
-
-	return copy_to_user(retinfo, &tmp, sizeof(*retinfo)) ? -EFAULT : 0;
-}
-
-static int set_serial_info(struct esp_struct *info,
-			   struct serial_struct __user *new_info)
-{
-	struct serial_struct new_serial;
-	struct esp_struct old_info;
-	unsigned int change_irq;
-	int retval = 0;
-	struct esp_struct *current_async;
-
-	if (copy_from_user(&new_serial, new_info, sizeof(new_serial)))
-		return -EFAULT;
-	old_info = *info;
-
-	if ((new_serial.type != PORT_16550A) ||
-	    (new_serial.hub6) ||
-	    (info->io_port != new_serial.port) ||
-	    (new_serial.baud_base != BASE_BAUD) ||
-	    (new_serial.irq > 15) ||
-	    (new_serial.irq < 2) ||
-	    (new_serial.irq == 6) ||
-	    (new_serial.irq == 8) ||
-	    (new_serial.irq == 13))
-		return -EINVAL;
-
-	change_irq = new_serial.irq != info->irq;
-
-	if (change_irq && (info->line % 8))
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		if (change_irq ||
-		    (new_serial.close_delay != info->close_delay) ||
-		    ((new_serial.flags & ~ASYNC_USR_MASK) !=
-		     (info->port.flags & ~ASYNC_USR_MASK)))
-			return -EPERM;
-		info->port.flags = ((info->port.flags & ~ASYNC_USR_MASK) |
-			       (new_serial.flags & ASYNC_USR_MASK));
-		info->custom_divisor = new_serial.custom_divisor;
-	} else {
-		if (new_serial.irq == 2)
-			new_serial.irq = 9;
-
-		if (change_irq) {
-			current_async = ports;
-
-			while (current_async) {
-				if ((current_async->line >= info->line) &&
-				    (current_async->line < (info->line + 8))) {
-					if (current_async == info) {
-						if (current_async->port.count > 1)
-							return -EBUSY;
-					} else if (current_async->port.count)
-						return -EBUSY;
-				}
-
-				current_async = current_async->next_port;
-			}
-		}
-
-		/*
-		 * OK, past this point, all the error checking has been done.
-		 * At this point, we start making changes.....
-		 */
-
-		info->port.flags = ((info->port.flags & ~ASYNC_FLAGS) |
-			       (new_serial.flags & ASYNC_FLAGS));
-		info->custom_divisor = new_serial.custom_divisor;
-		info->close_delay = new_serial.close_delay * HZ/100;
-		info->closing_wait = new_serial.closing_wait * HZ/100;
-
-		if (change_irq) {
-			/*
-			 * We need to shutdown the serial port at the old
-			 * port/irq combination.
-			 */
-			shutdown(info);
-
-			current_async = ports;
-
-			while (current_async) {
-				if ((current_async->line >= info->line) &&
-				    (current_async->line < (info->line + 8)))
-					current_async->irq = new_serial.irq;
-
-				current_async = current_async->next_port;
-			}
-
-			serial_out(info, UART_ESI_CMD1, ESI_SET_ENH_IRQ);
-			if (info->irq == 9)
-				serial_out(info, UART_ESI_CMD2, 0x02);
-			else
-				serial_out(info, UART_ESI_CMD2, info->irq);
-		}
-	}
-
-	if (info->port.flags & ASYNC_INITIALIZED) {
-		if (((old_info.port.flags & ASYNC_SPD_MASK) !=
-		     (info->port.flags & ASYNC_SPD_MASK)) ||
-		    (old_info.custom_divisor != info->custom_divisor)) {
-			if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
-				info->port.tty->alt_speed = 57600;
-			if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
-				info->port.tty->alt_speed = 115200;
-			if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
-				info->port.tty->alt_speed = 230400;
-			if ((info->port.flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
-				info->port.tty->alt_speed = 460800;
-			change_speed(info);
-		}
-	} else
-		retval = startup(info);
-
-	return retval;
-}
-
-static int set_esp_config(struct esp_struct *info,
-			  struct hayes_esp_config __user *new_info)
-{
-	struct hayes_esp_config new_config;
-	unsigned int change_dma;
-	int retval = 0;
-	struct esp_struct *current_async;
-	unsigned long flags;
-
-	/* Perhaps a non-sysadmin user should be able to do some of these */
-	/* operations.  I haven't decided yet. */
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (copy_from_user(&new_config, new_info, sizeof(new_config)))
-		return -EFAULT;
-
-	if ((new_config.flow_on >= new_config.flow_off) ||
-	    (new_config.rx_trigger < 1) ||
-	    (new_config.tx_trigger < 1) ||
-	    (new_config.flow_off < 1) ||
-	    (new_config.flow_on < 1) ||
-	    (new_config.rx_trigger > 1023) ||
-	    (new_config.tx_trigger > 1023) ||
-	    (new_config.flow_off > 1023) ||
-	    (new_config.flow_on > 1023) ||
-	    (new_config.pio_threshold < 0) ||
-	    (new_config.pio_threshold > 1024))
-		return -EINVAL;
-
-	if ((new_config.dma_channel != 1) && (new_config.dma_channel != 3))
-		new_config.dma_channel = 0;
-
-	if (info->stat_flags & ESP_STAT_NEVER_DMA)
-		change_dma = new_config.dma_channel;
-	else
-		change_dma = (new_config.dma_channel != dma);
-
-	if (change_dma) {
-		if (new_config.dma_channel) {
-			/* PIO mode to DMA mode transition OR */
-			/* change current DMA channel */
-			current_async = ports;
-
-			while (current_async) {
-				if (current_async == info) {
-					if (current_async->port.count > 1)
-						return -EBUSY;
-				} else if (current_async->port.count)
-					return -EBUSY;
-
-				current_async = current_async->next_port;
-			}
-
-			shutdown(info);
-			dma = new_config.dma_channel;
-			info->stat_flags &= ~ESP_STAT_NEVER_DMA;
-
-			/* all ports must use the same DMA channel */
-
-			spin_lock_irqsave(&info->lock, flags);
-			current_async = ports;
-
-			while (current_async) {
-				esp_basic_init(current_async);
-				current_async = current_async->next_port;
-			}
-			spin_unlock_irqrestore(&info->lock, flags);
-		} else {
-			/* DMA mode to PIO mode only */
-			if (info->port.count > 1)
-				return -EBUSY;
-
-			shutdown(info);
-			spin_lock_irqsave(&info->lock, flags);
-			info->stat_flags |= ESP_STAT_NEVER_DMA;
-			esp_basic_init(info);
-			spin_unlock_irqrestore(&info->lock, flags);
-		}
-	}
-
-	info->config.pio_threshold = new_config.pio_threshold;
-
-	if ((new_config.flow_off != info->config.flow_off) ||
-	    (new_config.flow_on != info->config.flow_on)) {
-		info->config.flow_off = new_config.flow_off;
-		info->config.flow_on = new_config.flow_on;
-
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_SET_FLOW_LVL);
-		serial_out(info, UART_ESI_CMD2, new_config.flow_off >> 8);
-		serial_out(info, UART_ESI_CMD2, new_config.flow_off);
-		serial_out(info, UART_ESI_CMD2, new_config.flow_on >> 8);
-		serial_out(info, UART_ESI_CMD2, new_config.flow_on);
-		spin_unlock_irqrestore(&info->lock, flags);
-	}
-
-	if ((new_config.rx_trigger != info->config.rx_trigger) ||
-	    (new_config.tx_trigger != info->config.tx_trigger)) {
-		info->config.rx_trigger = new_config.rx_trigger;
-		info->config.tx_trigger = new_config.tx_trigger;
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_SET_TRIGGER);
-		serial_out(info, UART_ESI_CMD2,
-			   new_config.rx_trigger >> 8);
-		serial_out(info, UART_ESI_CMD2, new_config.rx_trigger);
-		serial_out(info, UART_ESI_CMD2,
-			   new_config.tx_trigger >> 8);
-		serial_out(info, UART_ESI_CMD2, new_config.tx_trigger);
-		spin_unlock_irqrestore(&info->lock, flags);
-	}
-
-	if (new_config.rx_timeout != info->config.rx_timeout) {
-		info->config.rx_timeout = new_config.rx_timeout;
-		spin_lock_irqsave(&info->lock, flags);
-
-		if (info->IER & UART_IER_RDI) {
-			serial_out(info, UART_ESI_CMD1,
-				   ESI_SET_RX_TIMEOUT);
-			serial_out(info, UART_ESI_CMD2,
-				   new_config.rx_timeout);
-		}
-
-		spin_unlock_irqrestore(&info->lock, flags);
-	}
-
-	if (!(info->port.flags & ASYNC_INITIALIZED))
-		retval = startup(info);
-
-	return retval;
-}
-
-/*
- * get_lsr_info - get line status register info
- *
- * Purpose: Let user call ioctl() to get info when the UART physically
- * 	    is emptied.  On bus types like RS485, the transmitter must
- * 	    release the bus after transmitting. This must be done when
- * 	    the transmit shift register is empty, not be done when the
- * 	    transmit holding register is empty.  This functionality
- * 	    allows an RS485 driver to be written in user space.
- */
-static int get_lsr_info(struct esp_struct *info, unsigned int __user *value)
-{
-	unsigned char status;
-	unsigned int result;
-	unsigned long flags;
-
-	spin_lock_irqsave(&info->lock, flags);
-	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-	status = serial_in(info, UART_ESI_STAT1);
-	spin_unlock_irqrestore(&info->lock, flags);
-	result = ((status & UART_LSR_TEMT) ? TIOCSER_TEMT : 0);
-	return put_user(result, value);
-}
-
-
-static int esp_tiocmget(struct tty_struct *tty, struct file *file)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned char control, status;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, __func__))
-		return -ENODEV;
-	if (tty->flags & (1 << TTY_IO_ERROR))
-		return -EIO;
-
-	control = info->MCR;
-
-	spin_lock_irqsave(&info->lock, flags);
-	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-	status = serial_in(info, UART_ESI_STAT2);
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	return    ((control & UART_MCR_RTS) ? TIOCM_RTS : 0)
-		| ((control & UART_MCR_DTR) ? TIOCM_DTR : 0)
-		| ((status  & UART_MSR_DCD) ? TIOCM_CAR : 0)
-		| ((status  & UART_MSR_RI) ? TIOCM_RNG : 0)
-		| ((status  & UART_MSR_DSR) ? TIOCM_DSR : 0)
-		| ((status  & UART_MSR_CTS) ? TIOCM_CTS : 0);
-}
-
-static int esp_tiocmset(struct tty_struct *tty, struct file *file,
-			unsigned int set, unsigned int clear)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, __func__))
-		return -ENODEV;
-	if (tty->flags & (1 << TTY_IO_ERROR))
-		return -EIO;
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	if (set & TIOCM_RTS)
-		info->MCR |= UART_MCR_RTS;
-	if (set & TIOCM_DTR)
-		info->MCR |= UART_MCR_DTR;
-
-	if (clear & TIOCM_RTS)
-		info->MCR &= ~UART_MCR_RTS;
-	if (clear & TIOCM_DTR)
-		info->MCR &= ~UART_MCR_DTR;
-
-	serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-	serial_out(info, UART_ESI_CMD2, UART_MCR);
-	serial_out(info, UART_ESI_CMD2, info->MCR);
-
-	spin_unlock_irqrestore(&info->lock, flags);
-	return 0;
-}
-
-/*
- * rs_break() --- routine which turns the break handling on or off
- */
-static int esp_break(struct tty_struct *tty, int break_state)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "esp_break"))
-		return -EINVAL;
-
-	if (break_state == -1) {
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_ISSUE_BREAK);
-		serial_out(info, UART_ESI_CMD2, 0x01);
-		spin_unlock_irqrestore(&info->lock, flags);
-
-		/* FIXME - new style wait needed here */
-		interruptible_sleep_on(&info->break_wait);
-	} else {
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_ISSUE_BREAK);
-		serial_out(info, UART_ESI_CMD2, 0x00);
-		spin_unlock_irqrestore(&info->lock, flags);
-	}
-	return 0;
-}
-
-static int rs_ioctl(struct tty_struct *tty, struct file *file,
-		    unsigned int cmd, unsigned long arg)
-{
-	struct esp_struct *info = tty->driver_data;
-	struct async_icount cprev, cnow;	/* kernel counter temps */
-	struct serial_icounter_struct __user *p_cuser;	/* user space */
-	void __user *argp = (void __user *)arg;
-	unsigned long flags;
-	int ret;
-
-	if (serial_paranoia_check(info, tty->name, "rs_ioctl"))
-		return -ENODEV;
-
-	if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
-	    (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGWILD)  &&
-	    (cmd != TIOCSERSWILD) && (cmd != TIOCSERGSTRUCT) &&
-	    (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT) &&
-	    (cmd != TIOCGHAYESESP) && (cmd != TIOCSHAYESESP)) {
-		if (tty->flags & (1 << TTY_IO_ERROR))
-		    return -EIO;
-	}
-
-	switch (cmd) {
-	case TIOCGSERIAL:
-		return get_serial_info(info, argp);
-	case TIOCSSERIAL:
-		lock_kernel();
-		ret = set_serial_info(info, argp);
-		unlock_kernel();
-		return ret;
-	case TIOCSERGWILD:
-		return put_user(0L, (unsigned long __user *)argp);
-	case TIOCSERGETLSR: /* Get line status register */
-		return get_lsr_info(info, argp);
-	case TIOCSERSWILD:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		return 0;
-	/*
-	 * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
-	 * - mask passed in arg for lines of interest
-	 *   (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
-	 * Caller should use TIOCGICOUNT to see which one it was
-	 */
-	case TIOCMIWAIT:
-		spin_lock_irqsave(&info->lock, flags);
-		cprev = info->icount;	/* note the counters on entry */
-		spin_unlock_irqrestore(&info->lock, flags);
-		while (1) {
-			/* FIXME: convert to new style wakeup */
-			interruptible_sleep_on(&info->port.delta_msr_wait);
-			/* see if a signal did it */
-			if (signal_pending(current))
-				return -ERESTARTSYS;
-			spin_lock_irqsave(&info->lock, flags);
-			cnow = info->icount;	/* atomic copy */
-			spin_unlock_irqrestore(&info->lock, flags);
-			if (cnow.rng == cprev.rng &&
-			    cnow.dsr == cprev.dsr &&
-			    cnow.dcd == cprev.dcd &&
-			    cnow.cts == cprev.cts)
-				return -EIO; /* no change => error */
-			if (((arg & TIOCM_RNG) &&
-			     (cnow.rng != cprev.rng)) ||
-			     ((arg & TIOCM_DSR) &&
-			      (cnow.dsr != cprev.dsr)) ||
-			     ((arg & TIOCM_CD) &&
-			      (cnow.dcd != cprev.dcd)) ||
-			     ((arg & TIOCM_CTS) &&
-			      (cnow.cts != cprev.cts))) {
-				return 0;
-			}
-			cprev = cnow;
-		}
-		/* NOTREACHED */
-	/*
-	 * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
-	 * Return: write counters to the user passed counter struct
-	 * NB: both 1->0 and 0->1 transitions are counted except for
-	 *     RI where only 0->1 is counted.
-	 */
-	case TIOCGICOUNT:
-		spin_lock_irqsave(&info->lock, flags);
-		cnow = info->icount;
-		spin_unlock_irqrestore(&info->lock, flags);
-		p_cuser = argp;
-		if (put_user(cnow.cts, &p_cuser->cts) ||
-		    put_user(cnow.dsr, &p_cuser->dsr) ||
-		    put_user(cnow.rng, &p_cuser->rng) ||
-		    put_user(cnow.dcd, &p_cuser->dcd))
-			return -EFAULT;
-			return 0;
-	case TIOCGHAYESESP:
-		return get_esp_config(info, argp);
-	case TIOCSHAYESESP:
-		lock_kernel();
-		ret = set_esp_config(info, argp);
-		unlock_kernel();
-		return ret;
-	default:
-		return -ENOIOCTLCMD;
-	}
-	return 0;
-}
-
-static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	change_speed(info);
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	/* Handle transition to B0 status */
-	if ((old_termios->c_cflag & CBAUD) &&
-		!(tty->termios->c_cflag & CBAUD)) {
-		info->MCR &= ~(UART_MCR_DTR|UART_MCR_RTS);
-		serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-		serial_out(info, UART_ESI_CMD2, UART_MCR);
-		serial_out(info, UART_ESI_CMD2, info->MCR);
-	}
-
-	/* Handle transition away from B0 status */
-	if (!(old_termios->c_cflag & CBAUD) &&
-		(tty->termios->c_cflag & CBAUD)) {
-		info->MCR |= (UART_MCR_DTR | UART_MCR_RTS);
-		serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-		serial_out(info, UART_ESI_CMD2, UART_MCR);
-		serial_out(info, UART_ESI_CMD2, info->MCR);
-	}
-
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	/* Handle turning of CRTSCTS */
-	if ((old_termios->c_cflag & CRTSCTS) &&
-	    !(tty->termios->c_cflag & CRTSCTS)) {
-		rs_start(tty);
-	}
-}
-
-/*
- * ------------------------------------------------------------
- * rs_close()
- *
- * This routine is called when the serial port gets closed.  First, we
- * wait for the last remaining data to be sent.  Then, we unlink its
- * async structure from the interrupt chain if necessary, and we free
- * that IRQ if nothing is left in the chain.
- * ------------------------------------------------------------
- */
-static void rs_close(struct tty_struct *tty, struct file *filp)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long flags;
-
-	if (!info || serial_paranoia_check(info, tty->name, "rs_close"))
-		return;
-
-	spin_lock_irqsave(&info->lock, flags);
-
-	if (tty_hung_up_p(filp)) {
-		DBG_CNT("before DEC-hung");
-		goto out;
-	}
-
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "rs_close ttys%d, count = %d\n",
-						info->line, info->port.count);
-#endif
-	if (tty->count == 1 && info->port.count != 1) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_DEBUG "rs_close: bad serial port count; tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	if (--info->port.count < 0) {
-		printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
-		       info->line, info->port.count);
-		info->port.count = 0;
-	}
-	if (info->port.count) {
-		DBG_CNT("before DEC-2");
-		goto out;
-	}
-	info->port.flags |= ASYNC_CLOSING;
-
-	spin_unlock_irqrestore(&info->lock, flags);
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->closing_wait);
-	/*
-	 * At this point we stop accepting input.  To do this, we
-	 * disable the receive line status interrupts, and tell the
-	 * interrupt driver to stop checking the data ready bit in the
-	 * line status register.
-	 */
-	/* info->IER &= ~UART_IER_RLSI; */
-	info->IER &= ~UART_IER_RDI;
-	info->read_status_mask &= ~UART_LSR_DR;
-	if (info->port.flags & ASYNC_INITIALIZED) {
-
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_SET_SRV_MASK);
-		serial_out(info, UART_ESI_CMD2, info->IER);
-
-		/* disable receive timeout */
-		serial_out(info, UART_ESI_CMD1, ESI_SET_RX_TIMEOUT);
-		serial_out(info, UART_ESI_CMD2, 0x00);
-
-		spin_unlock_irqrestore(&info->lock, flags);
-
-		/*
-		 * Before we drop DTR, make sure the UART transmitter
-		 * has completely drained; this is especially
-		 * important if there is a transmit FIFO!
-		 */
-		rs_wait_until_sent(tty, info->timeout);
-	}
-	shutdown(info);
-	rs_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-	tty->closing = 0;
-	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->close_delay)
-			msleep_interruptible(jiffies_to_msecs(info->close_delay));
-		wake_up_interruptible(&info->port.open_wait);
-	}
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&info->port.close_wait);
-	return;
-
-out:
-	spin_unlock_irqrestore(&info->lock, flags);
-}
-
-static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-	struct esp_struct *info = tty->driver_data;
-	unsigned long orig_jiffies, char_time;
-	unsigned long flags;
-
-	if (serial_paranoia_check(info, tty->name, "rs_wait_until_sent"))
-		return;
-
-	orig_jiffies = jiffies;
-	char_time = ((info->timeout - HZ / 50) / 1024) / 5;
-
-	if (!char_time)
-		char_time = 1;
-
-	spin_lock_irqsave(&info->lock, flags);
-	serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-	serial_out(info, UART_ESI_CMD1, ESI_GET_TX_AVAIL);
-
-	while ((serial_in(info, UART_ESI_STAT1) != 0x03) ||
-		(serial_in(info, UART_ESI_STAT2) != 0xff)) {
-
-		spin_unlock_irqrestore(&info->lock, flags);
-		msleep_interruptible(jiffies_to_msecs(char_time));
-
-		if (signal_pending(current))
-			return;
-
-		if (timeout && time_after(jiffies, orig_jiffies + timeout))
-			return;
-
-		spin_lock_irqsave(&info->lock, flags);
-		serial_out(info, UART_ESI_CMD1, ESI_NO_COMMAND);
-		serial_out(info, UART_ESI_CMD1, ESI_GET_TX_AVAIL);
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-	set_current_state(TASK_RUNNING);
-}
-
-/*
- * esp_hangup() --- called by tty_hangup() when a hangup is signaled.
- */
-static void esp_hangup(struct tty_struct *tty)
-{
-	struct esp_struct *info = tty->driver_data;
-
-	if (serial_paranoia_check(info, tty->name, "esp_hangup"))
-		return;
-
-	rs_flush_buffer(tty);
-	shutdown(info);
-	info->port.count = 0;
-	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	info->port.tty = NULL;
-	wake_up_interruptible(&info->port.open_wait);
-}
-
-static int esp_carrier_raised(struct tty_port *port)
-{
-	struct esp_struct *info = container_of(port, struct esp_struct, port);
-	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-	if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
-		return 1;
-	return 0;
-}
-
-/*
- * ------------------------------------------------------------
- * esp_open() and friends
- * ------------------------------------------------------------
- */
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct esp_struct *info)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int		retval;
-	int		do_clocal = 0;
-	unsigned long	flags;
-	int		cd;
-	struct tty_port *port = &info->port;
-
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp) ||
-	    (port->flags & ASYNC_CLOSING)) {
-		if (port->flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&port->close_wait);
-#ifdef SERIAL_DO_RESTART
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-#else
-		return -EAGAIN;
-#endif
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->count is dropped by one, so that
-	 * rs_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "block_til_ready before block: ttys%d, count = %d\n",
-	       info->line, port->count);
-#endif
-	spin_lock_irqsave(&info->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			unsigned int scratch;
-
-			serial_out(info, UART_ESI_CMD1, ESI_READ_UART);
-			serial_out(info, UART_ESI_CMD2, UART_MCR);
-			scratch = serial_in(info, UART_ESI_STAT1);
-			serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-			serial_out(info, UART_ESI_CMD2, UART_MCR);
-			serial_out(info, UART_ESI_CMD2,
-				scratch | UART_MCR_DTR | UART_MCR_RTS);
-		}
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-		    !(port->flags & ASYNC_INITIALIZED)) {
-#ifdef SERIAL_DO_RESTART
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-#else
-			retval = -EAGAIN;
-#endif
-			break;
-		}
-
-		cd = tty_port_carrier_raised(port);
-
-		if (!(port->flags & ASYNC_CLOSING) &&
-		    (do_clocal))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-#ifdef SERIAL_DEBUG_OPEN
-		printk(KERN_DEBUG "block_til_ready blocking: ttys%d, count = %d\n",
-		       info->line, port->count);
-#endif
-		spin_unlock_irqrestore(&info->lock, flags);
-		schedule();
-		spin_lock_irqsave(&info->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	spin_unlock_irqrestore(&info->lock, flags);
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "block_til_ready after blocking: ttys%d, count = %d\n",
-	       info->line, port->count);
-#endif
-	if (retval)
-		return retval;
-	port->flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
-}
-
-/*
- * This routine is called whenever a serial port is opened.  It
- * enables interrupts for a serial port, linking in its async structure into
- * the IRQ chain.   It also performs the serial-specific
- * initialization for the tty structure.
- */
-static int esp_open(struct tty_struct *tty, struct file *filp)
-{
-	struct esp_struct	*info;
-	int 			retval, line;
-	unsigned long		flags;
-
-	line = tty->index;
-	if ((line < 0) || (line >= NR_PORTS))
-		return -ENODEV;
-
-	/* find the port in the chain */
-
-	info = ports;
-
-	while (info && (info->line != line))
-		info = info->next_port;
-
-	if (!info) {
-		serial_paranoia_check(info, tty->name, "esp_open");
-		return -ENODEV;
-	}
-
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "esp_open %s, count = %d\n", tty->name, info->port.count);
-#endif
-	spin_lock_irqsave(&info->lock, flags);
-	info->port.count++;
-	tty->driver_data = info;
-	info->port.tty = tty;
-
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	/*
-	 * Start up serial port
-	 */
-	retval = startup(info);
-	if (retval)
-		return retval;
-
-	retval = block_til_ready(tty, filp, info);
-	if (retval) {
-#ifdef SERIAL_DEBUG_OPEN
-		printk(KERN_DEBUG "esp_open returning after block_til_ready with %d\n",
-		       retval);
-#endif
-		return retval;
-	}
-#ifdef SERIAL_DEBUG_OPEN
-	printk(KERN_DEBUG "esp_open %s successful...", tty->name);
-#endif
-	return 0;
-}
-
-/*
- * ---------------------------------------------------------------------
- * espserial_init() and friends
- *
- * espserial_init() is called at boot-time to initialize the serial driver.
- * ---------------------------------------------------------------------
- */
-
-/*
- * This routine prints out the appropriate serial driver version
- * number, and identifies which options were configured into this
- * driver.
- */
-
-static void __init show_serial_version(void)
-{
-	printk(KERN_INFO "%s version %s (DMA %u)\n",
-		serial_name, serial_version, dma);
-}
-
-/*
- * This routine is called by espserial_init() to initialize a specific serial
- * port.
- */
-static int autoconfig(struct esp_struct *info)
-{
-	int port_detected = 0;
-	unsigned long flags;
-
-	if (!request_region(info->io_port, REGION_SIZE, "esp serial"))
-		return -EIO;
-
-	spin_lock_irqsave(&info->lock, flags);
-	/*
-	 * Check for ESP card
-	 */
-
-	if (serial_in(info, UART_ESI_BASE) == 0xf3) {
-		serial_out(info, UART_ESI_CMD1, 0x00);
-		serial_out(info, UART_ESI_CMD1, 0x01);
-
-		if ((serial_in(info, UART_ESI_STAT2) & 0x70) == 0x20) {
-			port_detected = 1;
-
-			if (!(info->irq)) {
-				serial_out(info, UART_ESI_CMD1, 0x02);
-
-				if (serial_in(info, UART_ESI_STAT1) & 0x01)
-					info->irq = 3;
-				else
-					info->irq = 4;
-			}
-
-
-			/* put card in enhanced mode */
-			/* this prevents access through */
-			/* the "old" IO ports */
-			esp_basic_init(info);
-
-			/* clear out MCR */
-			serial_out(info, UART_ESI_CMD1, ESI_WRITE_UART);
-			serial_out(info, UART_ESI_CMD2, UART_MCR);
-			serial_out(info, UART_ESI_CMD2, 0x00);
-		}
-	}
-	if (!port_detected)
-		release_region(info->io_port, REGION_SIZE);
-
-	spin_unlock_irqrestore(&info->lock, flags);
-	return (port_detected);
-}
-
-static const struct tty_operations esp_ops = {
-	.open = esp_open,
-	.close = rs_close,
-	.write = rs_write,
-	.put_char = rs_put_char,
-	.flush_chars = rs_flush_chars,
-	.write_room = rs_write_room,
-	.chars_in_buffer = rs_chars_in_buffer,
-	.flush_buffer = rs_flush_buffer,
-	.ioctl = rs_ioctl,
-	.throttle = rs_throttle,
-	.unthrottle = rs_unthrottle,
-	.set_termios = rs_set_termios,
-	.stop = rs_stop,
-	.start = rs_start,
-	.hangup = esp_hangup,
-	.break_ctl = esp_break,
-	.wait_until_sent = rs_wait_until_sent,
-	.tiocmget = esp_tiocmget,
-	.tiocmset = esp_tiocmset,
-};
-
-static const struct tty_port_operations esp_port_ops = {
-	.esp_carrier_raised,
-};
-
-/*
- * The serial driver boot-time initialization code!
- */
-static int __init espserial_init(void)
-{
-	int i, offset;
-	struct esp_struct *info;
-	struct esp_struct *last_primary = NULL;
-	int esp[] = { 0x100, 0x140, 0x180, 0x200, 0x240, 0x280, 0x300, 0x380 };
-
-	esp_driver = alloc_tty_driver(NR_PORTS);
-	if (!esp_driver)
-		return -ENOMEM;
-
-	for (i = 0; i < NR_PRIMARY; i++) {
-		if (irq[i] != 0) {
-			if ((irq[i] < 2) || (irq[i] > 15) || (irq[i] == 6) ||
-			    (irq[i] == 8) || (irq[i] == 13))
-				irq[i] = 0;
-			else if (irq[i] == 2)
-				irq[i] = 9;
-		}
-	}
-
-	if ((dma != 1) && (dma != 3))
-		dma = 0;
-
-	if ((rx_trigger < 1) || (rx_trigger > 1023))
-		rx_trigger = 768;
-
-	if ((tx_trigger < 1) || (tx_trigger > 1023))
-		tx_trigger = 768;
-
-	if ((flow_off < 1) || (flow_off > 1023))
-		flow_off = 1016;
-
-	if ((flow_on < 1) || (flow_on > 1023))
-		flow_on = 944;
-
-	if ((rx_timeout < 0) || (rx_timeout > 255))
-		rx_timeout = 128;
-
-	if (flow_on >= flow_off)
-		flow_on = flow_off - 1;
-
-	show_serial_version();
-
-	/* Initialize the tty_driver structure */
-
-	esp_driver->owner = THIS_MODULE;
-	esp_driver->name = "ttyP";
-	esp_driver->major = ESP_IN_MAJOR;
-	esp_driver->minor_start = 0;
-	esp_driver->type = TTY_DRIVER_TYPE_SERIAL;
-	esp_driver->subtype = SERIAL_TYPE_NORMAL;
-	esp_driver->init_termios = tty_std_termios;
-	esp_driver->init_termios.c_cflag =
-		B9600 | CS8 | CREAD | HUPCL | CLOCAL;
-	esp_driver->init_termios.c_ispeed = 9600;
-	esp_driver->init_termios.c_ospeed = 9600;
-	esp_driver->flags = TTY_DRIVER_REAL_RAW;
-	tty_set_operations(esp_driver, &esp_ops);
-	if (tty_register_driver(esp_driver)) {
-		printk(KERN_ERR "Couldn't register esp serial driver");
-		put_tty_driver(esp_driver);
-		return 1;
-	}
-
-	info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL);
-
-	if (!info) {
-		printk(KERN_ERR "Couldn't allocate memory for esp serial device information\n");
-		tty_unregister_driver(esp_driver);
-		put_tty_driver(esp_driver);
-		return 1;
-	}
-
-	spin_lock_init(&info->lock);
-	/* rx_trigger, tx_trigger are needed by autoconfig */
-	info->config.rx_trigger = rx_trigger;
-	info->config.tx_trigger = tx_trigger;
-
-	i = 0;
-	offset = 0;
-
-	do {
-		tty_port_init(&info->port);
-		info->port.ops = &esp_port_ops;
-		info->io_port = esp[i] + offset;
-		info->irq = irq[i];
-		info->line = (i * 8) + (offset / 8);
-
-		if (!autoconfig(info)) {
-			i++;
-			offset = 0;
-			continue;
-		}
-
-		info->custom_divisor = (divisor[i] >> (offset / 2)) & 0xf;
-		info->port.flags = STD_COM_FLAGS;
-		if (info->custom_divisor)
-			info->port.flags |= ASYNC_SPD_CUST;
-		info->magic = ESP_MAGIC;
-		info->close_delay = 5*HZ/10;
-		info->closing_wait = 30*HZ;
-		info->config.rx_timeout = rx_timeout;
-		info->config.flow_on = flow_on;
-		info->config.flow_off = flow_off;
-		info->config.pio_threshold = pio_threshold;
-		info->next_port = ports;
-		init_waitqueue_head(&info->break_wait);
-		ports = info;
-		printk(KERN_INFO "ttyP%d at 0x%04x (irq = %d) is an ESP ",
-			info->line, info->io_port, info->irq);
-
-		if (info->line % 8) {
-			printk("secondary port\n");
-			/* 8 port cards can't do DMA */
-			info->stat_flags |= ESP_STAT_NEVER_DMA;
-
-			if (last_primary)
-				last_primary->stat_flags |= ESP_STAT_NEVER_DMA;
-		} else {
-			printk("primary port\n");
-			last_primary = info;
-			irq[i] = info->irq;
-		}
-
-		if (!dma)
-			info->stat_flags |= ESP_STAT_NEVER_DMA;
-
-		info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL);
-		if (!info) {
-			printk(KERN_ERR "Couldn't allocate memory for esp serial device information\n");
-			/* allow use of the already detected ports */
-			return 0;
-		}
-
-		spin_lock_init(&info->lock);
-		/* rx_trigger, tx_trigger are needed by autoconfig */
-		info->config.rx_trigger = rx_trigger;
-		info->config.tx_trigger = tx_trigger;
-
-		if (offset == 56) {
-			i++;
-			offset = 0;
-		} else {
-			offset += 8;
-		}
-	} while (i < NR_PRIMARY);
-
-	/* free the last port memory allocation */
-	kfree(info);
-
-	return 0;
-}
-
-static void __exit espserial_exit(void)
-{
-	int e1;
-	struct esp_struct *temp_async;
-	struct esp_pio_buffer *pio_buf;
-
-	e1 = tty_unregister_driver(esp_driver);
-	if (e1)
-		printk(KERN_ERR "esp: failed to unregister driver (%d)\n", e1);
-	put_tty_driver(esp_driver);
-
-	while (ports) {
-		if (ports->io_port)
-			release_region(ports->io_port, REGION_SIZE);
-		temp_async = ports->next_port;
-		kfree(ports);
-		ports = temp_async;
-	}
-
-	if (dma_buffer)
-		free_pages((unsigned long)dma_buffer,
-			get_order(DMA_BUFFER_SZ));
-
-	while (free_pio_buf) {
-		pio_buf = free_pio_buf->next;
-		kfree(free_pio_buf);
-		free_pio_buf = pio_buf;
-	}
-}
-
-module_init(espserial_init);
-module_exit(espserial_exit);
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 5a5385749e16..f72914db2a11 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -214,7 +214,6 @@ unifdef-y += futex.h
 unifdef-y += fs.h
 unifdef-y += gameport.h
 unifdef-y += generic_serial.h
-unifdef-y += hayesesp.h
 unifdef-y += hdlcdrv.h
 unifdef-y += hdlc.h
 unifdef-y += hdreg.h
diff --git a/include/linux/hayesesp.h b/include/linux/hayesesp.h
deleted file mode 100644
index 92b08cfe4a75..000000000000
--- a/include/linux/hayesesp.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef HAYESESP_H
-#define HAYESESP_H
-
-struct hayes_esp_config {
-	short flow_on;
-	short flow_off;
-	short rx_trigger;
-	short tx_trigger;
-	short pio_threshold;
-	unsigned char rx_timeout;
-	char dma_channel;
-};
-
-#ifdef __KERNEL__
-
-#define ESP_DMA_CHANNEL   0
-#define ESP_RX_TRIGGER    768
-#define ESP_TX_TRIGGER    768
-#define ESP_FLOW_OFF      1016
-#define ESP_FLOW_ON       944
-#define ESP_RX_TMOUT      128
-#define ESP_PIO_THRESHOLD 32
-
-#define ESP_IN_MAJOR	57	/* major dev # for dial in */
-#define ESP_OUT_MAJOR	58	/* major dev # for dial out */
-#define ESPC_SCALE 	3
-#define UART_ESI_BASE	0x00
-#define UART_ESI_SID	0x01
-#define UART_ESI_RX	0x02
-#define UART_ESI_TX	0x02
-#define UART_ESI_CMD1	0x04
-#define UART_ESI_CMD2	0x05
-#define UART_ESI_STAT1	0x04
-#define UART_ESI_STAT2	0x05
-#define UART_ESI_RWS	0x07
-
-#define UART_IER_DMA_TMOUT	0x80
-#define UART_IER_DMA_TC		0x08
-
-#define ESI_SET_IRQ		0x04
-#define ESI_SET_DMA_TMOUT	0x05
-#define ESI_SET_SRV_MASK	0x06
-#define ESI_SET_ERR_MASK	0x07
-#define ESI_SET_FLOW_CNTL	0x08
-#define ESI_SET_FLOW_CHARS	0x09
-#define ESI_SET_FLOW_LVL	0x0a
-#define ESI_SET_TRIGGER		0x0b
-#define ESI_SET_RX_TIMEOUT	0x0c
-#define ESI_SET_FLOW_TMOUT	0x0d
-#define ESI_WRITE_UART		0x0e
-#define ESI_READ_UART		0x0f
-#define ESI_SET_MODE		0x10
-#define ESI_GET_ERR_STAT	0x12
-#define ESI_GET_UART_STAT	0x13
-#define ESI_GET_RX_AVAIL	0x14
-#define ESI_GET_TX_AVAIL	0x15
-#define ESI_START_DMA_RX	0x16
-#define ESI_START_DMA_TX	0x17
-#define ESI_ISSUE_BREAK		0x1a
-#define ESI_FLUSH_RX		0x1b
-#define ESI_FLUSH_TX		0x1c
-#define ESI_SET_BAUD		0x1d
-#define ESI_SET_ENH_IRQ		0x1f
-#define ESI_SET_REINTR		0x20
-#define ESI_SET_PRESCALAR	0x23
-#define ESI_NO_COMMAND		0xff
-
-#define ESP_STAT_RX_TIMEOUT	0x01
-#define ESP_STAT_DMA_RX		0x02
-#define ESP_STAT_DMA_TX		0x04
-#define ESP_STAT_NEVER_DMA      0x08
-#define ESP_STAT_USE_PIO        0x10
-
-#define ESP_MAGIC		0x53ee
-#define ESP_XMIT_SIZE		4096
-
-struct esp_struct {
-	int			magic;
-	struct tty_port		port;
-	spinlock_t		lock;
-	int			io_port;
-	int			irq;
-	int			read_status_mask;
-	int			ignore_status_mask;
-	int			timeout;
-	int			stat_flags;
-	int			custom_divisor;
-	int			close_delay;
-	unsigned short		closing_wait;
-	unsigned short		closing_wait2;
-	int			IER; 	/* Interrupt Enable Register */
-	int			MCR; 	/* Modem control register */
-	unsigned long		last_active;
-	int			line;
-	unsigned char 		*xmit_buf;
-	int			xmit_head;
-	int			xmit_tail;
-	int			xmit_cnt;
-	wait_queue_head_t	break_wait;
-	struct async_icount	icount;	/* kernel counters for the 4 input interrupts */
-	struct hayes_esp_config config; /* port configuration */
-	struct esp_struct	*next_port; /* For the linked list */
-};
-
-struct esp_pio_buffer {
-	unsigned char data[1024];
-	struct esp_pio_buffer *next;
-};
-
-#endif /* __KERNEL__ */
-
-
-#endif /* ESP_H */
-
-- 
cgit v1.2.3


From 64bc397914265a9ead8d73b63bb31ab3bdd25f67 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 6 Oct 2009 16:06:11 +0100
Subject: tty_port: add "tty_port_open" helper

For the moment this just moves the USB logic over and fixes the 'what if
we open and hangup at the same time' race noticed by Oliver Neukum.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_port.c         | 36 +++++++++++++++++++++++++++++-
 drivers/usb/serial/usb-serial.c | 49 +++++++++++++++++------------------------
 include/linux/tty.h             | 10 ++++++++-
 3 files changed, 64 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index c63f3d33914a..b22a61a4fbe5 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -99,10 +99,11 @@ EXPORT_SYMBOL(tty_port_tty_set);
 
 static void tty_port_shutdown(struct tty_port *port)
 {
+	mutex_lock(&port->mutex);
 	if (port->ops->shutdown &&
 		test_and_clear_bit(ASYNCB_INITIALIZED, &port->flags))
 			port->ops->shutdown(port);
-
+	mutex_unlock(&port->mutex);
 }
 
 /**
@@ -381,3 +382,36 @@ void tty_port_close(struct tty_port *port, struct tty_struct *tty,
 	tty_port_tty_set(port, NULL);
 }
 EXPORT_SYMBOL(tty_port_close);
+
+int tty_port_open(struct tty_port *port, struct tty_struct *tty,
+                                                        struct file *filp)
+{
+	spin_lock_irq(&port->lock);
+	if (!tty_hung_up_p(filp))
+		++port->count;
+	spin_unlock_irq(&port->lock);
+	tty_port_tty_set(port, tty);
+
+	/*
+	 * Do the device-specific open only if the hardware isn't
+	 * already initialized. Serialize open and shutdown using the
+	 * port mutex.
+	 */
+
+	mutex_lock(&port->mutex);
+
+	if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
+		if (port->ops->activate) {
+			int retval = port->ops->activate(port, tty);
+			if (retval) {
+        		        mutex_unlock(&port->mutex);
+        			return retval;
+        		}
+                }
+		set_bit(ASYNCB_INITIALIZED, &port->flags);
+	}
+	mutex_unlock(&port->mutex);
+	return tty_port_block_til_ready(port, tty, filp);
+}
+
+EXPORT_SYMBOL(tty_port_open);
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index bd3fa7ff15b1..b0649d92251f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -247,41 +247,31 @@ static int serial_install(struct tty_driver *driver, struct tty_struct *tty)
 	return retval;
 }
 
-static int serial_open(struct tty_struct *tty, struct file *filp)
+static int serial_activate(struct tty_port *tport, struct tty_struct *tty)
 {
-	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial_port *port =
+		container_of(tport, struct usb_serial_port, port);
 	struct usb_serial *serial = port->serial;
 	int retval;
 
-	dbg("%s - port %d", __func__, port->number);
-
-	spin_lock_irq(&port->port.lock);
-	if (!tty_hung_up_p(filp))
-		++port->port.count;
-	spin_unlock_irq(&port->port.lock);
-	tty_port_tty_set(&port->port, tty);
+	if (mutex_lock_interruptible(&port->mutex))
+		return -ERESTARTSYS;
+	mutex_lock(&serial->disc_mutex);
+	if (serial->disconnected)
+		retval = -ENODEV;
+	else
+		retval = port->serial->type->open(tty, port);
+	mutex_unlock(&serial->disc_mutex);
+	mutex_unlock(&port->mutex);
+	return retval;
+}
 
-	/* Do the device-specific open only if the hardware isn't
-	 * already initialized.
-	 */
-	if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags)) {
-		if (mutex_lock_interruptible(&port->mutex))
-			return -ERESTARTSYS;
-		mutex_lock(&serial->disc_mutex);
-		if (serial->disconnected)
-			retval = -ENODEV;
-		else
-			retval = port->serial->type->open(tty, port);
-		mutex_unlock(&serial->disc_mutex);
-		mutex_unlock(&port->mutex);
-		if (retval)
-			return retval;
-		set_bit(ASYNCB_INITIALIZED, &port->port.flags);
-	}
+static int serial_open(struct tty_struct *tty, struct file *filp)
+{
+	struct usb_serial_port *port = tty->driver_data;
 
-	/* Now do the correct tty layer semantics */
-	retval = tty_port_block_til_ready(&port->port, tty, filp);
-	return retval;
+	dbg("%s - port %d", __func__, port->number);
+	return tty_port_open(&port->port, tty, filp);
 }
 
 /**
@@ -725,6 +715,7 @@ static void serial_dtr_rts(struct tty_port *port, int on)
 static const struct tty_port_operations serial_port_ops = {
 	.carrier_raised = serial_carrier_raised,
 	.dtr_rts = serial_dtr_rts,
+	.activate = serial_activate,
 };
 
 int usb_serial_probe(struct usb_interface *interface,
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f0f43d08d8b8..6352ac257fcb 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -190,9 +190,15 @@ struct tty_port_operations {
 	/* Control the DTR line */
 	void (*dtr_rts)(struct tty_port *port, int raise);
 	/* Called when the last close completes or a hangup finishes
-	   IFF the port was initialized. Do not use to free resources */
+	   IFF the port was initialized. Do not use to free resources. Called
+	   under the port mutex to serialize against activate/shutdowns */
 	void (*shutdown)(struct tty_port *port);
 	void (*drop)(struct tty_port *port);
+	/* Called under the port mutex from tty_port_open, serialized using
+	   the port mutex */
+        /* FIXME: long term getting the tty argument *out* of this would be
+           good for consoles */
+	int (*activate)(struct tty_port *port, struct tty_struct *tty);
 };
 	
 struct tty_port {
@@ -467,6 +473,8 @@ extern int tty_port_close_start(struct tty_port *port,
 extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty);
 extern void tty_port_close(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
+extern int tty_port_open(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
 extern inline int tty_port_users(struct tty_port *port)
 {
 	return port->count + port->blocked_open;
-- 
cgit v1.2.3


From 82fc5943430e3cbf15033ed4186a73f90906345d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 6 Oct 2009 16:06:46 +0100
Subject: usb_serial: Kill port mutex

The tty port has a port mutex used for all the port related locking so we
don't need the one in the USB serial layer any more.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/opticon.c    | 4 ++--
 drivers/usb/serial/usb-serial.c | 1 -
 include/linux/usb/serial.h      | 3 ---
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index 80f59b6350cb..c03fdc0242dd 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -501,12 +501,12 @@ static int opticon_resume(struct usb_interface *intf)
 	struct usb_serial_port *port = serial->port[0];
 	int result;
 
-	mutex_lock(&port->mutex);
+	mutex_lock(&port->port.mutex);
 	if (port->port.count)
 		result = usb_submit_urb(priv->bulk_read_urb, GFP_NOIO);
 	else
 		result = 0;
-	mutex_unlock(&port->mutex);
+	mutex_unlock(&port->port.mutex);
 	return result;
 }
 
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 829a46684e1d..4543f359be75 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -897,7 +897,6 @@ int usb_serial_probe(struct usb_interface *interface,
 		spin_lock_init(&port->lock);
 		/* Keep this for private driver use for the moment but
 		   should probably go away */
-		mutex_init(&port->mutex);
 		INIT_WORK(&port->work, usb_serial_port_work);
 		serial->port[i] = port;
 		port->dev.parent = &interface->dev;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index ce911ebf91e8..acf6e457c04b 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -39,8 +39,6 @@ enum port_dev_state {
  * @serial: pointer back to the struct usb_serial owner of this port.
  * @port: pointer to the corresponding tty_port for this port.
  * @lock: spinlock to grab when updating portions of this structure.
- * @mutex: mutex used to synchronize serial_open() and serial_close()
- *	access for this port.
  * @number: the number of the port (the minor number).
  * @interrupt_in_buffer: pointer to the interrupt in buffer for this port.
  * @interrupt_in_urb: pointer to the interrupt in struct urb for this port.
@@ -77,7 +75,6 @@ struct usb_serial_port {
 	struct usb_serial	*serial;
 	struct tty_port		port;
 	spinlock_t		lock;
-	struct mutex            mutex;
 	unsigned char		number;
 
 	unsigned char		*interrupt_in_buffer;
-- 
cgit v1.2.3


From 44e4909e453eaa09c7de409fc9ee4ebefd986c1c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 30 Nov 2009 13:16:41 +0000
Subject: tty: tty_port: Change the buffer allocator locking

We want to be able to do this without regard for the activate/own open
method being used which causes a problem using port->mutex. Add another
mutex for now. Once everything uses port_open to do buffer allocs we can
kill it back off

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_port.c | 9 +++++----
 include/linux/tty.h     | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index dd471d63fae2..3ef644a2e517 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -25,6 +25,7 @@ void tty_port_init(struct tty_port *port)
 	init_waitqueue_head(&port->close_wait);
 	init_waitqueue_head(&port->delta_msr_wait);
 	mutex_init(&port->mutex);
+	mutex_init(&port->buf_mutex);
 	spin_lock_init(&port->lock);
 	port->close_delay = (50 * HZ) / 100;
 	port->closing_wait = (3000 * HZ) / 100;
@@ -34,10 +35,10 @@ EXPORT_SYMBOL(tty_port_init);
 int tty_port_alloc_xmit_buf(struct tty_port *port)
 {
 	/* We may sleep in get_zeroed_page() */
-	mutex_lock(&port->mutex);
+	mutex_lock(&port->buf_mutex);
 	if (port->xmit_buf == NULL)
 		port->xmit_buf = (unsigned char *)get_zeroed_page(GFP_KERNEL);
-	mutex_unlock(&port->mutex);
+	mutex_unlock(&port->buf_mutex);
 	if (port->xmit_buf == NULL)
 		return -ENOMEM;
 	return 0;
@@ -46,12 +47,12 @@ EXPORT_SYMBOL(tty_port_alloc_xmit_buf);
 
 void tty_port_free_xmit_buf(struct tty_port *port)
 {
-	mutex_lock(&port->mutex);
+	mutex_lock(&port->buf_mutex);
 	if (port->xmit_buf != NULL) {
 		free_page((unsigned long)port->xmit_buf);
 		port->xmit_buf = NULL;
 	}
-	mutex_unlock(&port->mutex);
+	mutex_unlock(&port->buf_mutex);
 }
 EXPORT_SYMBOL(tty_port_free_xmit_buf);
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6352ac257fcb..e9269ca1542a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -212,6 +212,7 @@ struct tty_port {
 	wait_queue_head_t	delta_msr_wait;	/* Modem status change */
 	unsigned long		flags;		/* TTY flags ASY_*/
 	struct mutex		mutex;		/* Locking */
+	struct mutex		buf_mutex;	/* Buffer alloc lock */
 	unsigned char		*xmit_buf;	/* Optional buffer */
 	unsigned int		close_delay;	/* Close port delay */
 	unsigned int		closing_wait;	/* Delay for output */
-- 
cgit v1.2.3


From 568aafc627e2978509e8a80c640ba534d1e843cc Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 30 Nov 2009 13:17:14 +0000
Subject: tty: tty_port: Add a kref object to the tty port

Users of tty port need a way to refcount ports when hotplugging is
involved.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_port.c | 18 ++++++++++++++++++
 include/linux/tty.h     | 12 ++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 84006de2900f..be492dd66437 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -29,6 +29,7 @@ void tty_port_init(struct tty_port *port)
 	spin_lock_init(&port->lock);
 	port->close_delay = (50 * HZ) / 100;
 	port->closing_wait = (3000 * HZ) / 100;
+	kref_init(&port->kref);
 }
 EXPORT_SYMBOL(tty_port_init);
 
@@ -56,6 +57,23 @@ void tty_port_free_xmit_buf(struct tty_port *port)
 }
 EXPORT_SYMBOL(tty_port_free_xmit_buf);
 
+static void tty_port_destructor(struct kref *kref)
+{
+	struct tty_port *port = container_of(kref, struct tty_port, kref);
+	if (port->xmit_buf)
+		free_page((unsigned long)port->xmit_buf);
+	if (port->ops->destruct)
+		port->ops->destruct(port);
+	else
+		kfree(port);
+}
+
+void tty_port_put(struct tty_port *port)
+{
+	if (port)
+		kref_put(&port->kref, tty_port_destructor);
+}
+EXPORT_SYMBOL(tty_port_put);
 
 /**
  *	tty_port_tty_get	-	get a tty reference
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e9269ca1542a..e6da667ba34d 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -199,6 +199,8 @@ struct tty_port_operations {
         /* FIXME: long term getting the tty argument *out* of this would be
            good for consoles */
 	int (*activate)(struct tty_port *port, struct tty_struct *tty);
+	/* Called on the final put of a port */
+	void (*destruct)(struct tty_port *port);
 };
 	
 struct tty_port {
@@ -219,6 +221,7 @@ struct tty_port {
 	int			drain_delay;	/* Set to zero if no pure time
 						   based drain is needed else
 						   set to size of fifo */
+	struct kref		kref;		/* Ref counter */
 };
 
 /*
@@ -461,6 +464,15 @@ extern int tty_write_lock(struct tty_struct *tty, int ndelay);
 extern void tty_port_init(struct tty_port *port);
 extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
+extern void tty_port_put(struct tty_port *port);
+
+extern inline struct tty_port *tty_port_get(struct tty_port *port)
+{
+	if (port)
+		kref_get(&port->kref);
+	return port;
+}
+
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
-- 
cgit v1.2.3


From 6ed847d8efd08658ece10c9129cd511c8d7452cd Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 30 Nov 2009 13:17:30 +0000
Subject: tty: isicom: sort out the board init logic

Split this into two flags - INIT meaning the board is set up and ACTIVE
meaning the board has ports open. Remove the broken HUPCL casing and push
the counts somewhere sensible.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/isicom.c  | 41 +++++++++++------------------------------
 include/linux/isicom.h |  1 +
 2 files changed, 12 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index e7be3ec6d21c..1e91c302ee42 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -793,21 +793,19 @@ static inline void isicom_setup_board(struct isi_board *bp)
 {
 	int channel;
 	struct isi_port *port;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bp->card_lock, flags);
-	if (bp->status & BOARD_ACTIVE) {
-		spin_unlock_irqrestore(&bp->card_lock, flags);
-		return;
-	}
-	port = bp->ports;
-	bp->status |= BOARD_ACTIVE;
-	for (channel = 0; channel < bp->port_count; channel++, port++)
-		drop_dtr_rts(port);
 	bp->count++;
-	spin_unlock_irqrestore(&bp->card_lock, flags);
+	if (!(bp->status & BOARD_INIT)) {
+		port = bp->ports;
+		for (channel = 0; channel < bp->port_count; channel++, port++)
+			drop_dtr_rts(port);
+	}
+	bp->status |= BOARD_ACTIVE | BOARD_INIT;
 }
 
+/* Activate and thus setup board are protected from races against shutdown
+   by the tty_port mutex */
+
 static int isicom_activate(struct tty_port *tport, struct tty_struct *tty)
 {
 	struct isi_port *port = container_of(tport, struct isi_port, port);
@@ -884,19 +882,10 @@ static int isicom_open(struct tty_struct *tty, struct file *filp)
 
 /* close et all */
 
-static inline void isicom_shutdown_board(struct isi_board *bp)
-{
-	if (bp->status & BOARD_ACTIVE)
-		bp->status &= ~BOARD_ACTIVE;
-}
-
 /* card->lock HAS to be held */
 static void isicom_shutdown_port(struct isi_port *port)
 {
 	struct isi_board *card = port->card;
-	struct tty_struct *tty;
-
-	tty = tty_port_tty_get(&port->port);
 
 	tty_port_free_xmit_buf(&port->port);
 	if (--card->count < 0) {
@@ -904,17 +893,9 @@ static void isicom_shutdown_port(struct isi_port *port)
 			card->base, card->count);
 		card->count = 0;
 	}
-
 	/* last port was closed, shutdown that board too */
-	if (tty && C_HUPCL(tty)) {
-		/* FIXME: this logic is bogus - it's the old logic that was
-		   bogus before but it still wants fixing */
-		if (!card->count) {
-			if (card->status & BOARD_ACTIVE)
-				card->status &= ~BOARD_ACTIVE;
-		}
-	}
-	tty_kref_put(tty);
+	if (!card->count)
+		card->status &= BOARD_ACTIVE;
 }
 
 static void isicom_flush_buffer(struct tty_struct *tty)
diff --git a/include/linux/isicom.h b/include/linux/isicom.h
index bbd42197298f..b92e05650639 100644
--- a/include/linux/isicom.h
+++ b/include/linux/isicom.h
@@ -67,6 +67,7 @@
 
 #define		FIRMWARE_LOADED		0x0001
 #define		BOARD_ACTIVE		0x0002
+#define		BOARD_INIT		0x0004
 
  	/* isi_port status bitmap  */
 
-- 
cgit v1.2.3


From eeb89d918c2fa2b809e464136bbafdaec2aacb30 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 30 Nov 2009 13:18:29 +0000
Subject: tty: push the BKL down into the handlers a bit

Start trying to untangle the remaining BKL mess

Updated to fix missing unlock_kernel noted by Dan Carpenter

Signed-off-by: Alan "I must be out of my tree" Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/pty.c       |   2 +-
 drivers/char/tty_io.c    | 141 +++++++++++++++++++++++++++--------------------
 drivers/char/tty_ldisc.c |  13 +++++
 include/linux/tty.h      |   2 +-
 4 files changed, 95 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index d86c0bc05c1c..385c44b3034f 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -659,7 +659,7 @@ static int __ptmx_open(struct inode *inode, struct file *filp)
 	if (!retval)
 		return 0;
 out1:
-	tty_release_dev(filp);
+	tty_release(inode, filp);
 	return retval;
 out:
 	devpts_kill_index(inode, index);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 59499ee0fe6a..1e2413093615 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -142,7 +142,6 @@ ssize_t redirected_tty_write(struct file *, const char __user *,
 							size_t, loff_t *);
 static unsigned int tty_poll(struct file *, poll_table *);
 static int tty_open(struct inode *, struct file *);
-static int tty_release(struct inode *, struct file *);
 long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long tty_compat_ioctl(struct file *file, unsigned int cmd,
@@ -1017,14 +1016,16 @@ out:
 
 void tty_write_message(struct tty_struct *tty, char *msg)
 {
-	lock_kernel();
 	if (tty) {
 		mutex_lock(&tty->atomic_write_lock);
-		if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags))
+		lock_kernel();
+		if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags)) {
+			unlock_kernel();
 			tty->ops->write(tty, msg, strlen(msg));
+		} else
+			unlock_kernel();
 		tty_write_unlock(tty);
 	}
-	unlock_kernel();
 	return;
 }
 
@@ -1202,14 +1203,21 @@ static int tty_driver_install_tty(struct tty_driver *driver,
 						struct tty_struct *tty)
 {
 	int idx = tty->index;
+	int ret;
 
-	if (driver->ops->install)
-		return driver->ops->install(driver, tty);
+	if (driver->ops->install) {
+		lock_kernel();
+		ret = driver->ops->install(driver, tty);
+		unlock_kernel();
+		return ret;
+	}
 
 	if (tty_init_termios(tty) == 0) {
+		lock_kernel();
 		tty_driver_kref_get(driver);
 		tty->count++;
 		driver->ttys[idx] = tty;
+		unlock_kernel();
 		return 0;
 	}
 	return -ENOMEM;
@@ -1302,10 +1310,14 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 	struct tty_struct *tty;
 	int retval;
 
+	lock_kernel();
 	/* Check if pty master is being opened multiple times */
 	if (driver->subtype == PTY_TYPE_MASTER &&
-		(driver->flags & TTY_DRIVER_DEVPTS_MEM) && !first_ok)
+		(driver->flags & TTY_DRIVER_DEVPTS_MEM) && !first_ok) {
+		unlock_kernel();
 		return ERR_PTR(-EIO);
+	}
+	unlock_kernel();
 
 	/*
 	 * First time open is complex, especially for PTY devices.
@@ -1335,8 +1347,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 	 * If we fail here just call release_tty to clean up.  No need
 	 * to decrement the use counts, as release_tty doesn't care.
 	 */
-
+	lock_kernel();
 	retval = tty_ldisc_setup(tty, tty->link);
+	unlock_kernel();
 	if (retval)
 		goto release_mem_out;
 	return tty;
@@ -1350,7 +1363,9 @@ release_mem_out:
 	if (printk_ratelimit())
 		printk(KERN_INFO "tty_init_dev: ldisc open failed, "
 				 "clearing slot %d\n", idx);
+	lock_kernel();
 	release_tty(tty, idx);
+	unlock_kernel();
 	return ERR_PTR(retval);
 }
 
@@ -1464,7 +1479,17 @@ static void release_tty(struct tty_struct *tty, int idx)
 	tty_kref_put(tty);
 }
 
-/*
+/**
+ *	tty_release		-	vfs callback for close
+ *	@inode: inode of tty
+ *	@filp: file pointer for handle to tty
+ *
+ *	Called the last time each file handle is closed that references
+ *	this tty. There may however be several such references.
+ *
+ *	Locking:
+ *		Takes bkl. See tty_release_dev
+ *
  * Even releasing the tty structures is a tricky business.. We have
  * to be very careful that the structures are all released at the
  * same time, as interrupts might otherwise get the wrong pointers.
@@ -1472,20 +1497,20 @@ static void release_tty(struct tty_struct *tty, int idx)
  * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
  * lead to double frees or releasing memory still in use.
  */
-void tty_release_dev(struct file *filp)
+
+int tty_release(struct inode *inode, struct file *filp)
 {
 	struct tty_struct *tty, *o_tty;
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	devpts;
 	int	idx;
 	char	buf[64];
-	struct 	inode *inode;
 
-	inode = filp->f_path.dentry->d_inode;
 	tty = (struct tty_struct *)filp->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_release_dev"))
-		return;
+		return 0;
 
+	lock_kernel();
 	check_tty_count(tty, "tty_release_dev");
 
 	tty_fasync(-1, filp, 0);
@@ -1500,19 +1525,22 @@ void tty_release_dev(struct file *filp)
 	if (idx < 0 || idx >= tty->driver->num) {
 		printk(KERN_DEBUG "tty_release_dev: bad idx when trying to "
 				  "free (%s)\n", tty->name);
-		return;
+		unlock_kernel();
+		return 0;
 	}
 	if (!devpts) {
 		if (tty != tty->driver->ttys[idx]) {
+			unlock_kernel();
 			printk(KERN_DEBUG "tty_release_dev: driver.table[%d] not tty "
 			       "for (%s)\n", idx, tty->name);
-			return;
+			return 0;
 		}
 		if (tty->termios != tty->driver->termios[idx]) {
+			unlock_kernel();
 			printk(KERN_DEBUG "tty_release_dev: driver.termios[%d] not termios "
 			       "for (%s)\n",
 			       idx, tty->name);
-			return;
+			return 0;
 		}
 	}
 #endif
@@ -1526,26 +1554,30 @@ void tty_release_dev(struct file *filp)
 	if (tty->driver->other &&
 	     !(tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
 		if (o_tty != tty->driver->other->ttys[idx]) {
+			unlock_kernel();
 			printk(KERN_DEBUG "tty_release_dev: other->table[%d] "
 					  "not o_tty for (%s)\n",
 			       idx, tty->name);
-			return;
+			return 0 ;
 		}
 		if (o_tty->termios != tty->driver->other->termios[idx]) {
+			unlock_kernel();
 			printk(KERN_DEBUG "tty_release_dev: other->termios[%d] "
 					  "not o_termios for (%s)\n",
 			       idx, tty->name);
-			return;
+			return 0;
 		}
 		if (o_tty->link != tty) {
+			unlock_kernel();
 			printk(KERN_DEBUG "tty_release_dev: bad pty pointers\n");
-			return;
+			return 0;
 		}
 	}
 #endif
 	if (tty->ops->close)
 		tty->ops->close(tty, filp);
 
+	unlock_kernel();
 	/*
 	 * Sanity check: if tty->count is going to zero, there shouldn't be
 	 * any waiters on tty->read_wait or tty->write_wait.  We test the
@@ -1568,6 +1600,7 @@ void tty_release_dev(struct file *filp)
 		   opens on /dev/tty */
 
 		mutex_lock(&tty_mutex);
+		lock_kernel();
 		tty_closing = tty->count <= 1;
 		o_tty_closing = o_tty &&
 			(o_tty->count <= (pty_master ? 1 : 0));
@@ -1598,6 +1631,7 @@ void tty_release_dev(struct file *filp)
 
 		printk(KERN_WARNING "tty_release_dev: %s: read/write wait queue "
 				    "active!\n", tty_name(tty, buf));
+		unlock_kernel();
 		mutex_unlock(&tty_mutex);
 		schedule();
 	}
@@ -1661,8 +1695,10 @@ void tty_release_dev(struct file *filp)
 	mutex_unlock(&tty_mutex);
 
 	/* check whether both sides are closing ... */
-	if (!tty_closing || (o_tty && !o_tty_closing))
-		return;
+	if (!tty_closing || (o_tty && !o_tty_closing)) {
+		unlock_kernel();
+		return 0;
+	}
 
 #ifdef TTY_DEBUG_HANGUP
 	printk(KERN_DEBUG "freeing tty structure...");
@@ -1680,10 +1716,12 @@ void tty_release_dev(struct file *filp)
 	/* Make this pty number available for reallocation */
 	if (devpts)
 		devpts_kill_index(inode, idx);
+	unlock_kernel();
+	return 0;
 }
 
 /**
- *	__tty_open		-	open a tty device
+ *	tty_open		-	open a tty device
  *	@inode: inode of device file
  *	@filp: file pointer to tty
  *
@@ -1703,7 +1741,7 @@ void tty_release_dev(struct file *filp)
  *		 ->siglock protects ->signal/->sighand
  */
 
-static int __tty_open(struct inode *inode, struct file *filp)
+static int tty_open(struct inode *inode, struct file *filp)
 {
 	struct tty_struct *tty = NULL;
 	int noctty, retval;
@@ -1720,10 +1758,12 @@ retry_open:
 	retval = 0;
 
 	mutex_lock(&tty_mutex);
+	lock_kernel();
 
 	if (device == MKDEV(TTYAUX_MAJOR, 0)) {
 		tty = get_current_tty();
 		if (!tty) {
+			unlock_kernel();
 			mutex_unlock(&tty_mutex);
 			return -ENXIO;
 		}
@@ -1755,12 +1795,14 @@ retry_open:
 				goto got_driver;
 			}
 		}
+		unlock_kernel();
 		mutex_unlock(&tty_mutex);
 		return -ENODEV;
 	}
 
 	driver = get_tty_driver(device, &index);
 	if (!driver) {
+		unlock_kernel();
 		mutex_unlock(&tty_mutex);
 		return -ENODEV;
 	}
@@ -1770,6 +1812,7 @@ got_driver:
 		tty = tty_driver_lookup_tty(driver, inode, index);
 
 		if (IS_ERR(tty)) {
+			unlock_kernel();
 			mutex_unlock(&tty_mutex);
 			return PTR_ERR(tty);
 		}
@@ -1784,8 +1827,10 @@ got_driver:
 
 	mutex_unlock(&tty_mutex);
 	tty_driver_kref_put(driver);
-	if (IS_ERR(tty))
+	if (IS_ERR(tty)) {
+		unlock_kernel();
 		return PTR_ERR(tty);
+	}
 
 	filp->private_data = tty;
 	file_move(filp, &tty->tty_files);
@@ -1813,11 +1858,15 @@ got_driver:
 		printk(KERN_DEBUG "error %d in opening %s...", retval,
 		       tty->name);
 #endif
-		tty_release_dev(filp);
-		if (retval != -ERESTARTSYS)
+		tty_release(inode, filp);
+		if (retval != -ERESTARTSYS) {
+			unlock_kernel();
 			return retval;
-		if (signal_pending(current))
+		}
+		if (signal_pending(current)) {
+			unlock_kernel();
 			return retval;
+		}
 		schedule();
 		/*
 		 * Need to reset f_op in case a hangup happened.
@@ -1826,8 +1875,11 @@ got_driver:
 			filp->f_op = &tty_fops;
 		goto retry_open;
 	}
+	unlock_kernel();
+
 
 	mutex_lock(&tty_mutex);
+	lock_kernel();
 	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
@@ -1835,44 +1887,13 @@ got_driver:
 	    tty->session == NULL)
 		__proc_set_tty(current, tty);
 	spin_unlock_irq(&current->sighand->siglock);
+	unlock_kernel();
 	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
-/* BKL pushdown: scary code avoidance wrapper */
-static int tty_open(struct inode *inode, struct file *filp)
-{
-	int ret;
-
-	lock_kernel();
-	ret = __tty_open(inode, filp);
-	unlock_kernel();
-	return ret;
-}
 
 
-
-
-/**
- *	tty_release		-	vfs callback for close
- *	@inode: inode of tty
- *	@filp: file pointer for handle to tty
- *
- *	Called the last time each file handle is closed that references
- *	this tty. There may however be several such references.
- *
- *	Locking:
- *		Takes bkl. See tty_release_dev
- */
-
-static int tty_release(struct inode *inode, struct file *filp)
-{
-	lock_kernel();
-	tty_release_dev(filp);
-	unlock_kernel();
-	return 0;
-}
-
 /**
  *	tty_poll	-	check tty status
  *	@filp: file being polled
@@ -2317,9 +2338,7 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
 	if (get_user(ldisc, p))
 		return -EFAULT;
 
-	lock_kernel();
 	ret = tty_set_ldisc(tty, ldisc);
-	unlock_kernel();
 
 	return ret;
 }
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index feb55075819b..d914e77f7f01 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -34,6 +34,8 @@
 #include <linux/vt_kern.h>
 #include <linux/selection.h>
 
+#include <linux/smp_lock.h>	/* For the moment */
+
 #include <linux/kmod.h>
 #include <linux/nsproxy.h>
 
@@ -545,6 +547,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	if (IS_ERR(new_ldisc))
 		return PTR_ERR(new_ldisc);
 
+	lock_kernel();
 	/*
 	 *	We need to look at the tty locking here for pty/tty pairs
 	 *	when both sides try to change in parallel.
@@ -558,6 +561,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	 */
 
 	if (tty->ldisc->ops->num == ldisc) {
+		unlock_kernel();
 		tty_ldisc_put(new_ldisc);
 		return 0;
 	}
@@ -569,6 +573,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	tty_wait_until_sent(tty, 0);
 
+	unlock_kernel();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/*
@@ -582,6 +587,9 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
 		mutex_lock(&tty->ldisc_mutex);
 	}
+
+	lock_kernel();
+
 	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 
 	/*
@@ -592,6 +600,8 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	tty->receive_room = 0;
 
 	o_ldisc = tty->ldisc;
+
+	unlock_kernel();
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -617,12 +627,14 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	flush_scheduled_work();
 
 	mutex_lock(&tty->ldisc_mutex);
+	lock_kernel();
 	if (test_bit(TTY_HUPPED, &tty->flags)) {
 		/* We were raced by the hangup method. It will have stomped
 		   the ldisc data and closed the ldisc down */
 		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 		mutex_unlock(&tty->ldisc_mutex);
 		tty_ldisc_put(new_ldisc);
+		unlock_kernel();
 		return -EIO;
 	}
 
@@ -664,6 +676,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	if (o_work)
 		schedule_delayed_work(&o_tty->buf.work, 1);
 	mutex_unlock(&tty->ldisc_mutex);
+	unlock_kernel();
 	return retval;
 }
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e6da667ba34d..405a9035fe40 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -449,7 +449,7 @@ extern void initialize_tty_struct(struct tty_struct *tty,
 		struct tty_driver *driver, int idx);
 extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 								int first_ok);
-extern void tty_release_dev(struct file *filp);
+extern int tty_release(struct inode *inode, struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
 extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
-- 
cgit v1.2.3


From 967c9ef9b8c3bdec1bd3a380edac19e0b9fbeadc Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 11 Dec 2009 22:00:57 -0800
Subject: Input: i8042 - allow installing platform filters for incoming data

Some hardware (such as Dell laptops) signal a variety of events through
the i8042 controller, even if these don't map to keyboard events. Add
support for drivers to filter the i8042 event stream in order to respond
to these events and (if appropriate) block them from entering the input
stream.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042.c | 58 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/i8042.h       | 18 +++++++++++++-
 2 files changed, 72 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 634da68f7f35..d84a36e545f6 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -126,6 +126,8 @@ static unsigned char i8042_suppress_kbd_ack;
 static struct platform_device *i8042_platform_device;
 
 static irqreturn_t i8042_interrupt(int irq, void *dev_id);
+static bool (*i8042_platform_filter)(unsigned char data, unsigned char str,
+				     struct serio *serio);
 
 void i8042_lock_chip(void)
 {
@@ -139,6 +141,48 @@ void i8042_unlock_chip(void)
 }
 EXPORT_SYMBOL(i8042_unlock_chip);
 
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio))
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&i8042_lock, flags);
+
+	if (i8042_platform_filter) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	i8042_platform_filter = filter;
+
+out:
+	spin_unlock_irqrestore(&i8042_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(i8042_install_filter);
+
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *port))
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&i8042_lock, flags);
+
+	if (i8042_platform_filter != filter) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	i8042_platform_filter = NULL;
+
+out:
+	spin_unlock_irqrestore(&i8042_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(i8042_remove_filter);
+
 /*
  * The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to
  * be ready for reading values from it / writing values to it.
@@ -373,7 +417,8 @@ static void i8042_stop(struct serio *serio)
  * It is called from i8042_interrupt and thus is running with interrupts
  * off and i8042_lock held.
  */
-static bool i8042_filter(unsigned char data, unsigned char str)
+static bool i8042_filter(unsigned char data, unsigned char str,
+			 struct serio *serio)
 {
 	if (unlikely(i8042_suppress_kbd_ack)) {
 		if ((~str & I8042_STR_AUXDATA) &&
@@ -384,6 +429,11 @@ static bool i8042_filter(unsigned char data, unsigned char str)
 		}
 	}
 
+	if (i8042_platform_filter && i8042_platform_filter(data, str, serio)) {
+		dbg("Filtered out by platfrom filter\n");
+		return true;
+	}
+
 	return false;
 }
 
@@ -396,6 +446,7 @@ static bool i8042_filter(unsigned char data, unsigned char str)
 static irqreturn_t i8042_interrupt(int irq, void *dev_id)
 {
 	struct i8042_port *port;
+	struct serio *serio;
 	unsigned long flags;
 	unsigned char str, data;
 	unsigned int dfl;
@@ -462,18 +513,19 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id)
 	}
 
 	port = &i8042_ports[port_no];
+	serio = port->exists ? port->serio : NULL;
 
 	dbg("%02x <- i8042 (interrupt, %d, %d%s%s)",
 	    data, port_no, irq,
 	    dfl & SERIO_PARITY ? ", bad parity" : "",
 	    dfl & SERIO_TIMEOUT ? ", timeout" : "");
 
-	filtered = i8042_filter(data, str);
+	filtered = i8042_filter(data, str, serio);
 
 	spin_unlock_irqrestore(&i8042_lock, flags);
 
 	if (likely(port->exists && !filtered))
-		serio_interrupt(port->serio, data, dfl);
+		serio_interrupt(serio, data, dfl);
 
  out:
 	return IRQ_RETVAL(ret);
diff --git a/include/linux/i8042.h b/include/linux/i8042.h
index 60c3360ef6ad..9bf6870ee5f4 100644
--- a/include/linux/i8042.h
+++ b/include/linux/i8042.h
@@ -39,6 +39,10 @@ void i8042_lock_chip(void);
 void i8042_unlock_chip(void);
 int i8042_command(unsigned char *param, int command);
 bool i8042_check_port_owner(const struct serio *);
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio));
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *serio));
 
 #else
 
@@ -52,7 +56,7 @@ void i8042_unlock_chip(void)
 
 int i8042_command(unsigned char *param, int command)
 {
-	return -ENOSYS;
+	return -ENODEV;
 }
 
 bool i8042_check_port_owner(const struct serio *serio)
@@ -60,6 +64,18 @@ bool i8042_check_port_owner(const struct serio *serio)
 	return false;
 }
 
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio))
+{
+	return -ENODEV;
+}
+
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *serio))
+{
+	return -ENODEV;
+}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From 01fc0ac198eabcbf460e1ed058860a935b6c2c9a Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 19 Apr 2009 21:57:19 +0200
Subject: kbuild: move bounds.h to include/generated

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 .gitignore                 | 1 -
 Kbuild                     | 2 +-
 Makefile                   | 2 +-
 include/linux/mmzone.h     | 2 +-
 include/linux/page-flags.h | 2 +-
 kernel/bounds.c            | 2 +-
 6 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/.gitignore b/.gitignore
index 946c7ec5c922..36d9cd6d4281 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,7 +52,6 @@ include/linux/autoconf.h
 include/linux/compile.h
 include/linux/version.h
 include/linux/utsrelease.h
-include/linux/bounds.h
 include/generated
 
 # stgit generated dirs
diff --git a/Kbuild b/Kbuild
index f056b4feee51..1165d7a5ca4a 100644
--- a/Kbuild
+++ b/Kbuild
@@ -8,7 +8,7 @@
 #####
 # 1) Generate bounds.h
 
-bounds-file := include/linux/bounds.h
+bounds-file := include/generated/bounds.h
 
 always  := $(bounds-file)
 targets := $(bounds-file) kernel/bounds.s
diff --git a/Makefile b/Makefile
index 07711786dc95..b58e9312ce30 100644
--- a/Makefile
+++ b/Makefile
@@ -1197,7 +1197,7 @@ MRPROPER_DIRS  += include/config include2 usr/include include/generated
 MRPROPER_FILES += .config .config.old include/asm .version .old_version \
                   include/linux/autoconf.h include/linux/version.h      \
                   include/linux/utsrelease.h                            \
-                  include/linux/bounds.h include/asm*/asm-offsets.h     \
+                  include/asm*/asm-offsets.h                            \
 		  Module.symvers Module.markers tags TAGS cscope*
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6f7561730d88..30fe668c2542 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -15,7 +15,7 @@
 #include <linux/seqlock.h>
 #include <linux/nodemask.h>
 #include <linux/pageblock-flags.h>
-#include <linux/bounds.h>
+#include <generated/bounds.h>
 #include <asm/atomic.h>
 #include <asm/page.h>
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6b202b173955..ef36725aa515 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 #ifndef __GENERATING_BOUNDS_H
 #include <linux/mm_types.h>
-#include <linux/bounds.h>
+#include <generated/bounds.h>
 #endif /* !__GENERATING_BOUNDS_H */
 
 /*
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
 
 void foo(void)
 {
-	/* The enum constants to put into include/linux/bounds.h */
+	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 	/* End of constants */
-- 
cgit v1.2.3


From 98b8788ae91694499d1995035625bea16a4db0c4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 18 Oct 2009 00:39:40 +0200
Subject: drop explicit include of autoconf.h

kbuild.h forces include of autoconf.h on the
commandline using -include - so we do not need to
include the file explicit.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/cris/arch-v32/kernel/head.S                | 1 -
 arch/cris/kernel/asm-offsets.c                  | 1 -
 arch/cris/kernel/vmlinux.lds.S                  | 1 -
 arch/ia64/kvm/asm-offsets.c                     | 1 -
 drivers/accessibility/braille/braille_console.c | 1 -
 drivers/hid/hid-lg.h                            | 2 --
 drivers/platform/x86/compal-laptop.c            | 1 -
 drivers/staging/iio/ring_sw.h                   | 1 -
 include/linux/mmdebug.h                         | 2 --
 9 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S
index 3db478eb5155..76266f80a5f1 100644
--- a/arch/cris/arch-v32/kernel/head.S
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -10,7 +10,6 @@
  * The macros found in mmu_defs_asm.h uses the ## concatenation operator, so
  * -traditional must not be used when assembling this file.
  */
-#include <linux/autoconf.h>
 #include <arch/memmap.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/intr_vect.h>
diff --git a/arch/cris/kernel/asm-offsets.c b/arch/cris/kernel/asm-offsets.c
index ddd6fbbe75de..dd7b8e983221 100644
--- a/arch/cris/kernel/asm-offsets.c
+++ b/arch/cris/kernel/asm-offsets.c
@@ -1,6 +1,5 @@
 #include <linux/sched.h>
 #include <asm/thread_info.h>
-#include <linux/autoconf.h>
 
 /*
  * Generate definitions needed by assembly language modules.
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index bbfda67d2907..d49d17d2a14f 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -8,7 +8,6 @@
  * the kernel has booted.
  */
 
-#include <linux/autoconf.h>
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 0c3564a7a033..9324c875caf5 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -22,7 +22,6 @@
  *
  */
 
-#include <linux/autoconf.h>
 #include <linux/kvm_host.h>
 #include <linux/kbuild.h>
 
diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index d672cfe7ca59..cb423f5aef24 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -21,7 +21,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/autoconf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h
index 27ae750ca878..bf31592eaf79 100644
--- a/drivers/hid/hid-lg.h
+++ b/drivers/hid/hid-lg.h
@@ -1,8 +1,6 @@
 #ifndef __HID_LG_H
 #define __HID_LG_H
 
-#include <linux/autoconf.h>
-
 #ifdef CONFIG_LOGITECH_FF
 int lgff_init(struct hid_device *hdev);
 #else
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 11003bba10d3..1a387e79f719 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -51,7 +51,6 @@
 #include <linux/dmi.h>
 #include <linux/backlight.h>
 #include <linux/platform_device.h>
-#include <linux/autoconf.h>
 
 #define COMPAL_DRIVER_VERSION "0.2.6"
 
diff --git a/drivers/staging/iio/ring_sw.h b/drivers/staging/iio/ring_sw.h
index f0b86f02cd80..fd677f008365 100644
--- a/drivers/staging/iio/ring_sw.h
+++ b/drivers/staging/iio/ring_sw.h
@@ -29,7 +29,6 @@
  * driver requests - some may support multiple options */
 
 
-#include <linux/autoconf.h>
 #include "iio.h"
 #include "ring_generic.h"
 
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 8a5509877192..ee24ef8ab616 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,8 +1,6 @@
 #ifndef LINUX_MM_DEBUG_H
 #define LINUX_MM_DEBUG_H 1
 
-#include <linux/autoconf.h>
-
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #else
-- 
cgit v1.2.3


From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 18 Oct 2009 00:52:28 +0200
Subject: kbuild: move utsrelease.h to include/generated

Fix up all users of utsrelease.h

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 .gitignore                                   | 1 -
 Makefile                                     | 5 ++---
 arch/alpha/boot/bootp.c                      | 2 +-
 arch/alpha/boot/bootpz.c                     | 2 +-
 arch/alpha/boot/main.c                       | 2 +-
 arch/frv/kernel/setup.c                      | 2 +-
 arch/powerpc/platforms/52xx/efika.c          | 2 +-
 arch/powerpc/platforms/amigaone/setup.c      | 2 +-
 arch/powerpc/platforms/chrp/setup.c          | 2 +-
 arch/powerpc/platforms/powermac/bootx_init.c | 2 +-
 arch/x86/boot/header.S                       | 2 +-
 arch/x86/boot/version.c                      | 2 +-
 drivers/staging/panel/panel.c                | 2 +-
 include/linux/vermagic.h                     | 2 +-
 init/version.c                               | 2 +-
 kernel/kexec.c                               | 2 +-
 kernel/trace/trace.c                         | 2 +-
 17 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/.gitignore b/.gitignore
index c6c19ea6ea96..002d5304968b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,7 +47,6 @@ Module.symvers
 #
 include/config
 include/linux/version.h
-include/linux/utsrelease.h
 include/generated
 
 # stgit generated dirs
diff --git a/Makefile b/Makefile
index 3bdd932e3d88..860224d7cbcf 100644
--- a/Makefile
+++ b/Makefile
@@ -968,7 +968,7 @@ endif
 # prepare2 creates a makefile if using a separate output directory
 prepare2: prepare3 outputmakefile
 
-prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
+prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \
                    include/config/auto.conf
 	$(cmd_crmodverdir)
 
@@ -1005,7 +1005,7 @@ endef
 include/linux/version.h: $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
 
-include/linux/utsrelease.h: include/config/kernel.release FORCE
+include/generated/utsrelease.h: include/config/kernel.release FORCE
 	$(call filechk,utsrelease.h)
 
 PHONY += headerdep
@@ -1151,7 +1151,6 @@ CLEAN_FILES +=	vmlinux System.map \
 MRPROPER_DIRS  += include/config usr/include include/generated
 MRPROPER_FILES += .config .config.old .version .old_version             \
                   include/linux/version.h                               \
-                  include/linux/utsrelease.h                            \
 		  Module.symvers Module.markers tags TAGS cscope*
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c
index 3af21c789339..3c8d1b25c661 100644
--- a/arch/alpha/boot/bootp.c
+++ b/arch/alpha/boot/bootp.c
@@ -9,7 +9,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c
index 1036b515e20c..ade3f129dc27 100644
--- a/arch/alpha/boot/bootpz.c
+++ b/arch/alpha/boot/bootpz.c
@@ -11,7 +11,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 89f3be071ae5..644b7db55438 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -7,7 +7,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index 55e4fab7c0bc..75cf7f4b2fa8 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -10,7 +10,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index bcc69e1f77c1..45c0cb9b67e6 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/pci.h>
 #include <linux/of.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 9290a7a442d0..fb4eb0df054c 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/seq_file.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 
 #include <asm/machdep.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index cd4ad9aea760..0a6f5ab8aab3 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -23,7 +23,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/adb.h>
 #include <linux/module.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index cf660916ae0b..9dd789a7370d 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/page.h>
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b31cc54b4641..93e689f4bd86 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -16,7 +16,7 @@
  */
 
 #include <asm/segment.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <asm/boot.h>
 #include <asm/e820.h>
 #include <asm/page_types.h>
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index 4d88763e39cb..2b15aa488ffb 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -13,7 +13,7 @@
  */
 
 #include "boot.h"
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <generated/compile.h>
 
 const char kernel_version[] =
diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
index 4ce399b6d237..f98a52448eae 100644
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -55,7 +55,7 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 
 #include <linux/io.h>
 #include <asm/uaccess.h>
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 79b9837d9ca0..cf97b5b9d1fe 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -1,4 +1,4 @@
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/module.h>
 
 /* Simply sanity version stamp for modules. */
diff --git a/init/version.c b/init/version.c
index 82328aaca1ef..adff586401a5 100644
--- a/init/version.c
+++ b/init/version.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/version.h>
 
 #ifndef CONFIG_KALLSYMS
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..83f54e2a6eed 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/utsname.h>
 #include <linux/numa.h>
 #include <linux/suspend.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88bd9ae2a9ed..bfb1b64bfa9d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
  *  Copyright (C) 2004 William Lee Irwin III
  */
 #include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/stacktrace.h>
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
-- 
cgit v1.2.3


From 7a77080dbec28ab2bceb422398601dcc53c142ad Mon Sep 17 00:00:00 2001
From: Jie Zhang <jie.zhang@analog.com>
Date: Thu, 12 Nov 2009 17:59:56 +0800
Subject: net: add net_tstamp.h to headers_install

include/linux/net_tstamp.h is userspace API for hardware time stamping
of network packets. It should be exported to userspace.

Signed-off-by: Jie Zhang <jie.zhang@analog.com>
Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Patrick Ohly <patrick.ohly@gmx.de>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index f72914db2a11..756f831cbdd5 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -118,6 +118,7 @@ header-y += mtio.h
 header-y += ncp_no.h
 header-y += neighbour.h
 header-y += net_dropmon.h
+header-y += net_tstamp.h
 header-y += netfilter_arp.h
 header-y += netrom.h
 header-y += nfs2.h
-- 
cgit v1.2.3


From c5df7f775148723de39274537a886e9502eef336 Mon Sep 17 00:00:00 2001
From: Albert Herranz <albert_herranz@yahoo.es>
Date: Sat, 12 Dec 2009 06:31:54 +0000
Subject: powerpc: allow ioremap within reserved memory regions

Add a flag to let a platform ioremap memory regions marked as reserved.

This flag will be used later by the Nintendo Wii support code to allow
ioremapping the I/O region sitting between MEM1 and MEM2 and marked
as reserved RAM in the patch "wii: use both mem1 and mem2 as ram".

This will no longer be needed when proper discontig memory support
for 32-bit PowerPC is added to the kernel.

Signed-off-by: Albert Herranz <albert_herranz@yahoo.es>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/mm/init_32.c    | 5 +++++
 arch/powerpc/mm/mmu_decl.h   | 1 +
 arch/powerpc/mm/pgtable_32.c | 4 +++-
 include/linux/lmb.h          | 1 +
 lib/lmb.c                    | 7 ++++++-
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 703c7c2e0d9f..4ec900af332f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -82,6 +82,11 @@ extern struct task_struct *current_set[NR_CPUS];
 int __map_without_bats;
 int __map_without_ltlbs;
 
+/*
+ * This tells the system to allow ioremapping memory marked as reserved.
+ */
+int __allow_ioremap_reserved;
+
 /* max amount of low RAM to map in */
 unsigned long __max_low_memory = MAX_LOW_MEM;
 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9aa39fe74f8a..34dacc32250d 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -115,6 +115,7 @@ extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 extern void invalidate_tlbcam_entry(int index);
 
 extern int __map_without_bats;
+extern int __allow_ioremap_reserved;
 extern unsigned long ioremap_base;
 extern unsigned int rtas_data, rtas_size;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index b55bbe87acb8..177e4038b43c 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -26,6 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/lmb.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -191,7 +192,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 * Don't allow anybody to remap normal RAM that we're using.
 	 * mem_init() sets high_memory so only do the check after that.
 	 */
-	if (mem_init_done && (p < virt_to_phys(high_memory))) {
+	if (mem_init_done && (p < virt_to_phys(high_memory)) &&
+	    !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) {
 		printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 2442e3f3d033..ef82b8fcbddb 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -54,6 +54,7 @@ extern u64 __init lmb_phys_mem_size(void);
 extern u64 lmb_end_of_DRAM(void);
 extern void __init lmb_enforce_memory_limit(u64 memory_limit);
 extern int __init lmb_is_reserved(u64 addr);
+extern int lmb_is_region_reserved(u64 base, u64 size);
 extern int lmb_find(struct lmb_property *res);
 
 extern void lmb_dump_all(void);
diff --git a/lib/lmb.c b/lib/lmb.c
index 0343c05609f0..9cee17142b2c 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -263,7 +263,7 @@ long __init lmb_reserve(u64 base, u64 size)
 	return lmb_add_region(_rgn, base, size);
 }
 
-long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
+long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
 {
 	unsigned long i;
 
@@ -493,6 +493,11 @@ int __init lmb_is_reserved(u64 addr)
 	return 0;
 }
 
+int lmb_is_region_reserved(u64 base, u64 size)
+{
+	return lmb_overlaps_region(&lmb.reserved, base, size);
+}
+
 /*
  * Given a <base, len>, find which memory regions belong to this range.
  * Adjust the request and return a contiguous chunk.
-- 
cgit v1.2.3


From 8051effcbced8478119167b93b0e9554cb82d28e Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Thu, 26 Nov 2009 11:10:05 +0000
Subject: spi: SuperH MSIOF SPI Master driver V2

This patch is V2 of SPI Master support for the SuperH MSIOF.
Full duplex, spi mode 0-3, active high cs, 3-wire and lsb
first should all be supported, but the driver has so far
only been tested with "mmc_spi".

The MSIOF hardware comes with 32-bit FIFOs for receive and
transmit, and this driver simply breaks the SPI messages
into FIFO-sized chunks. The MSIOF hardware manages the pins
for clock, receive and transmit (sck/miso/mosi), but the chip
select pin is managed by software and must be configured as
a regular GPIO pin by the board code.

Performance wise there is still room for improvement, but
on a Ecovec board with the built-in sh7724 MSIOF0 this driver
gets Mini-sd read speeds of about half a megabyte per second.

Future work include better clock setup and merging of 8-bit
transfers into 32-bit words to reduce interrupt load and
improve throughput.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig          |   7 +
 drivers/spi/Makefile         |   1 +
 drivers/spi/spi_sh_msiof.c   | 691 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/sh_msiof.h |  10 +
 4 files changed, 709 insertions(+)
 create mode 100644 drivers/spi/spi_sh_msiof.c
 create mode 100644 include/linux/spi/sh_msiof.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index c00f9e5b9df8..06e1c0c9d35e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -223,6 +223,13 @@ config SPI_S3C24XX_GPIO
 	  the inbuilt hardware cannot provide the transfer mode, or
 	  where the board is using non hardware connected pins.
 
+config SPI_SH_MSIOF
+	tristate "SuperH MSIOF SPI controller"
+	depends on SUPERH && HAVE_CLK
+	select SPI_BITBANG
+	help
+	  SPI driver for SuperH MSIOF blocks.
+
 config SPI_SH_SCI
 	tristate "SuperH SCI SPI controller"
 	depends on SUPERH
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 2f2bebc63b64..9c51e2526892 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_SPI_XILINX)		+= xilinx_spi.o
 obj-$(CONFIG_SPI_XILINX_OF)		+= xilinx_spi_of.o
 obj-$(CONFIG_SPI_XILINX_PLTFM)		+= xilinx_spi_pltfm.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi_sh_sci.o
+obj-$(CONFIG_SPI_SH_MSIOF)		+= spi_sh_msiof.o
 obj-$(CONFIG_SPI_STMP3XXX)		+= spi_stmp.o
 # 	... add above this line ...
 
diff --git a/drivers/spi/spi_sh_msiof.c b/drivers/spi/spi_sh_msiof.c
new file mode 100644
index 000000000000..51e5e1dfa6e5
--- /dev/null
+++ b/drivers/spi/spi_sh_msiof.c
@@ -0,0 +1,691 @@
+/*
+ * SuperH MSIOF SPI Master Interface
+ *
+ * Copyright (c) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/completion.h>
+#include <linux/pm_runtime.h>
+#include <linux/gpio.h>
+#include <linux/bitmap.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+#include <linux/spi/sh_msiof.h>
+
+#include <asm/spi.h>
+#include <asm/unaligned.h>
+
+struct sh_msiof_spi_priv {
+	struct spi_bitbang bitbang; /* must be first for spi_bitbang.c */
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct platform_device *pdev;
+	struct sh_msiof_spi_info *info;
+	struct completion done;
+	unsigned long flags;
+	int tx_fifo_size;
+	int rx_fifo_size;
+};
+
+#define TMDR1	0x00
+#define TMDR2	0x04
+#define TMDR3	0x08
+#define RMDR1	0x10
+#define RMDR2	0x14
+#define RMDR3	0x18
+#define TSCR	0x20
+#define RSCR	0x22
+#define CTR	0x28
+#define FCTR	0x30
+#define STR	0x40
+#define IER	0x44
+#define TDR1	0x48
+#define TDR2	0x4c
+#define TFDR	0x50
+#define RDR1	0x58
+#define RDR2	0x5c
+#define RFDR	0x60
+
+#define CTR_TSCKE (1 << 15)
+#define CTR_TFSE  (1 << 14)
+#define CTR_TXE   (1 << 9)
+#define CTR_RXE   (1 << 8)
+
+#define STR_TEOF  (1 << 23)
+#define STR_REOF  (1 << 7)
+
+static unsigned long sh_msiof_read(struct sh_msiof_spi_priv *p, int reg_offs)
+{
+	switch (reg_offs) {
+	case TSCR:
+	case RSCR:
+		return ioread16(p->mapbase + reg_offs);
+	default:
+		return ioread32(p->mapbase + reg_offs);
+	}
+}
+
+static void sh_msiof_write(struct sh_msiof_spi_priv *p, int reg_offs,
+			   unsigned long value)
+{
+	switch (reg_offs) {
+	case TSCR:
+	case RSCR:
+		iowrite16(value, p->mapbase + reg_offs);
+		break;
+	default:
+		iowrite32(value, p->mapbase + reg_offs);
+		break;
+	}
+}
+
+static int sh_msiof_modify_ctr_wait(struct sh_msiof_spi_priv *p,
+				    unsigned long clr, unsigned long set)
+{
+	unsigned long mask = clr | set;
+	unsigned long data;
+	int k;
+
+	data = sh_msiof_read(p, CTR);
+	data &= ~clr;
+	data |= set;
+	sh_msiof_write(p, CTR, data);
+
+	for (k = 100; k > 0; k--) {
+		if ((sh_msiof_read(p, CTR) & mask) == set)
+			break;
+
+		udelay(10);
+	}
+
+	return k > 0 ? 0 : -ETIMEDOUT;
+}
+
+static irqreturn_t sh_msiof_spi_irq(int irq, void *data)
+{
+	struct sh_msiof_spi_priv *p = data;
+
+	/* just disable the interrupt and wake up */
+	sh_msiof_write(p, IER, 0);
+	complete(&p->done);
+
+	return IRQ_HANDLED;
+}
+
+static struct {
+	unsigned short div;
+	unsigned short scr;
+} const sh_msiof_spi_clk_table[] = {
+	{ 1, 0x0007 },
+	{ 2, 0x0000 },
+	{ 4, 0x0001 },
+	{ 8, 0x0002 },
+	{ 16, 0x0003 },
+	{ 32, 0x0004 },
+	{ 64, 0x1f00 },
+	{ 128, 0x1f01 },
+	{ 256, 0x1f02 },
+	{ 512, 0x1f03 },
+	{ 1024, 0x1f04 },
+};
+
+static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
+				      unsigned long parent_rate,
+				      unsigned long spi_hz)
+{
+	unsigned long div = 1024;
+	size_t k;
+
+	if (!WARN_ON(!spi_hz || !parent_rate))
+		div = parent_rate / spi_hz;
+
+	/* TODO: make more fine grained */
+
+	for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_clk_table); k++) {
+		if (sh_msiof_spi_clk_table[k].div >= div)
+			break;
+	}
+
+	k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_clk_table) - 1);
+
+	sh_msiof_write(p, TSCR, sh_msiof_spi_clk_table[k].scr);
+	sh_msiof_write(p, RSCR, sh_msiof_spi_clk_table[k].scr);
+}
+
+static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p,
+				      int cpol, int cpha,
+				      int tx_hi_z, int lsb_first)
+{
+	unsigned long tmp;
+	int edge;
+
+	/*
+	 * CPOL CPHA     TSCKIZ RSCKIZ TEDG REDG(!)
+	 *    0    0         10     10    1    0
+	 *    0    1         10     10    0    1
+	 *    1    0         11     11    0    1
+	 *    1    1         11     11    1    0
+	 *
+	 * (!) Note: REDG is inverted recommended data sheet setting
+	 */
+
+	sh_msiof_write(p, FCTR, 0);
+	sh_msiof_write(p, TMDR1, 0xe2000005 | (lsb_first << 24));
+	sh_msiof_write(p, RMDR1, 0x22000005 | (lsb_first << 24));
+
+	tmp = 0xa0000000;
+	tmp |= cpol << 30; /* TSCKIZ */
+	tmp |= cpol << 28; /* RSCKIZ */
+
+	edge = cpol ? cpha : !cpha;
+
+	tmp |= edge << 27; /* TEDG */
+	tmp |= !edge << 26; /* REDG */
+	tmp |= (tx_hi_z ? 2 : 0) << 22; /* TXDIZ */
+	sh_msiof_write(p, CTR, tmp);
+}
+
+static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p,
+				       const void *tx_buf, void *rx_buf,
+				       int bits, int words)
+{
+	unsigned long dr2;
+
+	dr2 = ((bits - 1) << 24) | ((words - 1) << 16);
+
+	if (tx_buf)
+		sh_msiof_write(p, TMDR2, dr2);
+	else
+		sh_msiof_write(p, TMDR2, dr2 | 1);
+
+	if (rx_buf)
+		sh_msiof_write(p, RMDR2, dr2);
+
+	sh_msiof_write(p, IER, STR_TEOF | STR_REOF);
+}
+
+static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p)
+{
+	sh_msiof_write(p, STR, sh_msiof_read(p, STR));
+}
+
+static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p,
+				      const void *tx_buf, int words, int fs)
+{
+	const unsigned char *buf_8 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, buf_8[k] << fs);
+}
+
+static void sh_msiof_spi_write_fifo_16(struct sh_msiof_spi_priv *p,
+				       const void *tx_buf, int words, int fs)
+{
+	const unsigned short *buf_16 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, buf_16[k] << fs);
+}
+
+static void sh_msiof_spi_write_fifo_16u(struct sh_msiof_spi_priv *p,
+					const void *tx_buf, int words, int fs)
+{
+	const unsigned short *buf_16 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, get_unaligned(&buf_16[k]) << fs);
+}
+
+static void sh_msiof_spi_write_fifo_32(struct sh_msiof_spi_priv *p,
+				       const void *tx_buf, int words, int fs)
+{
+	const unsigned int *buf_32 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, buf_32[k] << fs);
+}
+
+static void sh_msiof_spi_write_fifo_32u(struct sh_msiof_spi_priv *p,
+					const void *tx_buf, int words, int fs)
+{
+	const unsigned int *buf_32 = tx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		sh_msiof_write(p, TFDR, get_unaligned(&buf_32[k]) << fs);
+}
+
+static void sh_msiof_spi_read_fifo_8(struct sh_msiof_spi_priv *p,
+				     void *rx_buf, int words, int fs)
+{
+	unsigned char *buf_8 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		buf_8[k] = sh_msiof_read(p, RFDR) >> fs;
+}
+
+static void sh_msiof_spi_read_fifo_16(struct sh_msiof_spi_priv *p,
+				      void *rx_buf, int words, int fs)
+{
+	unsigned short *buf_16 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		buf_16[k] = sh_msiof_read(p, RFDR) >> fs;
+}
+
+static void sh_msiof_spi_read_fifo_16u(struct sh_msiof_spi_priv *p,
+				       void *rx_buf, int words, int fs)
+{
+	unsigned short *buf_16 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		put_unaligned(sh_msiof_read(p, RFDR) >> fs, &buf_16[k]);
+}
+
+static void sh_msiof_spi_read_fifo_32(struct sh_msiof_spi_priv *p,
+				      void *rx_buf, int words, int fs)
+{
+	unsigned int *buf_32 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		buf_32[k] = sh_msiof_read(p, RFDR) >> fs;
+}
+
+static void sh_msiof_spi_read_fifo_32u(struct sh_msiof_spi_priv *p,
+				       void *rx_buf, int words, int fs)
+{
+	unsigned int *buf_32 = rx_buf;
+	int k;
+
+	for (k = 0; k < words; k++)
+		put_unaligned(sh_msiof_read(p, RFDR) >> fs, &buf_32[k]);
+}
+
+static int sh_msiof_spi_bits(struct spi_device *spi, struct spi_transfer *t)
+{
+	int bits;
+
+	bits = t ? t->bits_per_word : 0;
+	bits = bits ? bits : spi->bits_per_word;
+	return bits;
+}
+
+static unsigned long sh_msiof_spi_hz(struct spi_device *spi,
+				     struct spi_transfer *t)
+{
+	unsigned long hz;
+
+	hz = t ? t->speed_hz : 0;
+	hz = hz ? hz : spi->max_speed_hz;
+	return hz;
+}
+
+static int sh_msiof_spi_setup_transfer(struct spi_device *spi,
+				       struct spi_transfer *t)
+{
+	int bits;
+
+	/* noting to check hz values against since parent clock is disabled */
+
+	bits = sh_msiof_spi_bits(spi, t);
+	if (bits < 8)
+		return -EINVAL;
+	if (bits > 32)
+		return -EINVAL;
+
+	return spi_bitbang_setup_transfer(spi, t);
+}
+
+static void sh_msiof_spi_chipselect(struct spi_device *spi, int is_on)
+{
+	struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master);
+	int value;
+
+	/* chip select is active low unless SPI_CS_HIGH is set */
+	if (spi->mode & SPI_CS_HIGH)
+		value = (is_on == BITBANG_CS_ACTIVE) ? 1 : 0;
+	else
+		value = (is_on == BITBANG_CS_ACTIVE) ? 0 : 1;
+
+	if (is_on == BITBANG_CS_ACTIVE) {
+		if (!test_and_set_bit(0, &p->flags)) {
+			pm_runtime_get_sync(&p->pdev->dev);
+			clk_enable(p->clk);
+		}
+
+		/* Configure pins before asserting CS */
+		sh_msiof_spi_set_pin_regs(p, !!(spi->mode & SPI_CPOL),
+					  !!(spi->mode & SPI_CPHA),
+					  !!(spi->mode & SPI_3WIRE),
+					  !!(spi->mode & SPI_LSB_FIRST));
+	}
+
+	/* use spi->controller data for CS (same strategy as spi_gpio) */
+	gpio_set_value((unsigned)spi->controller_data, value);
+
+	if (is_on == BITBANG_CS_INACTIVE) {
+		if (test_and_clear_bit(0, &p->flags)) {
+			clk_disable(p->clk);
+			pm_runtime_put(&p->pdev->dev);
+		}
+	}
+}
+
+static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
+				  void (*tx_fifo)(struct sh_msiof_spi_priv *,
+						  const void *, int, int),
+				  void (*rx_fifo)(struct sh_msiof_spi_priv *,
+						  void *, int, int),
+				  const void *tx_buf, void *rx_buf,
+				  int words, int bits)
+{
+	int fifo_shift;
+	int ret;
+
+	/* limit maximum word transfer to rx/tx fifo size */
+	if (tx_buf)
+		words = min_t(int, words, p->tx_fifo_size);
+	if (rx_buf)
+		words = min_t(int, words, p->rx_fifo_size);
+
+	/* the fifo contents need shifting */
+	fifo_shift = 32 - bits;
+
+	/* setup msiof transfer mode registers */
+	sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words);
+
+	/* write tx fifo */
+	if (tx_buf)
+		tx_fifo(p, tx_buf, words, fifo_shift);
+
+	/* setup clock and rx/tx signals */
+	ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TSCKE);
+	if (rx_buf)
+		ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_RXE);
+	ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TXE);
+
+	/* start by setting frame bit */
+	INIT_COMPLETION(p->done);
+	ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TFSE);
+	if (ret) {
+		dev_err(&p->pdev->dev, "failed to start hardware\n");
+		goto err;
+	}
+
+	/* wait for tx fifo to be emptied / rx fifo to be filled */
+	wait_for_completion(&p->done);
+
+	/* read rx fifo */
+	if (rx_buf)
+		rx_fifo(p, rx_buf, words, fifo_shift);
+
+	/* clear status bits */
+	sh_msiof_reset_str(p);
+
+	/* shut down frame, tx/tx and clock signals */
+	ret = sh_msiof_modify_ctr_wait(p, CTR_TFSE, 0);
+	ret = ret ? ret : sh_msiof_modify_ctr_wait(p, CTR_TXE, 0);
+	if (rx_buf)
+		ret = ret ? ret : sh_msiof_modify_ctr_wait(p, CTR_RXE, 0);
+	ret = ret ? ret : sh_msiof_modify_ctr_wait(p, CTR_TSCKE, 0);
+	if (ret) {
+		dev_err(&p->pdev->dev, "failed to shut down hardware\n");
+		goto err;
+	}
+
+	return words;
+
+ err:
+	sh_msiof_write(p, IER, 0);
+	return ret;
+}
+
+static int sh_msiof_spi_txrx(struct spi_device *spi, struct spi_transfer *t)
+{
+	struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master);
+	void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, int, int);
+	void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, int, int);
+	int bits;
+	int bytes_per_word;
+	int bytes_done;
+	int words;
+	int n;
+
+	bits = sh_msiof_spi_bits(spi, t);
+
+	/* setup bytes per word and fifo read/write functions */
+	if (bits <= 8) {
+		bytes_per_word = 1;
+		tx_fifo = sh_msiof_spi_write_fifo_8;
+		rx_fifo = sh_msiof_spi_read_fifo_8;
+	} else if (bits <= 16) {
+		bytes_per_word = 2;
+		if ((unsigned long)t->tx_buf & 0x01)
+			tx_fifo = sh_msiof_spi_write_fifo_16u;
+		else
+			tx_fifo = sh_msiof_spi_write_fifo_16;
+
+		if ((unsigned long)t->rx_buf & 0x01)
+			rx_fifo = sh_msiof_spi_read_fifo_16u;
+		else
+			rx_fifo = sh_msiof_spi_read_fifo_16;
+	} else {
+		bytes_per_word = 4;
+		if ((unsigned long)t->tx_buf & 0x03)
+			tx_fifo = sh_msiof_spi_write_fifo_32u;
+		else
+			tx_fifo = sh_msiof_spi_write_fifo_32;
+
+		if ((unsigned long)t->rx_buf & 0x03)
+			rx_fifo = sh_msiof_spi_read_fifo_32u;
+		else
+			rx_fifo = sh_msiof_spi_read_fifo_32;
+	}
+
+	/* setup clocks (clock already enabled in chipselect()) */
+	sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk),
+				  sh_msiof_spi_hz(spi, t));
+
+	/* transfer in fifo sized chunks */
+	words = t->len / bytes_per_word;
+	bytes_done = 0;
+
+	while (bytes_done < t->len) {
+		n = sh_msiof_spi_txrx_once(p, tx_fifo, rx_fifo,
+					   t->tx_buf + bytes_done,
+					   t->rx_buf + bytes_done,
+					   words, bits);
+		if (n < 0)
+			break;
+
+		bytes_done += n * bytes_per_word;
+		words -= n;
+	}
+
+	return bytes_done;
+}
+
+static u32 sh_msiof_spi_txrx_word(struct spi_device *spi, unsigned nsecs,
+				  u32 word, u8 bits)
+{
+	BUG(); /* unused but needed by bitbang code */
+	return 0;
+}
+
+static int sh_msiof_spi_probe(struct platform_device *pdev)
+{
+	struct resource	*r;
+	struct spi_master *master;
+	struct sh_msiof_spi_priv *p;
+	char clk_name[16];
+	int i;
+	int ret;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct sh_msiof_spi_priv));
+	if (master == NULL) {
+		dev_err(&pdev->dev, "failed to allocate spi master\n");
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	p = spi_master_get_devdata(master);
+
+	platform_set_drvdata(pdev, p);
+	p->info = pdev->dev.platform_data;
+	init_completion(&p->done);
+
+	snprintf(clk_name, sizeof(clk_name), "msiof%d", pdev->id);
+	p->clk = clk_get(&pdev->dev, clk_name);
+	if (IS_ERR(p->clk)) {
+		dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	i = platform_get_irq(pdev, 0);
+	if (!r || i < 0) {
+		dev_err(&pdev->dev, "cannot get platform resources\n");
+		ret = -ENOENT;
+		goto err2;
+	}
+	p->mapbase = ioremap_nocache(r->start, resource_size(r));
+	if (!p->mapbase) {
+		dev_err(&pdev->dev, "unable to ioremap\n");
+		ret = -ENXIO;
+		goto err2;
+	}
+
+	ret = request_irq(i, sh_msiof_spi_irq, IRQF_DISABLED,
+			  dev_name(&pdev->dev), p);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to request irq\n");
+		goto err3;
+	}
+
+	p->pdev = pdev;
+	pm_runtime_enable(&pdev->dev);
+
+	/* The standard version of MSIOF use 64 word FIFOs */
+	p->tx_fifo_size = 64;
+	p->rx_fifo_size = 64;
+
+	/* Platform data may override FIFO sizes */
+	if (p->info->tx_fifo_override)
+		p->tx_fifo_size = p->info->tx_fifo_override;
+	if (p->info->rx_fifo_override)
+		p->rx_fifo_size = p->info->rx_fifo_override;
+
+	/* init master and bitbang code */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->mode_bits |= SPI_LSB_FIRST | SPI_3WIRE;
+	master->flags = 0;
+	master->bus_num = pdev->id;
+	master->num_chipselect = p->info->num_chipselect;
+	master->setup = spi_bitbang_setup;
+	master->cleanup = spi_bitbang_cleanup;
+
+	p->bitbang.master = master;
+	p->bitbang.chipselect = sh_msiof_spi_chipselect;
+	p->bitbang.setup_transfer = sh_msiof_spi_setup_transfer;
+	p->bitbang.txrx_bufs = sh_msiof_spi_txrx;
+	p->bitbang.txrx_word[SPI_MODE_0] = sh_msiof_spi_txrx_word;
+	p->bitbang.txrx_word[SPI_MODE_1] = sh_msiof_spi_txrx_word;
+	p->bitbang.txrx_word[SPI_MODE_2] = sh_msiof_spi_txrx_word;
+	p->bitbang.txrx_word[SPI_MODE_3] = sh_msiof_spi_txrx_word;
+
+	ret = spi_bitbang_start(&p->bitbang);
+	if (ret == 0)
+		return 0;
+
+	pm_runtime_disable(&pdev->dev);
+ err3:
+	iounmap(p->mapbase);
+ err2:
+	clk_put(p->clk);
+ err1:
+	spi_master_put(master);
+ err0:
+	return ret;
+}
+
+static int sh_msiof_spi_remove(struct platform_device *pdev)
+{
+	struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = spi_bitbang_stop(&p->bitbang);
+	if (!ret) {
+		pm_runtime_disable(&pdev->dev);
+		free_irq(platform_get_irq(pdev, 0), sh_msiof_spi_irq);
+		iounmap(p->mapbase);
+		clk_put(p->clk);
+		spi_master_put(p->bitbang.master);
+	}
+	return ret;
+}
+
+static int sh_msiof_spi_runtime_nop(struct device *dev)
+{
+	/* Runtime PM callback shared between ->runtime_suspend()
+	 * and ->runtime_resume(). Simply returns success.
+	 *
+	 * This driver re-initializes all registers after
+	 * pm_runtime_get_sync() anyway so there is no need
+	 * to save and restore registers here.
+	 */
+	return 0;
+}
+
+static struct dev_pm_ops sh_msiof_spi_dev_pm_ops = {
+	.runtime_suspend = sh_msiof_spi_runtime_nop,
+	.runtime_resume = sh_msiof_spi_runtime_nop,
+};
+
+static struct platform_driver sh_msiof_spi_drv = {
+	.probe		= sh_msiof_spi_probe,
+	.remove		= sh_msiof_spi_remove,
+	.driver		= {
+		.name		= "spi_sh_msiof",
+		.owner		= THIS_MODULE,
+		.pm		= &sh_msiof_spi_dev_pm_ops,
+	},
+};
+
+static int __init sh_msiof_spi_init(void)
+{
+	return platform_driver_register(&sh_msiof_spi_drv);
+}
+module_init(sh_msiof_spi_init);
+
+static void __exit sh_msiof_spi_exit(void)
+{
+	platform_driver_unregister(&sh_msiof_spi_drv);
+}
+module_exit(sh_msiof_spi_exit);
+
+MODULE_DESCRIPTION("SuperH MSIOF SPI Master Interface Driver");
+MODULE_AUTHOR("Magnus Damm");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:spi_sh_msiof");
diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
new file mode 100644
index 000000000000..2e8db3d2d2e5
--- /dev/null
+++ b/include/linux/spi/sh_msiof.h
@@ -0,0 +1,10 @@
+#ifndef __SPI_SH_MSIOF_H__
+#define __SPI_SH_MSIOF_H__
+
+struct sh_msiof_spi_info {
+	int tx_fifo_override;
+	int rx_fifo_override;
+	u16 num_chipselect;
+};
+
+#endif /* __SPI_SH_MSIOF_H__ */
-- 
cgit v1.2.3


From 87d9b4e1c52867a45331a9a5495f6448e0c68b23 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:14:20 +0800
Subject: tracing: Extract duplicate ftrace_raw_init_event_foo()

Use a generic trace_event_raw_init() function for all event's raw_init
callbacks (but kprobes) instead of defining the same version for each
of these.
This shrinks the kernel code:

   text    data     bss     dec     hex filename
5355293 1961928 7103260 14420481         dc0a01 vmlinux.o.old
5346802 1961864 7103260 14411926         dbe896 vmlinux.o

raw_init can't be removed, because ftrace events and kprobe events
use different raw_init callbacks. Though it's possible to totally
remove raw_init, I choose to leave it as it is for now.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <4B1DC48C.7080603@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h |  1 +
 include/linux/syscalls.h     |  4 ++--
 include/trace/ftrace.h       | 35 ++++-------------------------------
 kernel/trace/trace_events.c  | 14 ++++++++++++++
 4 files changed, 21 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 38f8d6553831..ea44b8911094 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -158,6 +158,7 @@ enum {
 	FILTER_PTR_STRING,
 };
 
+extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bc70c5810fec..94ac28437bef 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -145,7 +145,7 @@ struct perf_event_attr;
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -167,7 +167,7 @@ struct perf_event_attr;
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c4eca380204e..6055b0604c86 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -623,23 +623,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
  * static struct ftrace_event_call __used
  * __attribute__((__aligned__(4)))
  * __attribute__((section("_ftrace_events"))) event_<call> = {
  *	.name			= "<call>",
  *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.raw_init		= trace_event_raw_init,
  *	.regfunc		= ftrace_reg_event_<call>,
  *	.unregfunc		= ftrace_unreg_event_<call>,
  *	.show_format		= ftrace_format_<call>,
@@ -647,9 +636,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  */
 
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
 #ifdef CONFIG_EVENT_PROFILE
 
 #define _TRACE_PROFILE_INIT(call)					\
@@ -744,19 +730,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 									\
 static struct trace_event ftrace_event_type_##call = {			\
 	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}
+};
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
@@ -776,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##template,		\
@@ -793,7 +767,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##call,			\
@@ -953,7 +927,6 @@ end:									\
 	perf_swevent_put_recursion_context(rctx);			\
 end_recursion:								\
 	local_irq_restore(irq_flags);					\
-									\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..8ed66e0d476b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -105,6 +105,20 @@ void trace_destroy_fields(struct ftrace_event_call *call)
 	}
 }
 
+int trace_event_raw_init(struct ftrace_event_call *call)
+{
+	int id;
+
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trace_event_raw_init);
+
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
-- 
cgit v1.2.3


From 614a71a26ba3d97e9fa85649db69a682b78e407d Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:14:36 +0800
Subject: tracing: Pull up calls to trace_define_common_fields()

Call trace_define_common_fields() in event_create_dir() only.
This avoids trace events to handle it from their define_fields
callbacks and shrinks the kernel code size:

   text    data     bss     dec     hex filename
5346802 1961864 7103260 14411926         dbe896 vmlinux.o.old
5345151 1961864 7103260 14410275         dbe223 vmlinux.o

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
LKML-Reference: <4B1DC49C.8000107@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h  | 1 -
 include/trace/ftrace.h        | 4 ----
 kernel/trace/trace_events.c   | 7 ++++---
 kernel/trace/trace_export.c   | 4 ----
 kernel/trace/trace_kprobe.c   | 8 --------
 kernel/trace/trace_syscalls.c | 8 --------
 6 files changed, 4 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index ea44b8911094..db97c64ce0e9 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -159,7 +159,6 @@ enum {
 };
 
 extern int trace_event_raw_init(struct ftrace_event_call *call);
-extern int trace_define_common_fields(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 6055b0604c86..2af2f7a2c1bd 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	struct ftrace_raw_##call field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8ed66e0d476b..97b0b3aa166d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
 	if (ret)							\
 		return ret;
 
-int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(struct ftrace_event_call *call)
 {
 	int ret;
 	struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
 void trace_destroy_fields(struct ftrace_event_call *call)
 {
@@ -927,7 +926,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 		 		  id);
 
 	if (call->define_fields) {
-		ret = call->define_fields(call);
+		ret = trace_define_common_fields(call);
+		if (!ret)
+			ret = call->define_fields(call);
 		if (ret < 0) {
 			pr_warning("Could not initialize trace point"
 				   " events/%s\n", call->name);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 	struct struct_name field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..e3c80e925896 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1113,10 +1113,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (!ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
 	/* Set argument names as fields */
@@ -1131,10 +1127,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kretprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (!ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..b957edd0ca3b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3


From e00bf2ec60605eb95687b7a0c3b83c87c48541dc Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:17:29 +0800
Subject: tracing: Change event->profile_count to be int type

Like total_profile_count, struct ftrace_event_call::profile_count
is protected by event_mutex, so it doesn't need to be atomic_t.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B1DC549.5010705@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h       | 2 +-
 include/linux/syscalls.h           | 2 --
 include/trace/ftrace.h             | 1 -
 kernel/trace/trace_event_profile.c | 6 +++---
 kernel/trace/trace_kprobe.c        | 1 -
 5 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index db97c64ce0e9..2233c98d80df 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -131,7 +131,7 @@ struct ftrace_event_call {
 	void			*mod;
 	void			*data;
 
-	atomic_t		profile_count;
+	int			profile_count;
 	int			(*profile_enable)(struct ftrace_event_call *);
 	void			(*profile_disable)(struct ftrace_event_call *);
 };
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 94ac28437bef..72d69860d901 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -102,12 +102,10 @@ struct perf_event_attr;
 #ifdef CONFIG_EVENT_PROFILE
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysenter_enable,				       \
 	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysexit_enable,				       \
 	.profile_disable = prof_sysexit_disable,
 #else
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0c21af85211c..73523151a731 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -629,7 +629,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 #ifdef CONFIG_EVENT_PROFILE
 
 #define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
 	.profile_enable = ftrace_profile_enable_##call,			\
 	.profile_disable = ftrace_profile_disable_##call,
 
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 	char *buf;
 	int ret = -ENOMEM;
 
-	if (atomic_inc_return(&event->profile_count))
+	if (event->profile_count++ > 0)
 		return 0;
 
 	if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
 		perf_trace_buf = NULL;
 	}
 fail_buf:
-	atomic_dec(&event->profile_count);
+	event->profile_count--;
 
 	return ret;
 }
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
 	char *buf, *nmi_buf;
 
-	if (!atomic_add_negative(-1, &event->profile_count))
+	if (--event->profile_count > 0)
 		return;
 
 	event->profile_disable(event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e3c80e925896..6ed223447a3f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1426,7 +1426,6 @@ static int register_probe_event(struct trace_probe *tp)
 	call->unregfunc = probe_event_disable;
 
 #ifdef CONFIG_EVENT_PROFILE
-	atomic_set(&call->profile_count, -1);
 	call->profile_enable = probe_profile_enable;
 	call->profile_disable = probe_profile_disable;
 #endif
-- 
cgit v1.2.3


From 4107da2a2853c070fb3effa58a83f94dc067fc44 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Thu, 17 Sep 2009 08:54:03 -0400
Subject: mfd: Add 88PM8607 driver

This adds a core driver for 88PM8607 found in Marvell DKB development
platform. This driver is a proxy for all accesses to 88PM8607
sub-drivers which will be merged on top of this one, RTC, regulators,
battery and so on.

This chip is manufactured by Marvell.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm8607.c       | 302 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/Kconfig          |  10 ++
 drivers/mfd/Makefile         |   1 +
 include/linux/mfd/88pm8607.h | 217 +++++++++++++++++++++++++++++++
 4 files changed, 530 insertions(+)
 create mode 100644 drivers/mfd/88pm8607.c
 create mode 100644 include/linux/mfd/88pm8607.h

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm8607.c b/drivers/mfd/88pm8607.c
new file mode 100644
index 000000000000..7e3f65907993
--- /dev/null
+++ b/drivers/mfd/88pm8607.c
@@ -0,0 +1,302 @@
+/*
+ * Base driver for Marvell 88PM8607
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/88pm8607.h>
+
+
+#define PM8607_REG_RESOURCE(_start, _end)		\
+{							\
+	.start	= PM8607_##_start,			\
+	.end	= PM8607_##_end,			\
+	.flags	= IORESOURCE_IO,			\
+}
+
+static struct resource pm8607_regulator_resources[] = {
+	PM8607_REG_RESOURCE(BUCK1, BUCK1),
+	PM8607_REG_RESOURCE(BUCK2, BUCK2),
+	PM8607_REG_RESOURCE(BUCK3, BUCK3),
+	PM8607_REG_RESOURCE(LDO1,  LDO1),
+	PM8607_REG_RESOURCE(LDO2,  LDO2),
+	PM8607_REG_RESOURCE(LDO3,  LDO3),
+	PM8607_REG_RESOURCE(LDO4,  LDO4),
+	PM8607_REG_RESOURCE(LDO5,  LDO5),
+	PM8607_REG_RESOURCE(LDO6,  LDO6),
+	PM8607_REG_RESOURCE(LDO7,  LDO7),
+	PM8607_REG_RESOURCE(LDO8,  LDO8),
+	PM8607_REG_RESOURCE(LDO9,  LDO9),
+	PM8607_REG_RESOURCE(LDO10, LDO10),
+	PM8607_REG_RESOURCE(LDO12, LDO12),
+	PM8607_REG_RESOURCE(LDO14, LDO14),
+};
+
+#define PM8607_REG_DEVS(_name, _id)					\
+{									\
+	.name		= "88pm8607-" #_name,				\
+	.num_resources	= 1,						\
+	.resources	= &pm8607_regulator_resources[PM8607_ID_##_id],	\
+}
+
+static struct mfd_cell pm8607_devs[] = {
+	PM8607_REG_DEVS(buck1, BUCK1),
+	PM8607_REG_DEVS(buck2, BUCK2),
+	PM8607_REG_DEVS(buck3, BUCK3),
+	PM8607_REG_DEVS(ldo1,  LDO1),
+	PM8607_REG_DEVS(ldo2,  LDO2),
+	PM8607_REG_DEVS(ldo3,  LDO3),
+	PM8607_REG_DEVS(ldo4,  LDO4),
+	PM8607_REG_DEVS(ldo5,  LDO5),
+	PM8607_REG_DEVS(ldo6,  LDO6),
+	PM8607_REG_DEVS(ldo7,  LDO7),
+	PM8607_REG_DEVS(ldo8,  LDO8),
+	PM8607_REG_DEVS(ldo9,  LDO9),
+	PM8607_REG_DEVS(ldo10, LDO10),
+	PM8607_REG_DEVS(ldo12, LDO12),
+	PM8607_REG_DEVS(ldo14, LDO14),
+};
+
+static inline int pm8607_read_device(struct pm8607_chip *chip,
+				     int reg, int bytes, void *dest)
+{
+	struct i2c_client *i2c = chip->client;
+	unsigned char data;
+	int ret;
+
+	data = (unsigned char)reg;
+	ret = i2c_master_send(i2c, &data, 1);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_master_recv(i2c, dest, bytes);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static inline int pm8607_write_device(struct pm8607_chip *chip,
+				      int reg, int bytes, void *src)
+{
+	struct i2c_client *i2c = chip->client;
+	unsigned char buf[bytes + 1];
+	int ret;
+
+	buf[0] = (unsigned char)reg;
+	memcpy(&buf[1], src, bytes);
+
+	ret = i2c_master_send(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int pm8607_reg_read(struct pm8607_chip *chip, int reg)
+{
+	unsigned char data;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	if (ret < 0)
+		return ret;
+	else
+		return (int)data;
+}
+EXPORT_SYMBOL(pm8607_reg_read);
+
+int pm8607_reg_write(struct pm8607_chip *chip, int reg,
+		     unsigned char data)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->write(chip, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_reg_write);
+
+int pm8607_bulk_read(struct pm8607_chip *chip, int reg,
+		     int count, unsigned char *buf)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_bulk_read);
+
+int pm8607_bulk_write(struct pm8607_chip *chip, int reg,
+		      int count, unsigned char *buf)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->write(chip, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_bulk_write);
+
+int pm8607_set_bits(struct pm8607_chip *chip, int reg,
+		    unsigned char mask, unsigned char data)
+{
+	unsigned char value;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, 1, &value);
+	if (ret < 0)
+		goto out;
+	value &= ~mask;
+	value |= data;
+	ret = chip->write(chip, reg, 1, &value);
+out:
+	mutex_unlock(&chip->io_lock);
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_set_bits);
+
+
+static const struct i2c_device_id pm8607_id_table[] = {
+	{ "88PM8607", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, pm8607_id_table);
+
+
+static int __devinit pm8607_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct pm8607_platform_data *pdata = client->dev.platform_data;
+	struct pm8607_chip *chip;
+	int i, count;
+	int ret;
+
+	chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->dev = &client->dev;
+	chip->read = pm8607_read_device;
+	chip->write = pm8607_write_device;
+	i2c_set_clientdata(client, chip);
+
+	mutex_init(&chip->io_lock);
+	dev_set_drvdata(chip->dev, chip);
+
+	ret = pm8607_reg_read(chip, PM8607_CHIP_ID);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret);
+		goto out;
+	}
+	if ((ret & CHIP_ID_MASK) == CHIP_ID)
+		dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n",
+			 ret);
+	else {
+		dev_err(chip->dev, "Failed to detect Marvell 88PM8607. "
+			"Chip ID: %02x\n", ret);
+		goto out;
+	}
+	chip->chip_id = ret;
+
+	ret = pm8607_reg_read(chip, PM8607_BUCK3);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret);
+		goto out;
+	}
+	if (ret & PM8607_BUCK3_DOUBLE)
+		chip->buck3_double = 1;
+
+	ret = pm8607_reg_read(chip, PM8607_MISC1);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret);
+		goto out;
+	}
+	if (pdata->i2c_port == PI2C_PORT)
+		ret |= PM8607_MISC1_PI2C;
+	else
+		ret &= ~PM8607_MISC1_PI2C;
+	ret = pm8607_reg_write(chip, PM8607_MISC1, ret);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret);
+		goto out;
+	}
+
+
+	count = ARRAY_SIZE(pm8607_devs);
+	for (i = 0; i < count; i++) {
+		ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i],
+				      1, NULL, 0);
+		if (ret != 0) {
+			dev_err(chip->dev, "Failed to add subdevs\n");
+			goto out;
+		}
+	}
+
+	return 0;
+
+out:
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit pm8607_remove(struct i2c_client *client)
+{
+	struct pm8607_chip *chip = i2c_get_clientdata(client);
+
+	mfd_remove_devices(chip->dev);
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver pm8607_driver = {
+	.driver	= {
+		.name	= "88PM8607",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pm8607_probe,
+	.remove		= __devexit_p(pm8607_remove),
+	.id_table	= pm8607_id_table,
+};
+
+static int __init pm8607_init(void)
+{
+	int ret;
+	ret = i2c_add_driver(&pm8607_driver);
+	if (ret != 0)
+		pr_err("Failed to register 88PM8607 I2C driver: %d\n", ret);
+	return ret;
+}
+subsys_initcall(pm8607_init);
+
+static void __exit pm8607_exit(void)
+{
+	i2c_del_driver(&pm8607_driver);
+}
+module_exit(pm8607_exit);
+
+MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a296e717e86e..26a36f887357 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -319,6 +319,16 @@ config EZX_PCAP
 	  This enables the PCAP ASIC present on EZX Phones. This is
 	  needed for MMC, TouchScreen, Sound, USB, etc..
 
+config MFD_88PM8607
+	bool "Support Marvell 88PM8607"
+	depends on I2C
+	select MFD_CORE
+	help
+	  This supports for Marvell 88PM8607 Power Management IC. This includes
+	  the I2C driver and the core APIs _only_, you have to select
+	  individual components like voltage regulators, RTC and
+	  battery-charger under the corresponding menus.
+
 endmenu
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 11350c1d9301..2596add427a2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -52,3 +52,4 @@ obj-$(CONFIG_PCF50633_ADC)	+= pcf50633-adc.o
 obj-$(CONFIG_PCF50633_GPIO)	+= pcf50633-gpio.o
 obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)	+= ab3100-otp.o
+obj-$(CONFIG_MFD_88PM8607)	+= 88pm8607.o
diff --git a/include/linux/mfd/88pm8607.h b/include/linux/mfd/88pm8607.h
new file mode 100644
index 000000000000..f41b428d2cec
--- /dev/null
+++ b/include/linux/mfd/88pm8607.h
@@ -0,0 +1,217 @@
+/*
+ * Marvell 88PM8607 Interface
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MFD_88PM8607_H
+#define __LINUX_MFD_88PM8607_H
+
+enum {
+	PM8607_ID_BUCK1 = 0,
+	PM8607_ID_BUCK2,
+	PM8607_ID_BUCK3,
+
+	PM8607_ID_LDO1,
+	PM8607_ID_LDO2,
+	PM8607_ID_LDO3,
+	PM8607_ID_LDO4,
+	PM8607_ID_LDO5,
+	PM8607_ID_LDO6,
+	PM8607_ID_LDO7,
+	PM8607_ID_LDO8,
+	PM8607_ID_LDO9,
+	PM8607_ID_LDO10,
+	PM8607_ID_LDO12,
+	PM8607_ID_LDO14,
+
+	PM8607_ID_RG_MAX,
+};
+
+#define CHIP_ID				(0x40)
+#define CHIP_ID_MASK			(0xF8)
+
+/* Interrupt Registers */
+#define PM8607_STATUS_1			(0x01)
+#define PM8607_STATUS_2			(0x02)
+#define PM8607_INT_STATUS1		(0x03)
+#define PM8607_INT_STATUS2		(0x04)
+#define PM8607_INT_STATUS3		(0x05)
+#define PM8607_INT_MASK_1		(0x06)
+#define PM8607_INT_MASK_2		(0x07)
+#define PM8607_INT_MASK_3		(0x08)
+
+/* Regulator Control Registers */
+#define PM8607_LDO1			(0x10)
+#define PM8607_LDO2			(0x11)
+#define PM8607_LDO3			(0x12)
+#define PM8607_LDO4			(0x13)
+#define PM8607_LDO5			(0x14)
+#define PM8607_LDO6			(0x15)
+#define PM8607_LDO7			(0x16)
+#define PM8607_LDO8			(0x17)
+#define PM8607_LDO9			(0x18)
+#define PM8607_LDO10			(0x19)
+#define PM8607_LDO12			(0x1A)
+#define PM8607_LDO14			(0x1B)
+#define PM8607_SLEEP_MODE1		(0x1C)
+#define PM8607_SLEEP_MODE2		(0x1D)
+#define PM8607_SLEEP_MODE3		(0x1E)
+#define PM8607_SLEEP_MODE4		(0x1F)
+#define PM8607_GO			(0x20)
+#define PM8607_SLEEP_BUCK1		(0x21)
+#define PM8607_SLEEP_BUCK2		(0x22)
+#define PM8607_SLEEP_BUCK3		(0x23)
+#define PM8607_BUCK1			(0x24)
+#define PM8607_BUCK2			(0x25)
+#define PM8607_BUCK3			(0x26)
+#define PM8607_BUCK_CONTROLS		(0x27)
+#define PM8607_SUPPLIES_EN11		(0x2B)
+#define PM8607_SUPPLIES_EN12		(0x2C)
+#define PM8607_GROUP1			(0x2D)
+#define PM8607_GROUP2			(0x2E)
+#define PM8607_GROUP3			(0x2F)
+#define PM8607_GROUP4			(0x30)
+#define PM8607_GROUP5			(0x31)
+#define PM8607_GROUP6			(0x32)
+#define PM8607_SUPPLIES_EN21		(0x33)
+#define PM8607_SUPPLIES_EN22		(0x34)
+
+/* RTC Control Registers */
+#define PM8607_RTC1			(0xA0)
+#define PM8607_RTC_COUNTER1		(0xA1)
+#define PM8607_RTC_COUNTER2		(0xA2)
+#define PM8607_RTC_COUNTER3		(0xA3)
+#define PM8607_RTC_COUNTER4		(0xA4)
+#define PM8607_RTC_EXPIRE1		(0xA5)
+#define PM8607_RTC_EXPIRE2		(0xA6)
+#define PM8607_RTC_EXPIRE3		(0xA7)
+#define PM8607_RTC_EXPIRE4		(0xA8)
+#define PM8607_RTC_TRIM1		(0xA9)
+#define PM8607_RTC_TRIM2		(0xAA)
+#define PM8607_RTC_TRIM3		(0xAB)
+#define PM8607_RTC_TRIM4		(0xAC)
+#define PM8607_RTC_MISC1		(0xAD)
+#define PM8607_RTC_MISC2		(0xAE)
+#define PM8607_RTC_MISC3		(0xAF)
+
+/* Misc Registers */
+#define PM8607_CHIP_ID			(0x00)
+#define PM8607_LDO1			(0x10)
+#define PM8607_DVC3			(0x26)
+#define PM8607_MISC1			(0x40)
+
+/* bit definitions for PM8607 events */
+#define PM8607_EVENT_ONKEY		(1 << 0)
+#define PM8607_EVENT_EXTON		(1 << 1)
+#define PM8607_EVENT_CHG		(1 << 2)
+#define PM8607_EVENT_BAT		(1 << 3)
+#define PM8607_EVENT_RTC		(1 << 4)
+#define PM8607_EVENT_CC			(1 << 5)
+#define PM8607_EVENT_VBAT		(1 << 8)
+#define PM8607_EVENT_VCHG		(1 << 9)
+#define PM8607_EVENT_VSYS		(1 << 10)
+#define PM8607_EVENT_TINT		(1 << 11)
+#define PM8607_EVENT_GPADC0		(1 << 12)
+#define PM8607_EVENT_GPADC1		(1 << 13)
+#define PM8607_EVENT_GPADC2		(1 << 14)
+#define PM8607_EVENT_GPADC3		(1 << 15)
+#define PM8607_EVENT_AUDIO_SHORT	(1 << 16)
+#define PM8607_EVENT_PEN		(1 << 17)
+#define PM8607_EVENT_HEADSET		(1 << 18)
+#define PM8607_EVENT_HOOK		(1 << 19)
+#define PM8607_EVENT_MICIN		(1 << 20)
+#define PM8607_EVENT_CHG_TIMEOUT	(1 << 21)
+#define PM8607_EVENT_CHG_DONE		(1 << 22)
+#define PM8607_EVENT_CHG_FAULT		(1 << 23)
+
+/* bit definitions of Status Query Interface */
+#define PM8607_STATUS_CC		(1 << 3)
+#define PM8607_STATUS_PEN		(1 << 4)
+#define PM8607_STATUS_HEADSET		(1 << 5)
+#define PM8607_STATUS_HOOK		(1 << 6)
+#define PM8607_STATUS_MICIN		(1 << 7)
+#define PM8607_STATUS_ONKEY		(1 << 8)
+#define PM8607_STATUS_EXTON		(1 << 9)
+#define PM8607_STATUS_CHG		(1 << 10)
+#define PM8607_STATUS_BAT		(1 << 11)
+#define PM8607_STATUS_VBUS		(1 << 12)
+#define PM8607_STATUS_OV		(1 << 13)
+
+/* bit definitions of BUCK3 */
+#define PM8607_BUCK3_DOUBLE		(1 << 6)
+
+/* bit definitions of Misc1 */
+#define PM8607_MISC1_PI2C		(1 << 0)
+
+/* Interrupt Number in 88PM8607 */
+enum {
+	PM8607_IRQ_ONKEY = 0,
+	PM8607_IRQ_EXTON,
+	PM8607_IRQ_CHG,
+	PM8607_IRQ_BAT,
+	PM8607_IRQ_RTC,
+	PM8607_IRQ_VBAT = 8,
+	PM8607_IRQ_VCHG,
+	PM8607_IRQ_VSYS,
+	PM8607_IRQ_TINT,
+	PM8607_IRQ_GPADC0,
+	PM8607_IRQ_GPADC1,
+	PM8607_IRQ_GPADC2,
+	PM8607_IRQ_GPADC3,
+	PM8607_IRQ_AUDIO_SHORT = 16,
+	PM8607_IRQ_PEN,
+	PM8607_IRQ_HEADSET,
+	PM8607_IRQ_HOOK,
+	PM8607_IRQ_MICIN,
+	PM8607_IRQ_CHG_FAIL,
+	PM8607_IRQ_CHG_DONE,
+	PM8607_IRQ_CHG_FAULT,
+};
+
+enum {
+	PM8607_CHIP_A0 = 0x40,
+	PM8607_CHIP_A1 = 0x41,
+	PM8607_CHIP_B0 = 0x48,
+};
+
+
+struct pm8607_chip {
+	struct device		*dev;
+	struct mutex		io_lock;
+	struct i2c_client	*client;
+
+	int (*read)(struct pm8607_chip *chip, int reg, int bytes, void *dest);
+	int (*write)(struct pm8607_chip *chip, int reg, int bytes, void *src);
+
+	int			buck3_double;	/* DVC ramp slope double */
+	unsigned char		chip_id;
+
+};
+
+#define PM8607_MAX_REGULATOR	15	/* 3 Bucks, 12 LDOs */
+
+enum {
+	GI2C_PORT = 0,
+	PI2C_PORT,
+};
+
+struct pm8607_platform_data {
+	int	i2c_port;	/* Controlled by GI2C or PI2C */
+	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
+};
+
+extern int pm8607_reg_read(struct pm8607_chip *, int);
+extern int pm8607_reg_write(struct pm8607_chip *, int, unsigned char);
+extern int pm8607_bulk_read(struct pm8607_chip *, int, int,
+			    unsigned char *);
+extern int pm8607_bulk_write(struct pm8607_chip *, int, int,
+			     unsigned char *);
+extern int pm8607_set_bits(struct pm8607_chip *, int, unsigned char,
+			   unsigned char);
+#endif /* __LINUX_MFD_88PM8607_H */
-- 
cgit v1.2.3


From 0c41839e98272a317d4af4dfcb54b599b2c3dcba Mon Sep 17 00:00:00 2001
From: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Date: Mon, 12 Oct 2009 17:11:52 +0200
Subject: mfd: add AB4500 driver

This adds core driver support for AB4500 mixed signal
multimedia & power management chip. This connects to U8500
on the SSP (pl022) and exports read/write functions for
the device to get access to this chip. This also registers
the client devices and sets the parent.

Signed-off-by: srinidhi kasagar <srinidhi.kasagar@stericsson.com>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Reviewed-by: Mark Brown <broonie@sirena.org.uk>
Reviewed-by: Jean-Christophe <plagnioj@jcrosoft.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig        |  10 ++
 drivers/mfd/Makefile       |   1 +
 drivers/mfd/ab4500-core.c  | 207 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/ab4500.h | 262 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 480 insertions(+)
 create mode 100644 drivers/mfd/ab4500-core.c
 create mode 100644 include/linux/mfd/ab4500.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 26a36f887357..2aea25ba7ba0 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -329,6 +329,16 @@ config MFD_88PM8607
 	  individual components like voltage regulators, RTC and
 	  battery-charger under the corresponding menus.
 
+config AB4500_CORE
+	tristate "ST-Ericsson's AB4500 Mixed Signal Power management chip"
+	depends on SPI
+	default y
+	help
+	  Select this option to enable access to AB4500 power management
+	  chip. This connects to U8500 on the SSP/SPI bus and exports
+	  read/write functions for the devices to get access to this chip.
+	  This chip embeds various other multimedia funtionalities as well.
+
 endmenu
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2596add427a2..1792b1ff8ffc 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -52,4 +52,5 @@ obj-$(CONFIG_PCF50633_ADC)	+= pcf50633-adc.o
 obj-$(CONFIG_PCF50633_GPIO)	+= pcf50633-gpio.o
 obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)	+= ab3100-otp.o
+obj-$(CONFIG_AB4500_CORE)	+= ab4500-core.o
 obj-$(CONFIG_MFD_88PM8607)	+= 88pm8607.o
diff --git a/drivers/mfd/ab4500-core.c b/drivers/mfd/ab4500-core.c
new file mode 100644
index 000000000000..3ad91f7ee22b
--- /dev/null
+++ b/drivers/mfd/ab4500-core.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2009 ST-Ericsson
+ *
+ * Author: Srinidhi KASAGAR <srinidhi.kasagar@stericsson.com>
+ *
+ * This program is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation.
+ *
+ * AB4500 is a companion power management chip used with U8500.
+ * On this platform, this is interfaced with SSP0 controller
+ * which is a ARM primecell pl022.
+ *
+ * At the moment the module just exports read/write features.
+ * Interrupt management to be added - TODO.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/mfd/ab4500.h>
+
+/* just required if probe fails, we need to
+ * unregister the device
+ */
+static struct spi_driver ab4500_driver;
+
+/*
+ * This funtion writes to any AB4500 registers using
+ * SPI protocol &  before it writes it packs the data
+ * in the below 24 bit frame format
+ *
+ *	 *|------------------------------------|
+ *	 *| 23|22...18|17.......10|9|8|7......0|
+ *	 *| r/w  bank       adr          data  |
+ *	 * ------------------------------------
+ *
+ * This function shouldn't be called from interrupt
+ * context
+ */
+int ab4500_write(struct ab4500 *ab4500, unsigned char block,
+		unsigned long addr, unsigned char data)
+{
+	struct spi_transfer xfer;
+	struct spi_message	msg;
+	int err;
+	unsigned long spi_data =
+		block << 18 | addr << 10 | data;
+
+	mutex_lock(&ab4500->lock);
+	ab4500->tx_buf[0] = spi_data;
+	ab4500->rx_buf[0] = 0;
+
+	xfer.tx_buf	= ab4500->tx_buf;
+	xfer.rx_buf 	= NULL;
+	xfer.len	= sizeof(unsigned long);
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	err = spi_sync(ab4500->spi, &msg);
+	mutex_unlock(&ab4500->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(ab4500_write);
+
+int ab4500_read(struct ab4500 *ab4500, unsigned char block,
+		unsigned long addr)
+{
+	struct spi_transfer xfer;
+	struct spi_message	msg;
+	unsigned long spi_data =
+		1 << 23 | block << 18 | addr << 10;
+
+	mutex_lock(&ab4500->lock);
+	ab4500->tx_buf[0] = spi_data;
+	ab4500->rx_buf[0] = 0;
+
+	xfer.tx_buf	= ab4500->tx_buf;
+	xfer.rx_buf 	= ab4500->rx_buf;
+	xfer.len	= sizeof(unsigned long);
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	spi_sync(ab4500->spi, &msg);
+	mutex_unlock(&ab4500->lock);
+
+	return  ab4500->rx_buf[0];
+}
+EXPORT_SYMBOL(ab4500_read);
+
+/* ref: ab3100 core */
+#define AB4500_DEVICE(devname, devid)				\
+static struct platform_device ab4500_##devname##_device = {	\
+	.name	= devid,					\
+	.id	= -1,						\
+}
+
+/* list of childern devices of ab4500 - all are
+ * not populated here - TODO
+ */
+AB4500_DEVICE(charger, "ab4500-charger");
+AB4500_DEVICE(audio, "ab4500-audio");
+AB4500_DEVICE(usb, "ab4500-usb");
+AB4500_DEVICE(tvout, "ab4500-tvout");
+AB4500_DEVICE(sim, "ab4500-sim");
+AB4500_DEVICE(gpadc, "ab4500-gpadc");
+AB4500_DEVICE(clkmgt, "ab4500-clkmgt");
+AB4500_DEVICE(misc, "ab4500-misc");
+
+static struct platform_device *ab4500_platform_devs[] = {
+	&ab4500_charger_device,
+	&ab4500_audio_device,
+	&ab4500_usb_device,
+	&ab4500_tvout_device,
+	&ab4500_sim_device,
+	&ab4500_gpadc_device,
+	&ab4500_clkmgt_device,
+	&ab4500_misc_device,
+};
+
+static int __init ab4500_probe(struct spi_device *spi)
+{
+	struct ab4500	*ab4500;
+	unsigned char revision;
+	int err = 0;
+	int i;
+
+	ab4500 = kzalloc(sizeof *ab4500, GFP_KERNEL);
+	if (!ab4500) {
+		dev_err(&spi->dev, "could not allocate AB4500\n");
+		err = -ENOMEM;
+		goto not_detect;
+	}
+
+	ab4500->spi = spi;
+	spi_set_drvdata(spi, ab4500);
+
+	mutex_init(&ab4500->lock);
+
+	/* read the revision register */
+	revision = ab4500_read(ab4500, AB4500_MISC, AB4500_REV_REG);
+
+	/* revision id 0x0 is for early drop, 0x10 is for cut1.0 */
+	if (revision == 0x0 || revision == 0x10)
+		dev_info(&spi->dev, "Detected chip: %s, revision = %x\n",
+			ab4500_driver.driver.name, revision);
+	else	{
+		dev_err(&spi->dev, "unknown chip: 0x%x\n", revision);
+		goto not_detect;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ab4500_platform_devs); i++)	{
+		ab4500_platform_devs[i]->dev.parent =
+			&spi->dev;
+		platform_set_drvdata(ab4500_platform_devs[i], ab4500);
+	}
+
+	/* register the ab4500 platform devices */
+	platform_add_devices(ab4500_platform_devs,
+			ARRAY_SIZE(ab4500_platform_devs));
+
+	return err;
+
+ not_detect:
+	spi_unregister_driver(&ab4500_driver);
+	kfree(ab4500);
+	return err;
+}
+
+static int __devexit ab4500_remove(struct spi_device *spi)
+{
+	struct ab4500 *ab4500 =
+		spi_get_drvdata(spi);
+
+	kfree(ab4500);
+
+	return 0;
+}
+
+static struct spi_driver ab4500_driver = {
+	.driver = {
+		.name = "ab4500",
+		.owner = THIS_MODULE,
+	},
+	.probe = ab4500_probe,
+	.remove = __devexit_p(ab4500_remove)
+};
+
+static int __devinit ab4500_init(void)
+{
+	return spi_register_driver(&ab4500_driver);
+}
+
+static void __exit ab4500_exit(void)
+{
+	spi_unregister_driver(&ab4500_driver);
+}
+
+subsys_initcall_sync(ab4500_init);
+
+MODULE_AUTHOR("Srinidhi KASAGAR <srinidhi.kasagar@stericsson.com");
+MODULE_DESCRIPTION("AB4500 core driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/ab4500.h b/include/linux/mfd/ab4500.h
new file mode 100644
index 000000000000..a42a7033ae53
--- /dev/null
+++ b/include/linux/mfd/ab4500.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2009 ST-Ericsson
+ *
+ * Author: Srinidhi KASAGAR <srinidhi.kasagar@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * AB4500 device core funtions, for client access
+ */
+#ifndef MFD_AB4500_H
+#define MFD_AB4500_H
+
+#include <linux/device.h>
+
+/*
+ * AB4500 bank addresses
+ */
+#define AB4500_SYS_CTRL1_BLOCK	0x1
+#define AB4500_SYS_CTRL2_BLOCK	0x2
+#define AB4500_REGU_CTRL1	0x3
+#define AB4500_REGU_CTRL2	0x4
+#define AB4500_USB		0x5
+#define AB4500_TVOUT		0x6
+#define AB4500_DBI		0x7
+#define AB4500_ECI_AV_ACC	0x8
+#define AB4500_RESERVED		0x9
+#define AB4500_GPADC		0xA
+#define AB4500_CHARGER		0xB
+#define AB4500_GAS_GAUGE	0xC
+#define AB4500_AUDIO		0xD
+#define AB4500_INTERRUPT	0xE
+#define AB4500_RTC		0xF
+#define AB4500_MISC		0x10
+#define AB4500_DEBUG		0x12
+#define AB4500_PROD_TEST	0x13
+#define AB4500_OTP_EMUL		0x15
+
+/*
+ * System control 1 register offsets.
+ * Bank = 0x01
+ */
+#define AB4500_TURNON_STAT_REG		0x0100
+#define AB4500_RESET_STAT_REG		0x0101
+#define AB4500_PONKEY1_PRESS_STAT_REG	0x0102
+
+#define AB4500_FSM_STAT1_REG		0x0140
+#define AB4500_FSM_STAT2_REG		0x0141
+#define AB4500_SYSCLK_REQ_STAT_REG	0x0142
+#define AB4500_USB_STAT1_REG		0x0143
+#define AB4500_USB_STAT2_REG		0x0144
+#define AB4500_STATUS_SPARE1_REG	0x0145
+#define AB4500_STATUS_SPARE2_REG	0x0146
+
+#define AB4500_CTRL1_REG		0x0180
+#define AB4500_CTRL2_REG		0x0181
+
+/*
+ * System control 2 register offsets.
+ * bank = 0x02
+ */
+#define AB4500_CTRL3_REG		0x0200
+#define AB4500_MAIN_WDOG_CTRL_REG	0x0201
+#define AB4500_MAIN_WDOG_TIMER_REG	0x0202
+#define AB4500_LOW_BAT_REG		0x0203
+#define AB4500_BATT_OK_REG		0x0204
+#define AB4500_SYSCLK_TIMER_REG		0x0205
+#define AB4500_SMPSCLK_CTRL_REG		0x0206
+#define AB4500_SMPSCLK_SEL1_REG		0x0207
+#define AB4500_SMPSCLK_SEL2_REG		0x0208
+#define AB4500_SMPSCLK_SEL3_REG		0x0209
+#define AB4500_SYSULPCLK_CONF_REG	0x020A
+#define AB4500_SYSULPCLK_CTRL1_REG	0x020B
+#define AB4500_SYSCLK_CTRL_REG		0x020C
+#define AB4500_SYSCLK_REQ1_VALID_REG	0x020D
+#define AB4500_SYSCLK_REQ_VALID_REG	0x020E
+#define AB4500_SYSCTRL_SPARE_REG	0x020F
+#define AB4500_PAD_CONF_REG		0x0210
+
+/*
+ * Regu control1 register offsets
+ * Bank = 0x03
+ */
+#define AB4500_REGU_SERIAL_CTRL1_REG	0x0300
+#define AB4500_REGU_SERIAL_CTRL2_REG	0x0301
+#define AB4500_REGU_SERIAL_CTRL3_REG	0x0302
+#define AB4500_REGU_REQ_CTRL1_REG	0x0303
+#define AB4500_REGU_REQ_CTRL2_REG	0x0304
+#define AB4500_REGU_REQ_CTRL3_REG	0x0305
+#define AB4500_REGU_REQ_CTRL4_REG	0x0306
+#define AB4500_REGU_MISC1_REG		0x0380
+#define AB4500_REGU_OTGSUPPLY_CTRL_REG	0x0381
+#define AB4500_REGU_VUSB_CTRL_REG	0x0382
+#define AB4500_REGU_VAUDIO_SUPPLY_REG	0x0383
+#define AB4500_REGU_CTRL1_SPARE_REG	0x0384
+
+/*
+ * Regu control2 Vmod register offsets
+ */
+#define AB4500_REGU_VMOD_REGU_REG	0x0440
+#define AB4500_REGU_VMOD_SEL1_REG	0x0441
+#define AB4500_REGU_VMOD_SEL2_REG	0x0442
+#define AB4500_REGU_CTRL_DISCH_REG	0x0443
+#define AB4500_REGU_CTRL_DISCH2_REG	0x0444
+
+/*
+ * USB/ULPI register offsets
+ * Bank : 0x5
+ */
+#define AB4500_USB_LINE_STAT_REG	0x0580
+#define AB4500_USB_LINE_CTRL1_REG	0x0581
+#define AB4500_USB_LINE_CTRL2_REG	0x0582
+#define AB4500_USB_LINE_CTRL3_REG	0x0583
+#define AB4500_USB_LINE_CTRL4_REG	0x0584
+#define AB4500_USB_LINE_CTRL5_REG	0x0585
+#define AB4500_USB_OTG_CTRL_REG		0x0587
+#define AB4500_USB_OTG_STAT_REG		0x0588
+#define AB4500_USB_OTG_STAT_REG		0x0588
+#define AB4500_USB_CTRL_SPARE_REG	0x0589
+#define AB4500_USB_PHY_CTRL_REG		0x058A
+
+/*
+ * TVOUT / CTRL register offsets
+ * Bank : 0x06
+ */
+#define AB4500_TVOUT_CTRL_REG		0x0680
+
+/*
+ * DBI register offsets
+ * Bank : 0x07
+ */
+#define AB4500_DBI_REG1_REG		0x0700
+#define AB4500_DBI_REG2_REG		0x0701
+
+/*
+ * ECI regsiter offsets
+ * Bank : 0x08
+ */
+#define AB4500_ECI_CTRL_REG		0x0800
+#define AB4500_ECI_HOOKLEVEL_REG	0x0801
+#define AB4500_ECI_DATAOUT_REG		0x0802
+#define AB4500_ECI_DATAIN_REG		0x0803
+
+/*
+ * AV Connector register offsets
+ * Bank : 0x08
+ */
+#define AB4500_AV_CONN_REG		0x0840
+
+/*
+ * Accessory detection register offsets
+ * Bank : 0x08
+ */
+#define AB4500_ACC_DET_DB1_REG		0x0880
+#define AB4500_ACC_DET_DB2_REG		0x0881
+
+/*
+ * GPADC register offsets
+ * Bank : 0x0A
+ */
+#define AB4500_GPADC_CTRL1_REG		0x0A00
+#define AB4500_GPADC_CTRL2_REG		0x0A01
+#define AB4500_GPADC_CTRL3_REG		0x0A02
+#define AB4500_GPADC_AUTO_TIMER_REG	0x0A03
+#define AB4500_GPADC_STAT_REG		0x0A04
+#define AB4500_GPADC_MANDATAL_REG	0x0A05
+#define AB4500_GPADC_MANDATAH_REG	0x0A06
+#define AB4500_GPADC_AUTODATAL_REG	0x0A07
+#define AB4500_GPADC_AUTODATAH_REG	0x0A08
+#define AB4500_GPADC_MUX_CTRL_REG	0x0A09
+
+/*
+ * Charger / status register offfsets
+ * Bank : 0x0B
+ */
+#define AB4500_CH_STATUS1_REG		0x0B00
+#define AB4500_CH_STATUS2_REG		0x0B01
+#define AB4500_CH_USBCH_STAT1_REG	0x0B02
+#define AB4500_CH_USBCH_STAT2_REG	0x0B03
+#define AB4500_CH_FSM_STAT_REG		0x0B04
+#define AB4500_CH_STAT_REG		0x0B05
+
+/*
+ * Charger / control register offfsets
+ * Bank : 0x0B
+ */
+#define AB4500_CH_VOLT_LVL_REG		0x0B40
+
+/*
+ * Charger / main control register offfsets
+ * Bank : 0x0B
+ */
+#define AB4500_MCH_CTRL1		0x0B80
+#define AB4500_MCH_CTRL2		0x0B81
+#define AB4500_MCH_IPT_CURLVL_REG	0x0B82
+#define AB4500_CH_WD_REG		0x0B83
+
+/*
+ * Charger / USB control register offsets
+ * Bank : 0x0B
+ */
+#define AB4500_USBCH_CTRL1_REG		0x0BC0
+#define AB4500_USBCH_CTRL2_REG		0x0BC1
+#define AB4500_USBCH_IPT_CRNTLVL_REG	0x0BC2
+
+/*
+ * RTC bank register offsets
+ * Bank : 0xF
+ */
+#define AB4500_RTC_SOFF_STAT_REG	0x0F00
+#define AB4500_RTC_CC_CONF_REG		0x0F01
+#define AB4500_RTC_READ_REQ_REG		0x0F02
+#define AB4500_RTC_WATCH_TSECMID_REG	0x0F03
+#define AB4500_RTC_WATCH_TSECHI_REG	0x0F04
+#define AB4500_RTC_WATCH_TMIN_LOW_REG	0x0F05
+#define AB4500_RTC_WATCH_TMIN_MID_REG	0x0F06
+#define AB4500_RTC_WATCH_TMIN_HI_REG	0x0F07
+#define AB4500_RTC_ALRM_MIN_LOW_REG	0x0F08
+#define AB4500_RTC_ALRM_MIN_MID_REG	0x0F09
+#define AB4500_RTC_ALRM_MIN_HI_REG	0x0F0A
+#define AB4500_RTC_STAT_REG		0x0F0B
+#define AB4500_RTC_BKUP_CHG_REG		0x0F0C
+#define AB4500_RTC_FORCE_BKUP_REG	0x0F0D
+#define AB4500_RTC_CALIB_REG		0x0F0E
+#define AB4500_RTC_SWITCH_STAT_REG	0x0F0F
+
+/*
+ * PWM Out generators
+ * Bank: 0x10
+ */
+#define AB4500_PWM_OUT_CTRL1_REG	0x1060
+#define AB4500_PWM_OUT_CTRL2_REG	0x1061
+#define AB4500_PWM_OUT_CTRL3_REG	0x1062
+#define AB4500_PWM_OUT_CTRL4_REG	0x1063
+#define AB4500_PWM_OUT_CTRL5_REG	0x1064
+#define AB4500_PWM_OUT_CTRL6_REG	0x1065
+#define AB4500_PWM_OUT_CTRL7_REG	0x1066
+
+#define AB4500_I2C_PAD_CTRL_REG		0x1067
+#define AB4500_REV_REG			0x1080
+
+/**
+ * struct ab4500
+ * @spi: spi device structure
+ * @tx_buf: transmit buffer
+ * @rx_buf: receive buffer
+ * @lock: sync primitive
+ */
+struct ab4500 {
+	struct spi_device	*spi;
+	unsigned long		tx_buf[4];
+	unsigned long		rx_buf[4];
+	struct mutex		lock;
+};
+
+int ab4500_write(struct ab4500 *ab4500, unsigned char block,
+		unsigned long addr, unsigned char data);
+int ab4500_read(struct ab4500 *ab4500, unsigned char block,
+		unsigned long addr);
+
+#endif /* MFD_AB4500_H */
-- 
cgit v1.2.3


From 6f2ecaae72910211034c4f1955da97b2ff994265 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Oct 2009 15:41:05 +0100
Subject: gpiolib: Make WM831x GPIO count dynamic

This supports future devices with fewer GPIOs.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/wm831x-gpio.c      | 4 +---
 drivers/mfd/wm831x-core.c       | 3 +++
 include/linux/mfd/wm831x/core.h | 2 ++
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/wm831x-gpio.c b/drivers/gpio/wm831x-gpio.c
index f9c09a54ec7f..f5e4934f1da1 100644
--- a/drivers/gpio/wm831x-gpio.c
+++ b/drivers/gpio/wm831x-gpio.c
@@ -23,8 +23,6 @@
 #include <linux/mfd/wm831x/pdata.h>
 #include <linux/mfd/wm831x/gpio.h>
 
-#define WM831X_GPIO_MAX 16
-
 struct wm831x_gpio {
 	struct wm831x *wm831x;
 	struct gpio_chip gpio_chip;
@@ -192,7 +190,7 @@ static int __devinit wm831x_gpio_probe(struct platform_device *pdev)
 
 	wm831x_gpio->wm831x = wm831x;
 	wm831x_gpio->gpio_chip = template_chip;
-	wm831x_gpio->gpio_chip.ngpio = WM831X_GPIO_MAX;
+	wm831x_gpio->gpio_chip.ngpio = wm831x->num_gpio;
 	wm831x_gpio->gpio_chip.dev = &pdev->dev;
 	if (pdata && pdata->gpio_base)
 		wm831x_gpio->gpio_chip.base = pdata->gpio_base;
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 8504c6ef4a16..8d386c0c8027 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1293,16 +1293,19 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	switch (ret) {
 	case WM8310:
 		parent = WM8310;
+		wm831x->num_gpio = 16;
 		dev_info(wm831x->dev, "WM8310 revision %c\n", 'A' + rev);
 		break;
 
 	case WM8311:
 		parent = WM8311;
+		wm831x->num_gpio = 16;
 		dev_info(wm831x->dev, "WM8311 revision %c\n", 'A' + rev);
 		break;
 
 	case WM8312:
 		parent = WM8312;
+		wm831x->num_gpio = 16;
 		dev_info(wm831x->dev, "WM8312 revision %c\n", 'A' + rev);
 		break;
 
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 91eb493bf14c..c1bc59f6cbf2 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -253,6 +253,8 @@ struct wm831x {
 	unsigned int irq_base;
 	int irq_masks[5];
 
+	int num_gpio;
+
 	struct mutex auxadc_lock;
 
 	/* The WM831x has a security key blocking access to certain
-- 
cgit v1.2.3


From d4e0a89e3d170affa896efcdb4320e38a2f3a546 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Oct 2009 15:41:07 +0100
Subject: mfd: Add support for WM8320 PMICs

The WM8320 is an integrated power management subsystem providing
voltage regulators, RTC, watchdog and other functionality. The
WM8320 is derived from the WM831x and therefore shares most of
the driver code with the WM831x.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |   6 +-
 drivers/mfd/wm831x-core.c       | 159 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm831x/core.h |   1 +
 3 files changed, 163 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 2aea25ba7ba0..b0b3e42e7ee6 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -185,12 +185,12 @@ config MFD_WM8400
 	  the functionality of the device.
 
 config MFD_WM831X
-	tristate "Support Wolfson Microelectronics WM831x PMICs"
+	tristate "Support Wolfson Microelectronics WM831x/2x PMICs"
 	select MFD_CORE
 	depends on I2C
 	help
-	  Support for the Wolfson Microelecronics WM831x PMICs.  This
-	  driver provides common support for accessing the device,
+	  Support for the Wolfson Microelecronics WM831x and WM832x PMICs.
+	  This driver provides common support for accessing the device,
 	  additional drivers must be enabled in order to use the
 	  functionality of the device.
 
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 8d386c0c8027..163029f06185 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -93,6 +93,7 @@ enum wm831x_parent {
 	WM8310 = 0x8310,
 	WM8311 = 0x8311,
 	WM8312 = 0x8312,
+	WM8320 = 0x8320,
 };
 
 static int wm831x_reg_locked(struct wm831x *wm831x, unsigned short reg)
@@ -478,6 +479,20 @@ static struct resource wm831x_dcdc4_resources[] = {
 	},
 };
 
+static struct resource wm8320_dcdc4_buck_resources[] = {
+	{
+		.start = WM831X_DC4_CONTROL,
+		.end   = WM832X_DC4_SLEEP_CONTROL,
+		.flags = IORESOURCE_IO,
+	},
+	{
+		.name  = "UV",
+		.start = WM831X_IRQ_UV_DC4,
+		.end   = WM831X_IRQ_UV_DC4,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 static struct resource wm831x_gpio_resources[] = {
 	{
 		.start = WM831X_IRQ_GPIO_1,
@@ -1237,6 +1252,137 @@ static struct mfd_cell wm8312_devs[] = {
 	},
 };
 
+static struct mfd_cell wm8320_devs[] = {
+	{
+		.name = "wm831x-backup",
+	},
+	{
+		.name = "wm831x-buckv",
+		.id = 1,
+		.num_resources = ARRAY_SIZE(wm831x_dcdc1_resources),
+		.resources = wm831x_dcdc1_resources,
+	},
+	{
+		.name = "wm831x-buckv",
+		.id = 2,
+		.num_resources = ARRAY_SIZE(wm831x_dcdc2_resources),
+		.resources = wm831x_dcdc2_resources,
+	},
+	{
+		.name = "wm831x-buckp",
+		.id = 3,
+		.num_resources = ARRAY_SIZE(wm831x_dcdc3_resources),
+		.resources = wm831x_dcdc3_resources,
+	},
+	{
+		.name = "wm831x-buckp",
+		.id = 4,
+		.num_resources = ARRAY_SIZE(wm8320_dcdc4_buck_resources),
+		.resources = wm8320_dcdc4_buck_resources,
+	},
+	{
+		.name = "wm831x-gpio",
+		.num_resources = ARRAY_SIZE(wm831x_gpio_resources),
+		.resources = wm831x_gpio_resources,
+	},
+	{
+		.name = "wm831x-hwmon",
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 1,
+		.num_resources = ARRAY_SIZE(wm831x_ldo1_resources),
+		.resources = wm831x_ldo1_resources,
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 2,
+		.num_resources = ARRAY_SIZE(wm831x_ldo2_resources),
+		.resources = wm831x_ldo2_resources,
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 3,
+		.num_resources = ARRAY_SIZE(wm831x_ldo3_resources),
+		.resources = wm831x_ldo3_resources,
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 4,
+		.num_resources = ARRAY_SIZE(wm831x_ldo4_resources),
+		.resources = wm831x_ldo4_resources,
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 5,
+		.num_resources = ARRAY_SIZE(wm831x_ldo5_resources),
+		.resources = wm831x_ldo5_resources,
+	},
+	{
+		.name = "wm831x-ldo",
+		.id = 6,
+		.num_resources = ARRAY_SIZE(wm831x_ldo6_resources),
+		.resources = wm831x_ldo6_resources,
+	},
+	{
+		.name = "wm831x-aldo",
+		.id = 7,
+		.num_resources = ARRAY_SIZE(wm831x_ldo7_resources),
+		.resources = wm831x_ldo7_resources,
+	},
+	{
+		.name = "wm831x-aldo",
+		.id = 8,
+		.num_resources = ARRAY_SIZE(wm831x_ldo8_resources),
+		.resources = wm831x_ldo8_resources,
+	},
+	{
+		.name = "wm831x-aldo",
+		.id = 9,
+		.num_resources = ARRAY_SIZE(wm831x_ldo9_resources),
+		.resources = wm831x_ldo9_resources,
+	},
+	{
+		.name = "wm831x-aldo",
+		.id = 10,
+		.num_resources = ARRAY_SIZE(wm831x_ldo10_resources),
+		.resources = wm831x_ldo10_resources,
+	},
+	{
+		.name = "wm831x-alive-ldo",
+		.id = 11,
+		.num_resources = ARRAY_SIZE(wm831x_ldo11_resources),
+		.resources = wm831x_ldo11_resources,
+	},
+	{
+		.name = "wm831x-on",
+		.num_resources = ARRAY_SIZE(wm831x_on_resources),
+		.resources = wm831x_on_resources,
+	},
+	{
+		.name = "wm831x-rtc",
+		.num_resources = ARRAY_SIZE(wm831x_rtc_resources),
+		.resources = wm831x_rtc_resources,
+	},
+	{
+		.name = "wm831x-status",
+		.id = 1,
+		.num_resources = ARRAY_SIZE(wm831x_status1_resources),
+		.resources = wm831x_status1_resources,
+	},
+	{
+		.name = "wm831x-status",
+		.id = 2,
+		.num_resources = ARRAY_SIZE(wm831x_status2_resources),
+		.resources = wm831x_status2_resources,
+	},
+	{
+		.name = "wm831x-watchdog",
+		.num_resources = ARRAY_SIZE(wm831x_wdt_resources),
+		.resources = wm831x_wdt_resources,
+	},
+};
+
 static struct mfd_cell backlight_devs[] = {
 	{
 		.name = "wm831x-backlight",
@@ -1309,6 +1455,12 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 		dev_info(wm831x->dev, "WM8312 revision %c\n", 'A' + rev);
 		break;
 
+	case WM8320:
+		parent = WM8320;
+		wm831x->num_gpio = 12;
+		dev_info(wm831x->dev, "WM8320 revision %c\n", 'A' + rev);
+		break;
+
 	default:
 		dev_err(wm831x->dev, "Unknown WM831x device %04x\n", ret);
 		ret = -EINVAL;
@@ -1367,6 +1519,12 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 				      NULL, 0);
 		break;
 
+	case WM8320:
+		ret = mfd_add_devices(wm831x->dev, -1,
+				      wm8320_devs, ARRAY_SIZE(wm8320_devs),
+				      NULL, 0);
+		break;
+
 	default:
 		/* If this happens the bus probe function is buggy */
 		BUG();
@@ -1492,6 +1650,7 @@ static const struct i2c_device_id wm831x_i2c_id[] = {
 	{ "wm8310", WM8310 },
 	{ "wm8311", WM8311 },
 	{ "wm8312", WM8312 },
+	{ "wm8320", WM8320 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, wm831x_i2c_id);
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index c1bc59f6cbf2..d01d293a6b25 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -117,6 +117,7 @@
 #define WM831X_DC3_SLEEP_CONTROL                0x4063
 #define WM831X_DC4_CONTROL                      0x4064
 #define WM831X_DC4_SLEEP_CONTROL                0x4065
+#define WM832X_DC4_SLEEP_CONTROL                0x4067
 #define WM831X_EPE1_CONTROL                     0x4066
 #define WM831X_EPE2_CONTROL                     0x4067
 #define WM831X_LDO1_CONTROL                     0x4068
-- 
cgit v1.2.3


From a5736e0b62fcb7c1b20892c62e1c5fe5e9387c86 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 12 Oct 2009 17:22:38 +0200
Subject: mfd: Add ADP5520/ADP5501 driver

Base driver for Analog Devices ADP5520/ADP5501 MFD PMICs

Subdevs:
LCD Backlight   : drivers/video/backlight/adp5520_bl.c
LEDs            : drivers/led/leds-adp5520.c
GPIO            : drivers/gpio/adp5520-gpio.c (ADP5520 only)
Keys            : drivers/input/keyboard/adp5520-keys.c (ADP5520 only)

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |  10 ++
 drivers/mfd/Makefile        |   1 +
 drivers/mfd/adp5520.c       | 378 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/adp5520.h | 299 +++++++++++++++++++++++++++++++++++
 4 files changed, 688 insertions(+)
 create mode 100644 drivers/mfd/adp5520.c
 create mode 100644 include/linux/mfd/adp5520.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 7df3bcf79490..b1d537053faf 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -174,6 +174,16 @@ config PMIC_DA903X
 	  individual components like LCD backlight, voltage regulators,
 	  LEDs and battery-charger under the corresponding menus.
 
+config PMIC_ADP5520
+	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
+	depends on I2C=y
+	help
+	  Say yes here to add support for Analog Devices AD5520 and ADP5501,
+	  Multifunction Power Management IC. This includes
+	  the I2C driver and the core APIs _only_, you have to select
+	  individual components like LCD backlight, LEDs, GPIOs and Kepad
+	  under the corresponding menus.
+
 config MFD_WM8400
 	tristate "Support Wolfson Microelectronics WM8400"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 1792b1ff8ffc..24558574a737 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -54,3 +54,4 @@ obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)	+= ab3100-otp.o
 obj-$(CONFIG_AB4500_CORE)	+= ab4500-core.o
 obj-$(CONFIG_MFD_88PM8607)	+= 88pm8607.o
+obj-$(CONFIG_PMIC_ADP5520)	+= adp5520.o
\ No newline at end of file
diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c
new file mode 100644
index 000000000000..401a9b6029e3
--- /dev/null
+++ b/drivers/mfd/adp5520.c
@@ -0,0 +1,378 @@
+/*
+ * Base driver for Analog Devices ADP5520/ADP5501 MFD PMICs
+ * LCD Backlight: drivers/video/backlight/adp5520_bl
+ * LEDs		: drivers/led/leds-adp5520
+ * GPIO		: drivers/gpio/adp5520-gpio (ADP5520 only)
+ * Keys		: drivers/input/keyboard/adp5520-keys (ADP5520 only)
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Derived from da903x:
+ * Copyright (C) 2008 Compulab, Ltd.
+ * 	Mike Rapoport <mike@compulab.co.il>
+ *
+ * Copyright (C) 2006-2008 Marvell International Ltd.
+ * 	Eric Miao <eric.miao@marvell.com>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+
+#include <linux/mfd/adp5520.h>
+
+struct adp5520_chip {
+	struct i2c_client *client;
+	struct device *dev;
+	struct mutex lock;
+	struct blocking_notifier_head notifier_list;
+	int irq;
+	unsigned long id;
+};
+
+static int __adp5520_read(struct i2c_client *client,
+				int reg, uint8_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading at 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = (uint8_t)ret;
+	return 0;
+}
+
+static int __adp5520_write(struct i2c_client *client,
+				 int reg, uint8_t val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writing 0x%02x to 0x%02x\n",
+				val, reg);
+		return ret;
+	}
+	return 0;
+}
+
+int __adp5520_ack_bits(struct i2c_client *client, int reg, uint8_t bit_mask)
+{
+	struct adp5520_chip *chip = i2c_get_clientdata(client);
+	uint8_t reg_val;
+	int ret;
+
+	mutex_lock(&chip->lock);
+
+	ret = __adp5520_read(client, reg, &reg_val);
+
+	if (!ret) {
+		reg_val |= bit_mask;
+		ret = __adp5520_write(client, reg, reg_val);
+	}
+
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+
+int adp5520_write(struct device *dev, int reg, uint8_t val)
+{
+	return __adp5520_write(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(adp5520_write);
+
+int adp5520_read(struct device *dev, int reg, uint8_t *val)
+{
+	return __adp5520_read(to_i2c_client(dev), reg, val);
+}
+EXPORT_SYMBOL_GPL(adp5520_read);
+
+int adp5520_set_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret;
+
+	mutex_lock(&chip->lock);
+
+	ret = __adp5520_read(chip->client, reg, &reg_val);
+
+	if (!ret && ((reg_val & bit_mask) == 0)) {
+		reg_val |= bit_mask;
+		ret = __adp5520_write(chip->client, reg, reg_val);
+	}
+
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adp5520_set_bits);
+
+int adp5520_clr_bits(struct device *dev, int reg, uint8_t bit_mask)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(dev);
+	uint8_t reg_val;
+	int ret;
+
+	mutex_lock(&chip->lock);
+
+	ret = __adp5520_read(chip->client, reg, &reg_val);
+
+	if (!ret && (reg_val & bit_mask)) {
+		reg_val &= ~bit_mask;
+		ret = __adp5520_write(chip->client, reg, reg_val);
+	}
+
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adp5520_clr_bits);
+
+int adp5520_register_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(dev);
+
+	if (chip->irq) {
+		adp5520_set_bits(chip->dev, ADP5520_INTERRUPT_ENABLE,
+			events & (ADP5520_KP_IEN | ADP5520_KR_IEN |
+			ADP5520_OVP_IEN | ADP5520_CMPR_IEN));
+
+		return blocking_notifier_chain_register(&chip->notifier_list,
+			 nb);
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(adp5520_register_notifier);
+
+int adp5520_unregister_notifier(struct device *dev, struct notifier_block *nb,
+				unsigned int events)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(dev);
+
+	adp5520_clr_bits(chip->dev, ADP5520_INTERRUPT_ENABLE,
+		events & (ADP5520_KP_IEN | ADP5520_KR_IEN |
+		ADP5520_OVP_IEN | ADP5520_CMPR_IEN));
+
+	return blocking_notifier_chain_unregister(&chip->notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(adp5520_unregister_notifier);
+
+static irqreturn_t adp5520_irq_thread(int irq, void *data)
+{
+	struct adp5520_chip *chip = data;
+	unsigned int events;
+	uint8_t reg_val;
+	int ret;
+
+	ret = __adp5520_read(chip->client, ADP5520_MODE_STATUS, &reg_val);
+	if (ret)
+		goto out;
+
+	events =  reg_val & (ADP5520_OVP_INT | ADP5520_CMPR_INT |
+		ADP5520_GPI_INT | ADP5520_KR_INT | ADP5520_KP_INT);
+
+	blocking_notifier_call_chain(&chip->notifier_list, events, NULL);
+	/* ACK, Sticky bits are W1C */
+	__adp5520_ack_bits(chip->client, ADP5520_MODE_STATUS, events);
+
+out:
+	return IRQ_HANDLED;
+}
+
+static int __remove_subdev(struct device *dev, void *unused)
+{
+	platform_device_unregister(to_platform_device(dev));
+	return 0;
+}
+
+static int adp5520_remove_subdevs(struct adp5520_chip *chip)
+{
+	return device_for_each_child(chip->dev, NULL, __remove_subdev);
+}
+
+static int __devinit adp5520_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct adp5520_platform_data *pdata = client->dev.platform_data;
+	struct platform_device *pdev;
+	struct adp5520_chip *chip;
+	int ret;
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "SMBUS Word Data not Supported\n");
+		return -EIO;
+	}
+
+	if (pdata == NULL) {
+		dev_err(&client->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, chip);
+	chip->client = client;
+
+	chip->dev = &client->dev;
+	chip->irq = client->irq;
+	chip->id = id->driver_data;
+	mutex_init(&chip->lock);
+
+	if (chip->irq) {
+		BLOCKING_INIT_NOTIFIER_HEAD(&chip->notifier_list);
+
+		ret = request_threaded_irq(chip->irq, NULL, adp5520_irq_thread,
+				IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				"adp5520", chip);
+		if (ret) {
+			dev_err(&client->dev, "failed to request irq %d\n",
+					chip->irq);
+			goto out_free_chip;
+		}
+	}
+
+	ret = adp5520_write(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY);
+	if (ret) {
+		dev_err(&client->dev, "failed to write\n");
+		goto out_free_irq;
+	}
+
+	if (pdata->keys) {
+		pdev = platform_device_register_data(chip->dev, "adp5520-keys",
+				chip->id, pdata->keys, sizeof(*pdata->keys));
+		if (IS_ERR(pdev)) {
+			ret = PTR_ERR(pdev);
+			goto out_remove_subdevs;
+		}
+	}
+
+	if (pdata->gpio) {
+		pdev = platform_device_register_data(chip->dev, "adp5520-gpio",
+				chip->id, pdata->gpio, sizeof(*pdata->gpio));
+		if (IS_ERR(pdev)) {
+			ret = PTR_ERR(pdev);
+			goto out_remove_subdevs;
+		}
+	}
+
+	if (pdata->leds) {
+		pdev = platform_device_register_data(chip->dev, "adp5520-led",
+				chip->id, pdata->leds, sizeof(*pdata->leds));
+		if (IS_ERR(pdev)) {
+			ret = PTR_ERR(pdev);
+			goto out_remove_subdevs;
+		}
+	}
+
+	if (pdata->backlight) {
+		pdev = platform_device_register_data(chip->dev,
+						"adp5520-backlight",
+						chip->id,
+						pdata->backlight,
+						sizeof(*pdata->backlight));
+		if (IS_ERR(pdev)) {
+			ret = PTR_ERR(pdev);
+			goto out_remove_subdevs;
+		}
+	}
+
+	return 0;
+
+out_remove_subdevs:
+	adp5520_remove_subdevs(chip);
+
+out_free_irq:
+	if (chip->irq)
+		free_irq(chip->irq, chip);
+
+out_free_chip:
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+
+	return ret;
+}
+
+static int __devexit adp5520_remove(struct i2c_client *client)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
+
+	if (chip->irq)
+		free_irq(chip->irq, chip);
+
+	adp5520_remove_subdevs(chip);
+	adp5520_write(chip->dev, ADP5520_MODE_STATUS, 0);
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int adp5520_suspend(struct i2c_client *client,
+				 pm_message_t state)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
+
+	adp5520_clr_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY);
+	return 0;
+}
+
+static int adp5520_resume(struct i2c_client *client)
+{
+	struct adp5520_chip *chip = dev_get_drvdata(&client->dev);
+
+	adp5520_set_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY);
+	return 0;
+}
+#else
+#define adp5520_suspend	NULL
+#define adp5520_resume	NULL
+#endif
+
+static const struct i2c_device_id adp5520_id[] = {
+	{ "pmic-adp5520", ID_ADP5520 },
+	{ "pmic-adp5501", ID_ADP5501 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, adp5520_id);
+
+static struct i2c_driver adp5520_driver = {
+	.driver = {
+		.name	= "adp5520",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= adp5520_probe,
+	.remove		= __devexit_p(adp5520_remove),
+	.suspend	= adp5520_suspend,
+	.resume		= adp5520_resume,
+	.id_table 	= adp5520_id,
+};
+
+static int __init adp5520_init(void)
+{
+	return i2c_add_driver(&adp5520_driver);
+}
+module_init(adp5520_init);
+
+static void __exit adp5520_exit(void)
+{
+	i2c_del_driver(&adp5520_driver);
+}
+module_exit(adp5520_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("ADP5520(01) PMIC-MFD Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/adp5520.h b/include/linux/mfd/adp5520.h
new file mode 100644
index 000000000000..ac37558a4673
--- /dev/null
+++ b/include/linux/mfd/adp5520.h
@@ -0,0 +1,299 @@
+/*
+ * Definitions and platform data for Analog Devices
+ * ADP5520/ADP5501 MFD PMICs (Backlight, LED, GPIO and Keys)
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+
+#ifndef __LINUX_MFD_ADP5520_H
+#define __LINUX_MFD_ADP5520_H
+
+#define ID_ADP5520		5520
+#define ID_ADP5501		5501
+
+/*
+ * ADP5520/ADP5501 Register Map
+ */
+
+#define ADP5520_MODE_STATUS 		0x00
+#define ADP5520_INTERRUPT_ENABLE 	0x01
+#define ADP5520_BL_CONTROL 		0x02
+#define ADP5520_BL_TIME 		0x03
+#define ADP5520_BL_FADE 		0x04
+#define ADP5520_DAYLIGHT_MAX 		0x05
+#define ADP5520_DAYLIGHT_DIM 		0x06
+#define ADP5520_OFFICE_MAX 		0x07
+#define ADP5520_OFFICE_DIM 		0x08
+#define ADP5520_DARK_MAX 		0x09
+#define ADP5520_DARK_DIM 		0x0A
+#define ADP5520_BL_VALUE 		0x0B
+#define ADP5520_ALS_CMPR_CFG 		0x0C
+#define ADP5520_L2_TRIP 		0x0D
+#define ADP5520_L2_HYS 			0x0E
+#define ADP5520_L3_TRIP 		0x0F
+#define ADP5520_L3_HYS 			0x10
+#define ADP5520_LED_CONTROL 		0x11
+#define ADP5520_LED_TIME 		0x12
+#define ADP5520_LED_FADE 		0x13
+#define ADP5520_LED1_CURRENT 		0x14
+#define ADP5520_LED2_CURRENT 		0x15
+#define ADP5520_LED3_CURRENT 		0x16
+
+/*
+ * ADP5520 Register Map
+ */
+
+#define ADP5520_GPIO_CFG_1 		0x17
+#define ADP5520_GPIO_CFG_2 		0x18
+#define ADP5520_GPIO_IN 		0x19
+#define ADP5520_GPIO_OUT 		0x1A
+#define ADP5520_GPIO_INT_EN 		0x1B
+#define ADP5520_GPIO_INT_STAT 		0x1C
+#define ADP5520_GPIO_INT_LVL 		0x1D
+#define ADP5520_GPIO_DEBOUNCE 		0x1E
+#define ADP5520_GPIO_PULLUP 		0x1F
+#define ADP5520_KP_INT_STAT_1 		0x20
+#define ADP5520_KP_INT_STAT_2 		0x21
+#define ADP5520_KR_INT_STAT_1 		0x22
+#define ADP5520_KR_INT_STAT_2 		0x23
+#define ADP5520_KEY_STAT_1 		0x24
+#define ADP5520_KEY_STAT_2 		0x25
+
+/*
+ * MODE_STATUS bits
+ */
+
+#define ADP5520_nSTNBY		(1 << 7)
+#define ADP5520_BL_EN           (1 << 6)
+#define ADP5520_DIM_EN          (1 << 5)
+#define ADP5520_OVP_INT         (1 << 4)
+#define ADP5520_CMPR_INT        (1 << 3)
+#define ADP5520_GPI_INT         (1 << 2)
+#define ADP5520_KR_INT          (1 << 1)
+#define ADP5520_KP_INT          (1 << 0)
+
+/*
+ * INTERRUPT_ENABLE bits
+ */
+
+#define ADP5520_AUTO_LD_EN      (1 << 4)
+#define ADP5520_CMPR_IEN        (1 << 3)
+#define ADP5520_OVP_IEN         (1 << 2)
+#define ADP5520_KR_IEN          (1 << 1)
+#define ADP5520_KP_IEN          (1 << 0)
+
+/*
+ * BL_CONTROL bits
+ */
+
+#define ADP5520_BL_LVL          ((x) << 5)
+#define ADP5520_BL_LAW          ((x) << 4)
+#define ADP5520_BL_AUTO_ADJ     (1 << 3)
+#define ADP5520_OVP_EN          (1 << 2)
+#define ADP5520_FOVR            (1 << 1)
+#define ADP5520_KP_BL_EN        (1 << 0)
+
+/*
+ * ALS_CMPR_CFG bits
+ */
+
+#define ADP5520_L3_OUT		(1 << 3)
+#define ADP5520_L2_OUT		(1 << 2)
+#define ADP5520_L3_EN		(1 << 1)
+
+#define ADP5020_MAX_BRIGHTNESS	0x7F
+
+#define FADE_VAL(in, out)	((0xF & (in)) | ((0xF & (out)) << 4))
+#define BL_CTRL_VAL(law, auto)	(((1 & (auto)) << 3) | ((0x3 & (law)) << 4))
+#define ALS_CMPR_CFG_VAL(filt, l3_en)	(((0x7 & filt) << 5) | l3_en)
+
+/*
+ * LEDs subdevice bits and masks
+ */
+
+#define ADP5520_01_MAXLEDS 3
+
+#define ADP5520_FLAG_LED_MASK 		0x3
+#define ADP5520_FLAG_OFFT_SHIFT 	8
+#define ADP5520_FLAG_OFFT_MASK 		0x3
+
+#define ADP5520_R3_MODE		(1 << 5)
+#define ADP5520_C3_MODE		(1 << 4)
+#define ADP5520_LED_LAW		(1 << 3)
+#define ADP5520_LED3_EN		(1 << 2)
+#define ADP5520_LED2_EN		(1 << 1)
+#define ADP5520_LED1_EN		(1 << 0)
+
+/*
+ * GPIO subdevice bits and masks
+ */
+
+#define ADP5520_MAXGPIOS	8
+
+#define ADP5520_GPIO_C3		(1 << 7)	/* LED2 or GPIO7 aka C3 */
+#define ADP5520_GPIO_C2		(1 << 6)
+#define ADP5520_GPIO_C1		(1 << 5)
+#define ADP5520_GPIO_C0		(1 << 4)
+#define ADP5520_GPIO_R3		(1 << 3)	/* LED3 or GPIO3 aka R3 */
+#define ADP5520_GPIO_R2		(1 << 2)
+#define ADP5520_GPIO_R1		(1 << 1)
+#define ADP5520_GPIO_R0		(1 << 0)
+
+struct adp5520_gpio_platform_data {
+	unsigned gpio_start;
+	u8 gpio_en_mask;
+	u8 gpio_pullup_mask;
+};
+
+/*
+ * Keypad subdevice bits and masks
+ */
+
+#define ADP5520_MAXKEYS	16
+
+#define ADP5520_COL_C3 		(1 << 7)	/* LED2 or GPIO7 aka C3 */
+#define ADP5520_COL_C2		(1 << 6)
+#define ADP5520_COL_C1		(1 << 5)
+#define ADP5520_COL_C0		(1 << 4)
+#define ADP5520_ROW_R3		(1 << 3)	/* LED3 or GPIO3 aka R3 */
+#define ADP5520_ROW_R2		(1 << 2)
+#define ADP5520_ROW_R1		(1 << 1)
+#define ADP5520_ROW_R0		(1 << 0)
+
+#define ADP5520_KEY(row, col) (col + row * 4)
+#define ADP5520_KEYMAPSIZE	ADP5520_MAXKEYS
+
+struct adp5520_keys_platform_data {
+	int rows_en_mask;		/* Number of rows */
+	int cols_en_mask;		/* Number of columns */
+	const unsigned short *keymap;	/* Pointer to keymap */
+	unsigned short keymapsize;	/* Keymap size */
+	unsigned repeat:1;		/* Enable key repeat */
+};
+
+
+/*
+ * LEDs subdevice platform data
+ */
+
+#define FLAG_ID_ADP5520_LED1_ADP5501_LED0 	1	/* ADP5520 PIN ILED */
+#define FLAG_ID_ADP5520_LED2_ADP5501_LED1 	2	/* ADP5520 PIN C3 */
+#define FLAG_ID_ADP5520_LED3_ADP5501_LED2 	3	/* ADP5520 PIN R3 */
+
+#define ADP5520_LED_DIS_BLINK	(0 << ADP5520_FLAG_OFFT_SHIFT)
+#define ADP5520_LED_OFFT_600ms	(1 << ADP5520_FLAG_OFFT_SHIFT)
+#define ADP5520_LED_OFFT_800ms	(2 << ADP5520_FLAG_OFFT_SHIFT)
+#define ADP5520_LED_OFFT_1200ms	(3 << ADP5520_FLAG_OFFT_SHIFT)
+
+#define ADP5520_LED_ONT_200ms	0
+#define ADP5520_LED_ONT_600ms	1
+#define ADP5520_LED_ONT_800ms	2
+#define ADP5520_LED_ONT_1200ms	3
+
+struct adp5520_leds_platform_data {
+	int num_leds;
+	struct led_info	*leds;
+	u8 fade_in;		/* Backlight Fade-In Timer */
+	u8 fade_out;		/* Backlight Fade-Out Timer */
+	u8 led_on_time;
+};
+
+/*
+ * Backlight subdevice platform data
+ */
+
+#define ADP5520_FADE_T_DIS	0	/* Fade Timer Disabled */
+#define ADP5520_FADE_T_300ms	1	/* 0.3 Sec */
+#define ADP5520_FADE_T_600ms	2
+#define ADP5520_FADE_T_900ms	3
+#define ADP5520_FADE_T_1200ms	4
+#define ADP5520_FADE_T_1500ms	5
+#define ADP5520_FADE_T_1800ms	6
+#define ADP5520_FADE_T_2100ms	7
+#define ADP5520_FADE_T_2400ms	8
+#define ADP5520_FADE_T_2700ms	9
+#define ADP5520_FADE_T_3000ms	10
+#define ADP5520_FADE_T_3500ms	11
+#define ADP5520_FADE_T_4000ms	12
+#define ADP5520_FADE_T_4500ms	13
+#define ADP5520_FADE_T_5000ms	14
+#define ADP5520_FADE_T_5500ms	15	/* 5.5 Sec */
+
+#define ADP5520_BL_LAW_LINEAR 	0
+#define ADP5520_BL_LAW_SQUARE 	1
+#define ADP5520_BL_LAW_CUBIC1 	2
+#define ADP5520_BL_LAW_CUBIC2 	3
+
+#define ADP5520_BL_AMBL_FILT_80ms 	0	/* Light sensor filter time */
+#define ADP5520_BL_AMBL_FILT_160ms 	1
+#define ADP5520_BL_AMBL_FILT_320ms 	2
+#define ADP5520_BL_AMBL_FILT_640ms 	3
+#define ADP5520_BL_AMBL_FILT_1280ms 	4
+#define ADP5520_BL_AMBL_FILT_2560ms 	5
+#define ADP5520_BL_AMBL_FILT_5120ms 	6
+#define ADP5520_BL_AMBL_FILT_10240ms 	7	/* 10.24 sec */
+
+	/*
+	 * Blacklight current 0..30mA
+	 */
+#define ADP5520_BL_CUR_mA(I)		((I * 127) / 30)
+
+	/*
+	 * L2 comparator current 0..1000uA
+	 */
+#define ADP5520_L2_COMP_CURR_uA(I)	((I * 255) / 1000)
+
+	/*
+	 * L3 comparator current 0..127uA
+	 */
+#define ADP5520_L3_COMP_CURR_uA(I)	((I * 255) / 127)
+
+struct adp5520_backlight_platform_data {
+	u8 fade_in;		/* Backlight Fade-In Timer */
+	u8 fade_out;		/* Backlight Fade-Out Timer */
+	u8 fade_led_law;	/* fade-on/fade-off transfer characteristic */
+
+	u8 en_ambl_sens;	/* 1 = enable ambient light sensor */
+	u8 abml_filt;		/* Light sensor filter time */
+	u8 l1_daylight_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l1_daylight_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_office_max;	/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_office_dim;	/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_dark_max;		/* use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l3_dark_dim;		/* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */
+	u8 l2_trip;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1000 uA */
+	u8 l2_hyst;		/* use L2_COMP_CURR_uA(I) 0 <= I <= 1000 uA */
+	u8 l3_trip;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 127 uA */
+	u8 l3_hyst;		/* use L3_COMP_CURR_uA(I) 0 <= I <= 127 uA */
+};
+
+/*
+ * MFD chip platform data
+ */
+
+struct adp5520_platform_data {
+	struct adp5520_keys_platform_data *keys;
+	struct adp5520_gpio_platform_data *gpio;
+	struct adp5520_leds_platform_data *leds;
+	struct adp5520_backlight_platform_data *backlight;
+};
+
+/*
+ * MFD chip functions
+ */
+
+extern int adp5520_read(struct device *dev, int reg, uint8_t *val);
+extern int adp5520_write(struct device *dev, int reg, u8 val);
+extern int adp5520_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
+extern int adp5520_set_bits(struct device *dev, int reg, uint8_t bit_mask);
+
+extern int adp5520_register_notifier(struct device *dev,
+		 struct notifier_block *nb, unsigned int events);
+
+extern int adp5520_unregister_notifier(struct device *dev,
+		struct notifier_block *nb, unsigned int events);
+
+#endif /* __LINUX_MFD_ADP5520_H */
-- 
cgit v1.2.3


From e0a3389ab9cb08813bf325616249abb29c4d2302 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 12 Oct 2009 16:15:09 +0100
Subject: mfd: Split wm8350 IRQ code into a separate file

In preparation for refactoring - it's over 700 lines of well-isolated
code and having it in a file by itself makes things more managable.

While we're at it make sure that we clean up the IRQ if we fail after
acquiring it on init.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile            |   1 +
 drivers/mfd/wm8350-core.c       | 769 +-------------------------------------
 drivers/mfd/wm8350-irq.c        | 804 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8350/core.h |   4 +-
 4 files changed, 815 insertions(+), 763 deletions(-)
 create mode 100644 drivers/mfd/wm8350-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 24558574a737..75638ca8288b 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_MFD_WM8400)	+= wm8400-core.o
 wm831x-objs			:= wm831x-core.o wm831x-irq.o wm831x-otp.o
 obj-$(CONFIG_MFD_WM831X)	+= wm831x.o
 wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
+wm8350-objs			+= wm8350-irq.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index ba27c9dc1ad3..242795feb90d 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -337,733 +337,6 @@ int wm8350_reg_unlock(struct wm8350 *wm8350)
 }
 EXPORT_SYMBOL_GPL(wm8350_reg_unlock);
 
-static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
-{
-	mutex_lock(&wm8350->irq_mutex);
-
-	if (wm8350->irq[irq].handler)
-		wm8350->irq[irq].handler(wm8350, irq, wm8350->irq[irq].data);
-	else {
-		dev_err(wm8350->dev, "irq %d nobody cared. now masked.\n",
-			irq);
-		wm8350_mask_irq(wm8350, irq);
-	}
-
-	mutex_unlock(&wm8350->irq_mutex);
-}
-
-/*
- * This is a threaded IRQ handler so can access I2C/SPI.  Since all
- * interrupts are clear on read the IRQ line will be reasserted and
- * the physical IRQ will be handled again if another interrupt is
- * asserted while we run - in the normal course of events this is a
- * rare occurrence so we save I2C/SPI reads.
- */
-static irqreturn_t wm8350_irq(int irq, void *data)
-{
-	struct wm8350 *wm8350 = data;
-	u16 level_one, status1, status2, comp;
-
-	/* TODO: Use block reads to improve performance? */
-	level_one = wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS)
-		& ~wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK);
-	status1 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_1)
-		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_1_MASK);
-	status2 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_2)
-		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_2_MASK);
-	comp = wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS)
-		& ~wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK);
-
-	/* over current */
-	if (level_one & WM8350_OC_INT) {
-		u16 oc;
-
-		oc = wm8350_reg_read(wm8350, WM8350_OVER_CURRENT_INT_STATUS);
-		oc &= ~wm8350_reg_read(wm8350,
-				       WM8350_OVER_CURRENT_INT_STATUS_MASK);
-
-		if (oc & WM8350_OC_LS_EINT)	/* limit switch */
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_OC_LS);
-	}
-
-	/* under voltage */
-	if (level_one & WM8350_UV_INT) {
-		u16 uv;
-
-		uv = wm8350_reg_read(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS);
-		uv &= ~wm8350_reg_read(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK);
-
-		if (uv & WM8350_UV_DC1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC1);
-		if (uv & WM8350_UV_DC2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC2);
-		if (uv & WM8350_UV_DC3_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC3);
-		if (uv & WM8350_UV_DC4_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC4);
-		if (uv & WM8350_UV_DC5_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC5);
-		if (uv & WM8350_UV_DC6_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC6);
-		if (uv & WM8350_UV_LDO1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO1);
-		if (uv & WM8350_UV_LDO2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO2);
-		if (uv & WM8350_UV_LDO3_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO3);
-		if (uv & WM8350_UV_LDO4_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO4);
-	}
-
-	/* charger, RTC */
-	if (status1) {
-		if (status1 & WM8350_CHG_BAT_HOT_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_HOT);
-		if (status1 & WM8350_CHG_BAT_COLD_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_COLD);
-		if (status1 & WM8350_CHG_BAT_FAIL_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_FAIL);
-		if (status1 & WM8350_CHG_TO_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_TO);
-		if (status1 & WM8350_CHG_END_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_END);
-		if (status1 & WM8350_CHG_START_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_START);
-		if (status1 & WM8350_CHG_FAST_RDY_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_FAST_RDY);
-		if (status1 & WM8350_CHG_VBATT_LT_3P9_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_3P9);
-		if (status1 & WM8350_CHG_VBATT_LT_3P1_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_3P1);
-		if (status1 & WM8350_CHG_VBATT_LT_2P85_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_2P85);
-		if (status1 & WM8350_RTC_ALM_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_ALM);
-		if (status1 & WM8350_RTC_SEC_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_SEC);
-		if (status1 & WM8350_RTC_PER_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_PER);
-	}
-
-	/* current sink, system, aux adc */
-	if (status2) {
-		if (status2 & WM8350_CS1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS1);
-		if (status2 & WM8350_CS2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS2);
-
-		if (status2 & WM8350_SYS_HYST_COMP_FAIL_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_HYST_COMP_FAIL);
-		if (status2 & WM8350_SYS_CHIP_GT115_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_CHIP_GT115);
-		if (status2 & WM8350_SYS_CHIP_GT140_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_CHIP_GT140);
-		if (status2 & WM8350_SYS_WDOG_TO_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_WDOG_TO);
-
-		if (status2 & WM8350_AUXADC_DATARDY_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DATARDY);
-		if (status2 & WM8350_AUXADC_DCOMP4_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP4);
-		if (status2 & WM8350_AUXADC_DCOMP3_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP3);
-		if (status2 & WM8350_AUXADC_DCOMP2_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP2);
-		if (status2 & WM8350_AUXADC_DCOMP1_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP1);
-
-		if (status2 & WM8350_USB_LIMIT_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_USB_LIMIT);
-	}
-
-	/* wake, codec, ext */
-	if (comp) {
-		if (comp & WM8350_WKUP_OFF_STATE_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_OFF_STATE);
-		if (comp & WM8350_WKUP_HIB_STATE_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_HIB_STATE);
-		if (comp & WM8350_WKUP_CONV_FAULT_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_CONV_FAULT);
-		if (comp & WM8350_WKUP_WDOG_RST_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_WDOG_RST);
-		if (comp & WM8350_WKUP_GP_PWR_ON_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_GP_PWR_ON);
-		if (comp & WM8350_WKUP_ONKEY_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_WKUP_ONKEY);
-		if (comp & WM8350_WKUP_GP_WAKEUP_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_GP_WAKEUP);
-
-		if (comp & WM8350_CODEC_JCK_DET_L_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_JCK_DET_L);
-		if (comp & WM8350_CODEC_JCK_DET_R_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_JCK_DET_R);
-		if (comp & WM8350_CODEC_MICSCD_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_MICSCD);
-		if (comp & WM8350_CODEC_MICD_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CODEC_MICD);
-
-		if (comp & WM8350_EXT_USB_FB_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_USB_FB);
-		if (comp & WM8350_EXT_WALL_FB_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_EXT_WALL_FB);
-		if (comp & WM8350_EXT_BAT_FB_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_BAT_FB);
-	}
-
-	if (level_one & WM8350_GP_INT) {
-		int i;
-		u16 gpio;
-
-		gpio = wm8350_reg_read(wm8350, WM8350_GPIO_INT_STATUS);
-		gpio &= ~wm8350_reg_read(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK);
-
-		for (i = 0; i < 12; i++) {
-			if (gpio & (1 << i))
-				wm8350_irq_call_handler(wm8350,
-							WM8350_IRQ_GPIO(i));
-		}
-	}
-
-	return IRQ_HANDLED;
-}
-
-int wm8350_register_irq(struct wm8350 *wm8350, int irq,
-			void (*handler) (struct wm8350 *, int, void *),
-			void *data)
-{
-	if (irq < 0 || irq > WM8350_NUM_IRQ || !handler)
-		return -EINVAL;
-
-	if (wm8350->irq[irq].handler)
-		return -EBUSY;
-
-	mutex_lock(&wm8350->irq_mutex);
-	wm8350->irq[irq].handler = handler;
-	wm8350->irq[irq].data = data;
-	mutex_unlock(&wm8350->irq_mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wm8350_register_irq);
-
-int wm8350_free_irq(struct wm8350 *wm8350, int irq)
-{
-	if (irq < 0 || irq > WM8350_NUM_IRQ)
-		return -EINVAL;
-
-	mutex_lock(&wm8350->irq_mutex);
-	wm8350->irq[irq].handler = NULL;
-	mutex_unlock(&wm8350->irq_mutex);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wm8350_free_irq);
-
-int wm8350_mask_irq(struct wm8350 *wm8350, int irq)
-{
-	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_HOT_EINT);
-	case WM8350_IRQ_CHG_BAT_COLD:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_COLD_EINT);
-	case WM8350_IRQ_CHG_BAT_FAIL:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_FAIL_EINT);
-	case WM8350_IRQ_CHG_TO:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_TO_EINT);
-	case WM8350_IRQ_CHG_END:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_END_EINT);
-	case WM8350_IRQ_CHG_START:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_START_EINT);
-	case WM8350_IRQ_CHG_FAST_RDY:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_FAST_RDY_EINT);
-	case WM8350_IRQ_RTC_PER:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_PER_EINT);
-	case WM8350_IRQ_RTC_SEC:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_SEC_EINT);
-	case WM8350_IRQ_RTC_ALM:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_ALM_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P9:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_3P9_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_3P1_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_2P85:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_2P85_EINT);
-	case WM8350_IRQ_CS1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_CS1_EINT);
-	case WM8350_IRQ_CS2:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_CS2_EINT);
-	case WM8350_IRQ_USB_LIMIT:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_USB_LIMIT_EINT);
-	case WM8350_IRQ_AUXADC_DATARDY:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DATARDY_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP4:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP4_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP3:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP3_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP2:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP2_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP1_EINT);
-	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT115:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_CHIP_GT115_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT140:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_CHIP_GT140_EINT);
-	case WM8350_IRQ_SYS_WDOG_TO:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_WDOG_TO_EINT);
-	case WM8350_IRQ_UV_LDO4:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO4_EINT);
-	case WM8350_IRQ_UV_LDO3:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO3_EINT);
-	case WM8350_IRQ_UV_LDO2:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO2_EINT);
-	case WM8350_IRQ_UV_LDO1:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO1_EINT);
-	case WM8350_IRQ_UV_DC6:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC6_EINT);
-	case WM8350_IRQ_UV_DC5:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC5_EINT);
-	case WM8350_IRQ_UV_DC4:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC4_EINT);
-	case WM8350_IRQ_UV_DC3:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC3_EINT);
-	case WM8350_IRQ_UV_DC2:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC2_EINT);
-	case WM8350_IRQ_UV_DC1:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC1_EINT);
-	case WM8350_IRQ_OC_LS:
-		return wm8350_set_bits(wm8350,
-				       WM8350_OVER_CURRENT_INT_STATUS_MASK,
-				       WM8350_IM_OC_LS_EINT);
-	case WM8350_IRQ_EXT_USB_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_USB_FB_EINT);
-	case WM8350_IRQ_EXT_WALL_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_WALL_FB_EINT);
-	case WM8350_IRQ_EXT_BAT_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_BAT_FB_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_L:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_JCK_DET_L_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_R:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_JCK_DET_R_EINT);
-	case WM8350_IRQ_CODEC_MICSCD:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_MICSCD_EINT);
-	case WM8350_IRQ_CODEC_MICD:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_MICD_EINT);
-	case WM8350_IRQ_WKUP_OFF_STATE:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_HIB_STATE:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_HIB_STATE_EINT);
-	case WM8350_IRQ_WKUP_CONV_FAULT:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_CONV_FAULT_EINT);
-	case WM8350_IRQ_WKUP_WDOG_RST:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_GP_PWR_ON:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_GP_PWR_ON_EINT);
-	case WM8350_IRQ_WKUP_ONKEY:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_ONKEY_EINT);
-	case WM8350_IRQ_WKUP_GP_WAKEUP:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_GP_WAKEUP_EINT);
-	case WM8350_IRQ_GPIO(0):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP0_EINT);
-	case WM8350_IRQ_GPIO(1):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP1_EINT);
-	case WM8350_IRQ_GPIO(2):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP2_EINT);
-	case WM8350_IRQ_GPIO(3):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP3_EINT);
-	case WM8350_IRQ_GPIO(4):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP4_EINT);
-	case WM8350_IRQ_GPIO(5):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP5_EINT);
-	case WM8350_IRQ_GPIO(6):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP6_EINT);
-	case WM8350_IRQ_GPIO(7):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP7_EINT);
-	case WM8350_IRQ_GPIO(8):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP8_EINT);
-	case WM8350_IRQ_GPIO(9):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP9_EINT);
-	case WM8350_IRQ_GPIO(10):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP10_EINT);
-	case WM8350_IRQ_GPIO(11):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP11_EINT);
-	case WM8350_IRQ_GPIO(12):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP12_EINT);
-	default:
-		dev_warn(wm8350->dev, "Attempting to mask unknown IRQ %d\n",
-			 irq);
-		return -EINVAL;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wm8350_mask_irq);
-
-int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
-{
-	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_HOT_EINT);
-	case WM8350_IRQ_CHG_BAT_COLD:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_COLD_EINT);
-	case WM8350_IRQ_CHG_BAT_FAIL:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_FAIL_EINT);
-	case WM8350_IRQ_CHG_TO:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_TO_EINT);
-	case WM8350_IRQ_CHG_END:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_END_EINT);
-	case WM8350_IRQ_CHG_START:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_START_EINT);
-	case WM8350_IRQ_CHG_FAST_RDY:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_FAST_RDY_EINT);
-	case WM8350_IRQ_RTC_PER:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_PER_EINT);
-	case WM8350_IRQ_RTC_SEC:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_SEC_EINT);
-	case WM8350_IRQ_RTC_ALM:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_ALM_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P9:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_3P9_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_3P1_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_2P85:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_2P85_EINT);
-	case WM8350_IRQ_CS1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_CS1_EINT);
-	case WM8350_IRQ_CS2:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_CS2_EINT);
-	case WM8350_IRQ_USB_LIMIT:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_USB_LIMIT_EINT);
-	case WM8350_IRQ_AUXADC_DATARDY:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DATARDY_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP4:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP4_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP3:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP3_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP2:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP2_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP1_EINT);
-	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT115:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_CHIP_GT115_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT140:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_CHIP_GT140_EINT);
-	case WM8350_IRQ_SYS_WDOG_TO:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_WDOG_TO_EINT);
-	case WM8350_IRQ_UV_LDO4:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO4_EINT);
-	case WM8350_IRQ_UV_LDO3:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO3_EINT);
-	case WM8350_IRQ_UV_LDO2:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO2_EINT);
-	case WM8350_IRQ_UV_LDO1:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO1_EINT);
-	case WM8350_IRQ_UV_DC6:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC6_EINT);
-	case WM8350_IRQ_UV_DC5:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC5_EINT);
-	case WM8350_IRQ_UV_DC4:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC4_EINT);
-	case WM8350_IRQ_UV_DC3:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC3_EINT);
-	case WM8350_IRQ_UV_DC2:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC2_EINT);
-	case WM8350_IRQ_UV_DC1:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC1_EINT);
-	case WM8350_IRQ_OC_LS:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_OVER_CURRENT_INT_STATUS_MASK,
-					 WM8350_IM_OC_LS_EINT);
-	case WM8350_IRQ_EXT_USB_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_USB_FB_EINT);
-	case WM8350_IRQ_EXT_WALL_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_WALL_FB_EINT);
-	case WM8350_IRQ_EXT_BAT_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_BAT_FB_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_L:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_JCK_DET_L_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_R:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_JCK_DET_R_EINT);
-	case WM8350_IRQ_CODEC_MICSCD:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_MICSCD_EINT);
-	case WM8350_IRQ_CODEC_MICD:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_MICD_EINT);
-	case WM8350_IRQ_WKUP_OFF_STATE:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_HIB_STATE:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_HIB_STATE_EINT);
-	case WM8350_IRQ_WKUP_CONV_FAULT:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_CONV_FAULT_EINT);
-	case WM8350_IRQ_WKUP_WDOG_RST:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_GP_PWR_ON:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_GP_PWR_ON_EINT);
-	case WM8350_IRQ_WKUP_ONKEY:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_ONKEY_EINT);
-	case WM8350_IRQ_WKUP_GP_WAKEUP:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_GP_WAKEUP_EINT);
-	case WM8350_IRQ_GPIO(0):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP0_EINT);
-	case WM8350_IRQ_GPIO(1):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP1_EINT);
-	case WM8350_IRQ_GPIO(2):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP2_EINT);
-	case WM8350_IRQ_GPIO(3):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP3_EINT);
-	case WM8350_IRQ_GPIO(4):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP4_EINT);
-	case WM8350_IRQ_GPIO(5):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP5_EINT);
-	case WM8350_IRQ_GPIO(6):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP6_EINT);
-	case WM8350_IRQ_GPIO(7):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP7_EINT);
-	case WM8350_IRQ_GPIO(8):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP8_EINT);
-	case WM8350_IRQ_GPIO(9):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP9_EINT);
-	case WM8350_IRQ_GPIO(10):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP10_EINT);
-	case WM8350_IRQ_GPIO(11):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP11_EINT);
-	case WM8350_IRQ_GPIO(12):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP12_EINT);
-	default:
-		dev_warn(wm8350->dev, "Attempting to unmask unknown IRQ %d\n",
-			 irq);
-		return -EINVAL;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
-
 int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
 {
 	u16 reg, result = 0;
@@ -1409,49 +682,18 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		return ret;
 	}
 
-	wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF);
-
 	mutex_init(&wm8350->auxadc_mutex);
-	mutex_init(&wm8350->irq_mutex);
-	if (irq) {
-		int flags = IRQF_ONESHOT;
-
-		if (pdata && pdata->irq_high) {
-			flags |= IRQF_TRIGGER_HIGH;
-
-			wm8350_set_bits(wm8350, WM8350_SYSTEM_CONTROL_1,
-					WM8350_IRQ_POL);
-		} else {
-			flags |= IRQF_TRIGGER_LOW;
-
-			wm8350_clear_bits(wm8350, WM8350_SYSTEM_CONTROL_1,
-					  WM8350_IRQ_POL);
-		}
 
-		ret = request_threaded_irq(irq, NULL, wm8350_irq, flags,
-					   "wm8350", wm8350);
-		if (ret != 0) {
-			dev_err(wm8350->dev, "Failed to request IRQ: %d\n",
-				ret);
-			goto err;
-		}
-	} else {
-		dev_err(wm8350->dev, "No IRQ configured\n");
+	ret = wm8350_irq_init(wm8350, irq, pdata);
+	if (ret < 0)
 		goto err;
-	}
-	wm8350->chip_irq = irq;
 
 	if (pdata && pdata->init) {
 		ret = pdata->init(wm8350);
 		if (ret != 0) {
 			dev_err(wm8350->dev, "Platform init() failed: %d\n",
 				ret);
-			goto err;
+			goto err_irq;
 		}
 	}
 
@@ -1470,6 +712,8 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 
 	return 0;
 
+err_irq:
+	wm8350_irq_exit(wm8350);
 err:
 	kfree(wm8350->reg_cache);
 	return ret;
@@ -1493,7 +737,8 @@ void wm8350_device_exit(struct wm8350 *wm8350)
 	platform_device_unregister(wm8350->gpio.pdev);
 	platform_device_unregister(wm8350->codec.pdev);
 
-	free_irq(wm8350->chip_irq, wm8350);
+	wm8350_irq_exit(wm8350);
+
 	kfree(wm8350->reg_cache);
 }
 EXPORT_SYMBOL_GPL(wm8350_device_exit);
diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
new file mode 100644
index 000000000000..a432e2b65c48
--- /dev/null
+++ b/drivers/mfd/wm8350-irq.c
@@ -0,0 +1,804 @@
+/*
+ * wm8350-irq.c  --  IRQ support for Wolfson WM8350
+ *
+ * Copyright 2007, 2008, 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood, Mark Brown
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bug.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+
+#include <linux/mfd/wm8350/core.h>
+#include <linux/mfd/wm8350/audio.h>
+#include <linux/mfd/wm8350/comparator.h>
+#include <linux/mfd/wm8350/gpio.h>
+#include <linux/mfd/wm8350/pmic.h>
+#include <linux/mfd/wm8350/rtc.h>
+#include <linux/mfd/wm8350/supply.h>
+#include <linux/mfd/wm8350/wdt.h>
+
+static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
+{
+	mutex_lock(&wm8350->irq_mutex);
+
+	if (wm8350->irq[irq].handler)
+		wm8350->irq[irq].handler(wm8350, irq, wm8350->irq[irq].data);
+	else {
+		dev_err(wm8350->dev, "irq %d nobody cared. now masked.\n",
+			irq);
+		wm8350_mask_irq(wm8350, irq);
+	}
+
+	mutex_unlock(&wm8350->irq_mutex);
+}
+
+/*
+ * This is a threaded IRQ handler so can access I2C/SPI.  Since all
+ * interrupts are clear on read the IRQ line will be reasserted and
+ * the physical IRQ will be handled again if another interrupt is
+ * asserted while we run - in the normal course of events this is a
+ * rare occurrence so we save I2C/SPI reads.
+ */
+static irqreturn_t wm8350_irq(int irq, void *data)
+{
+	struct wm8350 *wm8350 = data;
+	u16 level_one, status1, status2, comp;
+
+	/* TODO: Use block reads to improve performance? */
+	level_one = wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS)
+		& ~wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK);
+	status1 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_1)
+		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_1_MASK);
+	status2 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_2)
+		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_2_MASK);
+	comp = wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS)
+		& ~wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK);
+
+	/* over current */
+	if (level_one & WM8350_OC_INT) {
+		u16 oc;
+
+		oc = wm8350_reg_read(wm8350, WM8350_OVER_CURRENT_INT_STATUS);
+		oc &= ~wm8350_reg_read(wm8350,
+				       WM8350_OVER_CURRENT_INT_STATUS_MASK);
+
+		if (oc & WM8350_OC_LS_EINT)	/* limit switch */
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_OC_LS);
+	}
+
+	/* under voltage */
+	if (level_one & WM8350_UV_INT) {
+		u16 uv;
+
+		uv = wm8350_reg_read(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS);
+		uv &= ~wm8350_reg_read(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK);
+
+		if (uv & WM8350_UV_DC1_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC1);
+		if (uv & WM8350_UV_DC2_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC2);
+		if (uv & WM8350_UV_DC3_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC3);
+		if (uv & WM8350_UV_DC4_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC4);
+		if (uv & WM8350_UV_DC5_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC5);
+		if (uv & WM8350_UV_DC6_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC6);
+		if (uv & WM8350_UV_LDO1_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO1);
+		if (uv & WM8350_UV_LDO2_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO2);
+		if (uv & WM8350_UV_LDO3_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO3);
+		if (uv & WM8350_UV_LDO4_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO4);
+	}
+
+	/* charger, RTC */
+	if (status1) {
+		if (status1 & WM8350_CHG_BAT_HOT_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_BAT_HOT);
+		if (status1 & WM8350_CHG_BAT_COLD_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_BAT_COLD);
+		if (status1 & WM8350_CHG_BAT_FAIL_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_BAT_FAIL);
+		if (status1 & WM8350_CHG_TO_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_TO);
+		if (status1 & WM8350_CHG_END_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_END);
+		if (status1 & WM8350_CHG_START_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_START);
+		if (status1 & WM8350_CHG_FAST_RDY_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_FAST_RDY);
+		if (status1 & WM8350_CHG_VBATT_LT_3P9_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_VBATT_LT_3P9);
+		if (status1 & WM8350_CHG_VBATT_LT_3P1_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_VBATT_LT_3P1);
+		if (status1 & WM8350_CHG_VBATT_LT_2P85_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CHG_VBATT_LT_2P85);
+		if (status1 & WM8350_RTC_ALM_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_ALM);
+		if (status1 & WM8350_RTC_SEC_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_SEC);
+		if (status1 & WM8350_RTC_PER_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_PER);
+	}
+
+	/* current sink, system, aux adc */
+	if (status2) {
+		if (status2 & WM8350_CS1_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS1);
+		if (status2 & WM8350_CS2_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS2);
+
+		if (status2 & WM8350_SYS_HYST_COMP_FAIL_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_SYS_HYST_COMP_FAIL);
+		if (status2 & WM8350_SYS_CHIP_GT115_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_SYS_CHIP_GT115);
+		if (status2 & WM8350_SYS_CHIP_GT140_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_SYS_CHIP_GT140);
+		if (status2 & WM8350_SYS_WDOG_TO_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_SYS_WDOG_TO);
+
+		if (status2 & WM8350_AUXADC_DATARDY_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_AUXADC_DATARDY);
+		if (status2 & WM8350_AUXADC_DCOMP4_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_AUXADC_DCOMP4);
+		if (status2 & WM8350_AUXADC_DCOMP3_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_AUXADC_DCOMP3);
+		if (status2 & WM8350_AUXADC_DCOMP2_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_AUXADC_DCOMP2);
+		if (status2 & WM8350_AUXADC_DCOMP1_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_AUXADC_DCOMP1);
+
+		if (status2 & WM8350_USB_LIMIT_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_USB_LIMIT);
+	}
+
+	/* wake, codec, ext */
+	if (comp) {
+		if (comp & WM8350_WKUP_OFF_STATE_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_OFF_STATE);
+		if (comp & WM8350_WKUP_HIB_STATE_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_HIB_STATE);
+		if (comp & WM8350_WKUP_CONV_FAULT_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_CONV_FAULT);
+		if (comp & WM8350_WKUP_WDOG_RST_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_WDOG_RST);
+		if (comp & WM8350_WKUP_GP_PWR_ON_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_GP_PWR_ON);
+		if (comp & WM8350_WKUP_ONKEY_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_WKUP_ONKEY);
+		if (comp & WM8350_WKUP_GP_WAKEUP_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_WKUP_GP_WAKEUP);
+
+		if (comp & WM8350_CODEC_JCK_DET_L_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CODEC_JCK_DET_L);
+		if (comp & WM8350_CODEC_JCK_DET_R_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CODEC_JCK_DET_R);
+		if (comp & WM8350_CODEC_MICSCD_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_CODEC_MICSCD);
+		if (comp & WM8350_CODEC_MICD_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CODEC_MICD);
+
+		if (comp & WM8350_EXT_USB_FB_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_USB_FB);
+		if (comp & WM8350_EXT_WALL_FB_EINT)
+			wm8350_irq_call_handler(wm8350,
+						WM8350_IRQ_EXT_WALL_FB);
+		if (comp & WM8350_EXT_BAT_FB_EINT)
+			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_BAT_FB);
+	}
+
+	if (level_one & WM8350_GP_INT) {
+		int i;
+		u16 gpio;
+
+		gpio = wm8350_reg_read(wm8350, WM8350_GPIO_INT_STATUS);
+		gpio &= ~wm8350_reg_read(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK);
+
+		for (i = 0; i < 12; i++) {
+			if (gpio & (1 << i))
+				wm8350_irq_call_handler(wm8350,
+							WM8350_IRQ_GPIO(i));
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+int wm8350_register_irq(struct wm8350 *wm8350, int irq,
+			void (*handler) (struct wm8350 *, int, void *),
+			void *data)
+{
+	if (irq < 0 || irq > WM8350_NUM_IRQ || !handler)
+		return -EINVAL;
+
+	if (wm8350->irq[irq].handler)
+		return -EBUSY;
+
+	mutex_lock(&wm8350->irq_mutex);
+	wm8350->irq[irq].handler = handler;
+	wm8350->irq[irq].data = data;
+	mutex_unlock(&wm8350->irq_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wm8350_register_irq);
+
+int wm8350_free_irq(struct wm8350 *wm8350, int irq)
+{
+	if (irq < 0 || irq > WM8350_NUM_IRQ)
+		return -EINVAL;
+
+	mutex_lock(&wm8350->irq_mutex);
+	wm8350->irq[irq].handler = NULL;
+	mutex_unlock(&wm8350->irq_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wm8350_free_irq);
+
+int wm8350_mask_irq(struct wm8350 *wm8350, int irq)
+{
+	switch (irq) {
+	case WM8350_IRQ_CHG_BAT_HOT:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_BAT_HOT_EINT);
+	case WM8350_IRQ_CHG_BAT_COLD:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_BAT_COLD_EINT);
+	case WM8350_IRQ_CHG_BAT_FAIL:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_BAT_FAIL_EINT);
+	case WM8350_IRQ_CHG_TO:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_TO_EINT);
+	case WM8350_IRQ_CHG_END:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_END_EINT);
+	case WM8350_IRQ_CHG_START:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_START_EINT);
+	case WM8350_IRQ_CHG_FAST_RDY:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_FAST_RDY_EINT);
+	case WM8350_IRQ_RTC_PER:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_RTC_PER_EINT);
+	case WM8350_IRQ_RTC_SEC:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_RTC_SEC_EINT);
+	case WM8350_IRQ_RTC_ALM:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_RTC_ALM_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_3P9:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_VBATT_LT_3P9_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_3P1:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_VBATT_LT_3P1_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_2P85:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+				       WM8350_IM_CHG_VBATT_LT_2P85_EINT);
+	case WM8350_IRQ_CS1:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_CS1_EINT);
+	case WM8350_IRQ_CS2:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_CS2_EINT);
+	case WM8350_IRQ_USB_LIMIT:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_USB_LIMIT_EINT);
+	case WM8350_IRQ_AUXADC_DATARDY:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_AUXADC_DATARDY_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP4:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_AUXADC_DCOMP4_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP3:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_AUXADC_DCOMP3_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP2:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_AUXADC_DCOMP2_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP1:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_AUXADC_DCOMP1_EINT);
+	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
+	case WM8350_IRQ_SYS_CHIP_GT115:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_SYS_CHIP_GT115_EINT);
+	case WM8350_IRQ_SYS_CHIP_GT140:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_SYS_CHIP_GT140_EINT);
+	case WM8350_IRQ_SYS_WDOG_TO:
+		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+				       WM8350_IM_SYS_WDOG_TO_EINT);
+	case WM8350_IRQ_UV_LDO4:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_LDO4_EINT);
+	case WM8350_IRQ_UV_LDO3:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_LDO3_EINT);
+	case WM8350_IRQ_UV_LDO2:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_LDO2_EINT);
+	case WM8350_IRQ_UV_LDO1:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_LDO1_EINT);
+	case WM8350_IRQ_UV_DC6:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC6_EINT);
+	case WM8350_IRQ_UV_DC5:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC5_EINT);
+	case WM8350_IRQ_UV_DC4:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC4_EINT);
+	case WM8350_IRQ_UV_DC3:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC3_EINT);
+	case WM8350_IRQ_UV_DC2:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC2_EINT);
+	case WM8350_IRQ_UV_DC1:
+		return wm8350_set_bits(wm8350,
+				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+				       WM8350_IM_UV_DC1_EINT);
+	case WM8350_IRQ_OC_LS:
+		return wm8350_set_bits(wm8350,
+				       WM8350_OVER_CURRENT_INT_STATUS_MASK,
+				       WM8350_IM_OC_LS_EINT);
+	case WM8350_IRQ_EXT_USB_FB:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_EXT_USB_FB_EINT);
+	case WM8350_IRQ_EXT_WALL_FB:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_EXT_WALL_FB_EINT);
+	case WM8350_IRQ_EXT_BAT_FB:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_EXT_BAT_FB_EINT);
+	case WM8350_IRQ_CODEC_JCK_DET_L:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_CODEC_JCK_DET_L_EINT);
+	case WM8350_IRQ_CODEC_JCK_DET_R:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_CODEC_JCK_DET_R_EINT);
+	case WM8350_IRQ_CODEC_MICSCD:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_CODEC_MICSCD_EINT);
+	case WM8350_IRQ_CODEC_MICD:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_CODEC_MICD_EINT);
+	case WM8350_IRQ_WKUP_OFF_STATE:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_OFF_STATE_EINT);
+	case WM8350_IRQ_WKUP_HIB_STATE:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_HIB_STATE_EINT);
+	case WM8350_IRQ_WKUP_CONV_FAULT:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_CONV_FAULT_EINT);
+	case WM8350_IRQ_WKUP_WDOG_RST:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_OFF_STATE_EINT);
+	case WM8350_IRQ_WKUP_GP_PWR_ON:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_GP_PWR_ON_EINT);
+	case WM8350_IRQ_WKUP_ONKEY:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_ONKEY_EINT);
+	case WM8350_IRQ_WKUP_GP_WAKEUP:
+		return wm8350_set_bits(wm8350,
+				       WM8350_COMPARATOR_INT_STATUS_MASK,
+				       WM8350_IM_WKUP_GP_WAKEUP_EINT);
+	case WM8350_IRQ_GPIO(0):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP0_EINT);
+	case WM8350_IRQ_GPIO(1):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP1_EINT);
+	case WM8350_IRQ_GPIO(2):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP2_EINT);
+	case WM8350_IRQ_GPIO(3):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP3_EINT);
+	case WM8350_IRQ_GPIO(4):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP4_EINT);
+	case WM8350_IRQ_GPIO(5):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP5_EINT);
+	case WM8350_IRQ_GPIO(6):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP6_EINT);
+	case WM8350_IRQ_GPIO(7):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP7_EINT);
+	case WM8350_IRQ_GPIO(8):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP8_EINT);
+	case WM8350_IRQ_GPIO(9):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP9_EINT);
+	case WM8350_IRQ_GPIO(10):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP10_EINT);
+	case WM8350_IRQ_GPIO(11):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP11_EINT);
+	case WM8350_IRQ_GPIO(12):
+		return wm8350_set_bits(wm8350,
+				       WM8350_GPIO_INT_STATUS_MASK,
+				       WM8350_IM_GP12_EINT);
+	default:
+		dev_warn(wm8350->dev, "Attempting to mask unknown IRQ %d\n",
+			 irq);
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wm8350_mask_irq);
+
+int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
+{
+	switch (irq) {
+	case WM8350_IRQ_CHG_BAT_HOT:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_BAT_HOT_EINT);
+	case WM8350_IRQ_CHG_BAT_COLD:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_BAT_COLD_EINT);
+	case WM8350_IRQ_CHG_BAT_FAIL:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_BAT_FAIL_EINT);
+	case WM8350_IRQ_CHG_TO:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_TO_EINT);
+	case WM8350_IRQ_CHG_END:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_END_EINT);
+	case WM8350_IRQ_CHG_START:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_START_EINT);
+	case WM8350_IRQ_CHG_FAST_RDY:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_FAST_RDY_EINT);
+	case WM8350_IRQ_RTC_PER:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_RTC_PER_EINT);
+	case WM8350_IRQ_RTC_SEC:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_RTC_SEC_EINT);
+	case WM8350_IRQ_RTC_ALM:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_RTC_ALM_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_3P9:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_VBATT_LT_3P9_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_3P1:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_VBATT_LT_3P1_EINT);
+	case WM8350_IRQ_CHG_VBATT_LT_2P85:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
+					 WM8350_IM_CHG_VBATT_LT_2P85_EINT);
+	case WM8350_IRQ_CS1:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_CS1_EINT);
+	case WM8350_IRQ_CS2:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_CS2_EINT);
+	case WM8350_IRQ_USB_LIMIT:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_USB_LIMIT_EINT);
+	case WM8350_IRQ_AUXADC_DATARDY:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_AUXADC_DATARDY_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP4:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_AUXADC_DCOMP4_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP3:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_AUXADC_DCOMP3_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP2:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_AUXADC_DCOMP2_EINT);
+	case WM8350_IRQ_AUXADC_DCOMP1:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_AUXADC_DCOMP1_EINT);
+	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
+	case WM8350_IRQ_SYS_CHIP_GT115:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_SYS_CHIP_GT115_EINT);
+	case WM8350_IRQ_SYS_CHIP_GT140:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_SYS_CHIP_GT140_EINT);
+	case WM8350_IRQ_SYS_WDOG_TO:
+		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
+					 WM8350_IM_SYS_WDOG_TO_EINT);
+	case WM8350_IRQ_UV_LDO4:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_LDO4_EINT);
+	case WM8350_IRQ_UV_LDO3:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_LDO3_EINT);
+	case WM8350_IRQ_UV_LDO2:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_LDO2_EINT);
+	case WM8350_IRQ_UV_LDO1:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_LDO1_EINT);
+	case WM8350_IRQ_UV_DC6:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC6_EINT);
+	case WM8350_IRQ_UV_DC5:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC5_EINT);
+	case WM8350_IRQ_UV_DC4:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC4_EINT);
+	case WM8350_IRQ_UV_DC3:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC3_EINT);
+	case WM8350_IRQ_UV_DC2:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC2_EINT);
+	case WM8350_IRQ_UV_DC1:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
+					 WM8350_IM_UV_DC1_EINT);
+	case WM8350_IRQ_OC_LS:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_OVER_CURRENT_INT_STATUS_MASK,
+					 WM8350_IM_OC_LS_EINT);
+	case WM8350_IRQ_EXT_USB_FB:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_EXT_USB_FB_EINT);
+	case WM8350_IRQ_EXT_WALL_FB:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_EXT_WALL_FB_EINT);
+	case WM8350_IRQ_EXT_BAT_FB:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_EXT_BAT_FB_EINT);
+	case WM8350_IRQ_CODEC_JCK_DET_L:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_CODEC_JCK_DET_L_EINT);
+	case WM8350_IRQ_CODEC_JCK_DET_R:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_CODEC_JCK_DET_R_EINT);
+	case WM8350_IRQ_CODEC_MICSCD:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_CODEC_MICSCD_EINT);
+	case WM8350_IRQ_CODEC_MICD:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_CODEC_MICD_EINT);
+	case WM8350_IRQ_WKUP_OFF_STATE:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_OFF_STATE_EINT);
+	case WM8350_IRQ_WKUP_HIB_STATE:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_HIB_STATE_EINT);
+	case WM8350_IRQ_WKUP_CONV_FAULT:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_CONV_FAULT_EINT);
+	case WM8350_IRQ_WKUP_WDOG_RST:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_OFF_STATE_EINT);
+	case WM8350_IRQ_WKUP_GP_PWR_ON:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_GP_PWR_ON_EINT);
+	case WM8350_IRQ_WKUP_ONKEY:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_ONKEY_EINT);
+	case WM8350_IRQ_WKUP_GP_WAKEUP:
+		return wm8350_clear_bits(wm8350,
+					 WM8350_COMPARATOR_INT_STATUS_MASK,
+					 WM8350_IM_WKUP_GP_WAKEUP_EINT);
+	case WM8350_IRQ_GPIO(0):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP0_EINT);
+	case WM8350_IRQ_GPIO(1):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP1_EINT);
+	case WM8350_IRQ_GPIO(2):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP2_EINT);
+	case WM8350_IRQ_GPIO(3):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP3_EINT);
+	case WM8350_IRQ_GPIO(4):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP4_EINT);
+	case WM8350_IRQ_GPIO(5):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP5_EINT);
+	case WM8350_IRQ_GPIO(6):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP6_EINT);
+	case WM8350_IRQ_GPIO(7):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP7_EINT);
+	case WM8350_IRQ_GPIO(8):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP8_EINT);
+	case WM8350_IRQ_GPIO(9):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP9_EINT);
+	case WM8350_IRQ_GPIO(10):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP10_EINT);
+	case WM8350_IRQ_GPIO(11):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP11_EINT);
+	case WM8350_IRQ_GPIO(12):
+		return wm8350_clear_bits(wm8350,
+					 WM8350_GPIO_INT_STATUS_MASK,
+					 WM8350_IM_GP12_EINT);
+	default:
+		dev_warn(wm8350->dev, "Attempting to unmask unknown IRQ %d\n",
+			 irq);
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
+
+int wm8350_irq_init(struct wm8350 *wm8350, int irq,
+		    struct wm8350_platform_data *pdata)
+{
+	int ret;
+	int flags = IRQF_ONESHOT;
+
+	if (!irq) {
+		dev_err(wm8350->dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
+
+	wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF);
+	wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF);
+	wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF);
+	wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF);
+	wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF);
+	wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF);
+
+	mutex_init(&wm8350->irq_mutex);
+	wm8350->chip_irq = irq;
+
+	if (pdata && pdata->irq_high) {
+		flags |= IRQF_TRIGGER_HIGH;
+
+		wm8350_set_bits(wm8350, WM8350_SYSTEM_CONTROL_1,
+				WM8350_IRQ_POL);
+	} else {
+		flags |= IRQF_TRIGGER_LOW;
+
+		wm8350_clear_bits(wm8350, WM8350_SYSTEM_CONTROL_1,
+				  WM8350_IRQ_POL);
+	}
+
+	ret = request_threaded_irq(irq, NULL, wm8350_irq, flags,
+				   "wm8350", wm8350);
+	if (ret != 0)
+		dev_err(wm8350->dev, "Failed to request IRQ: %d\n", ret);
+
+	return ret;
+}
+
+int wm8350_irq_exit(struct wm8350 *wm8350)
+{
+	free_irq(wm8350->chip_irq, wm8350);
+	return 0;
+}
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 1d595de6a055..32197fde904d 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -681,6 +681,8 @@ int wm8350_register_irq(struct wm8350 *wm8350, int irq,
 int wm8350_free_irq(struct wm8350 *wm8350, int irq);
 int wm8350_mask_irq(struct wm8350 *wm8350, int irq);
 int wm8350_unmask_irq(struct wm8350 *wm8350, int irq);
-
+int wm8350_irq_init(struct wm8350 *wm8350, int irq,
+		    struct wm8350_platform_data *pdata);
+int wm8350_irq_exit(struct wm8350 *wm8350);
 
 #endif
-- 
cgit v1.2.3


From 0c7229f93a529145d52e1bd7b29e6c98a3a3294d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 12 Oct 2009 16:15:10 +0100
Subject: mfd: Convert WM835x IRQ handling to use a data table

Rather than open coding individual IRQs in each function which
manipulates them store data for IRQs in a table which is then
referenced in the users.

This is a substantial code shrink and should be a performance win in
cases where only a single IRQ goes off at once since instead of
reading four of the second level IRQ registers for each interrupt
we read only the sub-registers which have had an interrupt flagged.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-irq.c        | 1017 ++++++++++++++-------------------------
 include/linux/mfd/wm8350/gpio.h |   18 +
 2 files changed, 387 insertions(+), 648 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index a432e2b65c48..d9abfc94c685 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c
@@ -29,6 +29,343 @@
 #include <linux/mfd/wm8350/supply.h>
 #include <linux/mfd/wm8350/wdt.h>
 
+#define WM8350_NUM_IRQ_REGS 7
+
+#define WM8350_INT_OFFSET_1                     0
+#define WM8350_INT_OFFSET_2                     1
+#define WM8350_POWER_UP_INT_OFFSET              2
+#define WM8350_UNDER_VOLTAGE_INT_OFFSET         3
+#define WM8350_OVER_CURRENT_INT_OFFSET          4
+#define WM8350_GPIO_INT_OFFSET                  5
+#define WM8350_COMPARATOR_INT_OFFSET            6
+
+struct wm8350_irq_data {
+	int primary;
+	int reg;
+	int mask;
+	int primary_only;
+};
+
+static struct wm8350_irq_data wm8350_irqs[] = {
+	[WM8350_IRQ_OC_LS] = {
+		.primary = WM8350_OC_INT,
+		.reg = WM8350_OVER_CURRENT_INT_OFFSET,
+		.mask = WM8350_OC_LS_EINT,
+		.primary_only = 1,
+	},
+	[WM8350_IRQ_UV_DC1] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC1_EINT,
+	},
+	[WM8350_IRQ_UV_DC2] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC2_EINT,
+	},
+	[WM8350_IRQ_UV_DC3] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC3_EINT,
+	},
+	[WM8350_IRQ_UV_DC4] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC4_EINT,
+	},
+	[WM8350_IRQ_UV_DC5] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC5_EINT,
+	},
+	[WM8350_IRQ_UV_DC6] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_DC6_EINT,
+	},
+	[WM8350_IRQ_UV_LDO1] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_LDO1_EINT,
+	},
+	[WM8350_IRQ_UV_LDO2] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_LDO2_EINT,
+	},
+	[WM8350_IRQ_UV_LDO3] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_LDO3_EINT,
+	},
+	[WM8350_IRQ_UV_LDO4] = {
+		.primary = WM8350_UV_INT,
+		.reg = WM8350_UNDER_VOLTAGE_INT_OFFSET,
+		.mask = WM8350_UV_LDO4_EINT,
+	},
+	[WM8350_IRQ_CHG_BAT_HOT] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_BAT_HOT_EINT,
+	},
+	[WM8350_IRQ_CHG_BAT_COLD] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_BAT_COLD_EINT,
+	},
+	[WM8350_IRQ_CHG_BAT_FAIL] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_BAT_FAIL_EINT,
+	},
+	[WM8350_IRQ_CHG_TO] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_TO_EINT,
+	},
+	[WM8350_IRQ_CHG_END] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_END_EINT,
+	},
+	[WM8350_IRQ_CHG_START] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_START_EINT,
+	},
+	[WM8350_IRQ_CHG_FAST_RDY] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_FAST_RDY_EINT,
+	},
+	[WM8350_IRQ_CHG_VBATT_LT_3P9] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_VBATT_LT_3P9_EINT,
+	},
+	[WM8350_IRQ_CHG_VBATT_LT_3P1] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_VBATT_LT_3P1_EINT,
+	},
+	[WM8350_IRQ_CHG_VBATT_LT_2P85] = {
+		.primary = WM8350_CHG_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_CHG_VBATT_LT_2P85_EINT,
+	},
+	[WM8350_IRQ_RTC_ALM] = {
+		.primary = WM8350_RTC_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_RTC_ALM_EINT,
+	},
+	[WM8350_IRQ_RTC_SEC] = {
+		.primary = WM8350_RTC_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_RTC_SEC_EINT,
+	},
+	[WM8350_IRQ_RTC_PER] = {
+		.primary = WM8350_RTC_INT,
+		.reg = WM8350_INT_OFFSET_1,
+		.mask = WM8350_RTC_PER_EINT,
+	},
+	[WM8350_IRQ_CS1] = {
+		.primary = WM8350_CS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_CS1_EINT,
+	},
+	[WM8350_IRQ_CS2] = {
+		.primary = WM8350_CS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_CS2_EINT,
+	},
+	[WM8350_IRQ_SYS_HYST_COMP_FAIL] = {
+		.primary = WM8350_SYS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_SYS_HYST_COMP_FAIL_EINT,
+	},
+	[WM8350_IRQ_SYS_CHIP_GT115] = {
+		.primary = WM8350_SYS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_SYS_CHIP_GT115_EINT,
+	},
+	[WM8350_IRQ_SYS_CHIP_GT140] = {
+		.primary = WM8350_SYS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_SYS_CHIP_GT140_EINT,
+	},
+	[WM8350_IRQ_SYS_WDOG_TO] = {
+		.primary = WM8350_SYS_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_SYS_WDOG_TO_EINT,
+	},
+	[WM8350_IRQ_AUXADC_DATARDY] = {
+		.primary = WM8350_AUXADC_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_AUXADC_DATARDY_EINT,
+	},
+	[WM8350_IRQ_AUXADC_DCOMP4] = {
+		.primary = WM8350_AUXADC_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_AUXADC_DCOMP4_EINT,
+	},
+	[WM8350_IRQ_AUXADC_DCOMP3] = {
+		.primary = WM8350_AUXADC_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_AUXADC_DCOMP3_EINT,
+	},
+	[WM8350_IRQ_AUXADC_DCOMP2] = {
+		.primary = WM8350_AUXADC_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_AUXADC_DCOMP2_EINT,
+	},
+	[WM8350_IRQ_AUXADC_DCOMP1] = {
+		.primary = WM8350_AUXADC_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_AUXADC_DCOMP1_EINT,
+	},
+	[WM8350_IRQ_USB_LIMIT] = {
+		.primary = WM8350_USB_INT,
+		.reg = WM8350_INT_OFFSET_2,
+		.mask = WM8350_USB_LIMIT_EINT,
+		.primary_only = 1,
+	},
+	[WM8350_IRQ_WKUP_OFF_STATE] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_OFF_STATE_EINT,
+	},
+	[WM8350_IRQ_WKUP_HIB_STATE] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_HIB_STATE_EINT,
+	},
+	[WM8350_IRQ_WKUP_CONV_FAULT] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_CONV_FAULT_EINT,
+	},
+	[WM8350_IRQ_WKUP_WDOG_RST] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_WDOG_RST_EINT,
+	},
+	[WM8350_IRQ_WKUP_GP_PWR_ON] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_GP_PWR_ON_EINT,
+	},
+	[WM8350_IRQ_WKUP_ONKEY] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_ONKEY_EINT,
+	},
+	[WM8350_IRQ_WKUP_GP_WAKEUP] = {
+		.primary = WM8350_WKUP_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_WKUP_GP_WAKEUP_EINT,
+	},
+	[WM8350_IRQ_CODEC_JCK_DET_L] = {
+		.primary = WM8350_CODEC_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_CODEC_JCK_DET_L_EINT,
+	},
+	[WM8350_IRQ_CODEC_JCK_DET_R] = {
+		.primary = WM8350_CODEC_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_CODEC_JCK_DET_R_EINT,
+	},
+	[WM8350_IRQ_CODEC_MICSCD] = {
+		.primary = WM8350_CODEC_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_CODEC_MICSCD_EINT,
+	},
+	[WM8350_IRQ_CODEC_MICD] = {
+		.primary = WM8350_CODEC_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_CODEC_MICD_EINT,
+	},
+	[WM8350_IRQ_EXT_USB_FB] = {
+		.primary = WM8350_EXT_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_EXT_USB_FB_EINT,
+	},
+	[WM8350_IRQ_EXT_WALL_FB] = {
+		.primary = WM8350_EXT_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_EXT_WALL_FB_EINT,
+	},
+	[WM8350_IRQ_EXT_BAT_FB] = {
+		.primary = WM8350_EXT_INT,
+		.reg = WM8350_COMPARATOR_INT_OFFSET,
+		.mask = WM8350_EXT_BAT_FB_EINT,
+	},
+	[WM8350_IRQ_GPIO(0)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP0_EINT,
+	},
+	[WM8350_IRQ_GPIO(1)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP1_EINT,
+	},
+	[WM8350_IRQ_GPIO(2)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP2_EINT,
+	},
+	[WM8350_IRQ_GPIO(3)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP3_EINT,
+	},
+	[WM8350_IRQ_GPIO(4)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP4_EINT,
+	},
+	[WM8350_IRQ_GPIO(5)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP5_EINT,
+	},
+	[WM8350_IRQ_GPIO(6)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP6_EINT,
+	},
+	[WM8350_IRQ_GPIO(7)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP7_EINT,
+	},
+	[WM8350_IRQ_GPIO(8)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP8_EINT,
+	},
+	[WM8350_IRQ_GPIO(9)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP9_EINT,
+	},
+	[WM8350_IRQ_GPIO(10)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP10_EINT,
+	},
+	[WM8350_IRQ_GPIO(11)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP11_EINT,
+	},
+	[WM8350_IRQ_GPIO(12)] = {
+		.primary = WM8350_GP_INT,
+		.reg = WM8350_GPIO_INT_OFFSET,
+		.mask = WM8350_GP12_EINT,
+	},
+};
+
 static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
 {
 	mutex_lock(&wm8350->irq_mutex);
@@ -51,197 +388,43 @@ static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
  * asserted while we run - in the normal course of events this is a
  * rare occurrence so we save I2C/SPI reads.
  */
-static irqreturn_t wm8350_irq(int irq, void *data)
+static irqreturn_t wm8350_irq(int irq, void *irq_data)
 {
-	struct wm8350 *wm8350 = data;
-	u16 level_one, status1, status2, comp;
+	struct wm8350 *wm8350 = irq_data;
+	u16 level_one;
+	u16 sub_reg[WM8350_NUM_IRQ_REGS];
+	int read_done[WM8350_NUM_IRQ_REGS];
+	struct wm8350_irq_data *data;
+	int i;
 
 	/* TODO: Use block reads to improve performance? */
 	level_one = wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS)
 		& ~wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK);
-	status1 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_1)
-		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_1_MASK);
-	status2 = wm8350_reg_read(wm8350, WM8350_INT_STATUS_2)
-		& ~wm8350_reg_read(wm8350, WM8350_INT_STATUS_2_MASK);
-	comp = wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS)
-		& ~wm8350_reg_read(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK);
-
-	/* over current */
-	if (level_one & WM8350_OC_INT) {
-		u16 oc;
-
-		oc = wm8350_reg_read(wm8350, WM8350_OVER_CURRENT_INT_STATUS);
-		oc &= ~wm8350_reg_read(wm8350,
-				       WM8350_OVER_CURRENT_INT_STATUS_MASK);
-
-		if (oc & WM8350_OC_LS_EINT)	/* limit switch */
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_OC_LS);
-	}
-
-	/* under voltage */
-	if (level_one & WM8350_UV_INT) {
-		u16 uv;
-
-		uv = wm8350_reg_read(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS);
-		uv &= ~wm8350_reg_read(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK);
-
-		if (uv & WM8350_UV_DC1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC1);
-		if (uv & WM8350_UV_DC2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC2);
-		if (uv & WM8350_UV_DC3_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC3);
-		if (uv & WM8350_UV_DC4_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC4);
-		if (uv & WM8350_UV_DC5_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC5);
-		if (uv & WM8350_UV_DC6_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_DC6);
-		if (uv & WM8350_UV_LDO1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO1);
-		if (uv & WM8350_UV_LDO2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO2);
-		if (uv & WM8350_UV_LDO3_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO3);
-		if (uv & WM8350_UV_LDO4_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_UV_LDO4);
-	}
 
-	/* charger, RTC */
-	if (status1) {
-		if (status1 & WM8350_CHG_BAT_HOT_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_HOT);
-		if (status1 & WM8350_CHG_BAT_COLD_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_COLD);
-		if (status1 & WM8350_CHG_BAT_FAIL_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_BAT_FAIL);
-		if (status1 & WM8350_CHG_TO_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_TO);
-		if (status1 & WM8350_CHG_END_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_END);
-		if (status1 & WM8350_CHG_START_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CHG_START);
-		if (status1 & WM8350_CHG_FAST_RDY_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_FAST_RDY);
-		if (status1 & WM8350_CHG_VBATT_LT_3P9_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_3P9);
-		if (status1 & WM8350_CHG_VBATT_LT_3P1_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_3P1);
-		if (status1 & WM8350_CHG_VBATT_LT_2P85_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CHG_VBATT_LT_2P85);
-		if (status1 & WM8350_RTC_ALM_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_ALM);
-		if (status1 & WM8350_RTC_SEC_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_SEC);
-		if (status1 & WM8350_RTC_PER_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_RTC_PER);
-	}
-
-	/* current sink, system, aux adc */
-	if (status2) {
-		if (status2 & WM8350_CS1_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS1);
-		if (status2 & WM8350_CS2_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CS2);
-
-		if (status2 & WM8350_SYS_HYST_COMP_FAIL_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_HYST_COMP_FAIL);
-		if (status2 & WM8350_SYS_CHIP_GT115_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_CHIP_GT115);
-		if (status2 & WM8350_SYS_CHIP_GT140_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_CHIP_GT140);
-		if (status2 & WM8350_SYS_WDOG_TO_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_SYS_WDOG_TO);
-
-		if (status2 & WM8350_AUXADC_DATARDY_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DATARDY);
-		if (status2 & WM8350_AUXADC_DCOMP4_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP4);
-		if (status2 & WM8350_AUXADC_DCOMP3_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP3);
-		if (status2 & WM8350_AUXADC_DCOMP2_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP2);
-		if (status2 & WM8350_AUXADC_DCOMP1_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_AUXADC_DCOMP1);
-
-		if (status2 & WM8350_USB_LIMIT_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_USB_LIMIT);
-	}
+	if (!level_one)
+		return IRQ_NONE;
 
-	/* wake, codec, ext */
-	if (comp) {
-		if (comp & WM8350_WKUP_OFF_STATE_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_OFF_STATE);
-		if (comp & WM8350_WKUP_HIB_STATE_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_HIB_STATE);
-		if (comp & WM8350_WKUP_CONV_FAULT_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_CONV_FAULT);
-		if (comp & WM8350_WKUP_WDOG_RST_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_WDOG_RST);
-		if (comp & WM8350_WKUP_GP_PWR_ON_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_GP_PWR_ON);
-		if (comp & WM8350_WKUP_ONKEY_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_WKUP_ONKEY);
-		if (comp & WM8350_WKUP_GP_WAKEUP_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_WKUP_GP_WAKEUP);
-
-		if (comp & WM8350_CODEC_JCK_DET_L_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_JCK_DET_L);
-		if (comp & WM8350_CODEC_JCK_DET_R_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_JCK_DET_R);
-		if (comp & WM8350_CODEC_MICSCD_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_CODEC_MICSCD);
-		if (comp & WM8350_CODEC_MICD_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_CODEC_MICD);
-
-		if (comp & WM8350_EXT_USB_FB_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_USB_FB);
-		if (comp & WM8350_EXT_WALL_FB_EINT)
-			wm8350_irq_call_handler(wm8350,
-						WM8350_IRQ_EXT_WALL_FB);
-		if (comp & WM8350_EXT_BAT_FB_EINT)
-			wm8350_irq_call_handler(wm8350, WM8350_IRQ_EXT_BAT_FB);
-	}
+	memset(&read_done, 0, sizeof(read_done));
 
-	if (level_one & WM8350_GP_INT) {
-		int i;
-		u16 gpio;
+	for (i = 0; i < ARRAY_SIZE(wm8350_irqs); i++) {
+		data = &wm8350_irqs[i];
 
-		gpio = wm8350_reg_read(wm8350, WM8350_GPIO_INT_STATUS);
-		gpio &= ~wm8350_reg_read(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK);
+		if (!(level_one & data->primary))
+			continue;
 
-		for (i = 0; i < 12; i++) {
-			if (gpio & (1 << i))
-				wm8350_irq_call_handler(wm8350,
-							WM8350_IRQ_GPIO(i));
+		if (!read_done[data->reg]) {
+			sub_reg[data->reg] =
+				wm8350_reg_read(wm8350, WM8350_INT_STATUS_1 +
+						data->reg);
+			sub_reg[data->reg] &=
+				~wm8350_reg_read(wm8350,
+						 WM8350_INT_STATUS_1_MASK +
+						 data->reg);
+			read_done[data->reg] = 1;
 		}
+
+		if (sub_reg[data->reg] & data->mask)
+			wm8350_irq_call_handler(wm8350, i);
 	}
 
 	return IRQ_HANDLED;
@@ -280,479 +463,17 @@ EXPORT_SYMBOL_GPL(wm8350_free_irq);
 
 int wm8350_mask_irq(struct wm8350 *wm8350, int irq)
 {
-	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_HOT_EINT);
-	case WM8350_IRQ_CHG_BAT_COLD:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_COLD_EINT);
-	case WM8350_IRQ_CHG_BAT_FAIL:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_BAT_FAIL_EINT);
-	case WM8350_IRQ_CHG_TO:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_TO_EINT);
-	case WM8350_IRQ_CHG_END:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_END_EINT);
-	case WM8350_IRQ_CHG_START:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_START_EINT);
-	case WM8350_IRQ_CHG_FAST_RDY:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_FAST_RDY_EINT);
-	case WM8350_IRQ_RTC_PER:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_PER_EINT);
-	case WM8350_IRQ_RTC_SEC:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_SEC_EINT);
-	case WM8350_IRQ_RTC_ALM:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_RTC_ALM_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P9:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_3P9_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_3P1_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_2P85:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-				       WM8350_IM_CHG_VBATT_LT_2P85_EINT);
-	case WM8350_IRQ_CS1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_CS1_EINT);
-	case WM8350_IRQ_CS2:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_CS2_EINT);
-	case WM8350_IRQ_USB_LIMIT:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_USB_LIMIT_EINT);
-	case WM8350_IRQ_AUXADC_DATARDY:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DATARDY_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP4:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP4_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP3:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP3_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP2:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP2_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP1:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_AUXADC_DCOMP1_EINT);
-	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT115:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_CHIP_GT115_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT140:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_CHIP_GT140_EINT);
-	case WM8350_IRQ_SYS_WDOG_TO:
-		return wm8350_set_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-				       WM8350_IM_SYS_WDOG_TO_EINT);
-	case WM8350_IRQ_UV_LDO4:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO4_EINT);
-	case WM8350_IRQ_UV_LDO3:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO3_EINT);
-	case WM8350_IRQ_UV_LDO2:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO2_EINT);
-	case WM8350_IRQ_UV_LDO1:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_LDO1_EINT);
-	case WM8350_IRQ_UV_DC6:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC6_EINT);
-	case WM8350_IRQ_UV_DC5:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC5_EINT);
-	case WM8350_IRQ_UV_DC4:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC4_EINT);
-	case WM8350_IRQ_UV_DC3:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC3_EINT);
-	case WM8350_IRQ_UV_DC2:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC2_EINT);
-	case WM8350_IRQ_UV_DC1:
-		return wm8350_set_bits(wm8350,
-				       WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-				       WM8350_IM_UV_DC1_EINT);
-	case WM8350_IRQ_OC_LS:
-		return wm8350_set_bits(wm8350,
-				       WM8350_OVER_CURRENT_INT_STATUS_MASK,
-				       WM8350_IM_OC_LS_EINT);
-	case WM8350_IRQ_EXT_USB_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_USB_FB_EINT);
-	case WM8350_IRQ_EXT_WALL_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_WALL_FB_EINT);
-	case WM8350_IRQ_EXT_BAT_FB:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_EXT_BAT_FB_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_L:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_JCK_DET_L_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_R:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_JCK_DET_R_EINT);
-	case WM8350_IRQ_CODEC_MICSCD:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_MICSCD_EINT);
-	case WM8350_IRQ_CODEC_MICD:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_CODEC_MICD_EINT);
-	case WM8350_IRQ_WKUP_OFF_STATE:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_HIB_STATE:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_HIB_STATE_EINT);
-	case WM8350_IRQ_WKUP_CONV_FAULT:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_CONV_FAULT_EINT);
-	case WM8350_IRQ_WKUP_WDOG_RST:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_GP_PWR_ON:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_GP_PWR_ON_EINT);
-	case WM8350_IRQ_WKUP_ONKEY:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_ONKEY_EINT);
-	case WM8350_IRQ_WKUP_GP_WAKEUP:
-		return wm8350_set_bits(wm8350,
-				       WM8350_COMPARATOR_INT_STATUS_MASK,
-				       WM8350_IM_WKUP_GP_WAKEUP_EINT);
-	case WM8350_IRQ_GPIO(0):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP0_EINT);
-	case WM8350_IRQ_GPIO(1):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP1_EINT);
-	case WM8350_IRQ_GPIO(2):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP2_EINT);
-	case WM8350_IRQ_GPIO(3):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP3_EINT);
-	case WM8350_IRQ_GPIO(4):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP4_EINT);
-	case WM8350_IRQ_GPIO(5):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP5_EINT);
-	case WM8350_IRQ_GPIO(6):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP6_EINT);
-	case WM8350_IRQ_GPIO(7):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP7_EINT);
-	case WM8350_IRQ_GPIO(8):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP8_EINT);
-	case WM8350_IRQ_GPIO(9):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP9_EINT);
-	case WM8350_IRQ_GPIO(10):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP10_EINT);
-	case WM8350_IRQ_GPIO(11):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP11_EINT);
-	case WM8350_IRQ_GPIO(12):
-		return wm8350_set_bits(wm8350,
-				       WM8350_GPIO_INT_STATUS_MASK,
-				       WM8350_IM_GP12_EINT);
-	default:
-		dev_warn(wm8350->dev, "Attempting to mask unknown IRQ %d\n",
-			 irq);
-		return -EINVAL;
-	}
-	return 0;
+	return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK +
+			       wm8350_irqs[irq].reg,
+			       wm8350_irqs[irq].mask);
 }
 EXPORT_SYMBOL_GPL(wm8350_mask_irq);
 
 int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
 {
-	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_HOT_EINT);
-	case WM8350_IRQ_CHG_BAT_COLD:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_COLD_EINT);
-	case WM8350_IRQ_CHG_BAT_FAIL:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_BAT_FAIL_EINT);
-	case WM8350_IRQ_CHG_TO:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_TO_EINT);
-	case WM8350_IRQ_CHG_END:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_END_EINT);
-	case WM8350_IRQ_CHG_START:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_START_EINT);
-	case WM8350_IRQ_CHG_FAST_RDY:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_FAST_RDY_EINT);
-	case WM8350_IRQ_RTC_PER:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_PER_EINT);
-	case WM8350_IRQ_RTC_SEC:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_SEC_EINT);
-	case WM8350_IRQ_RTC_ALM:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_RTC_ALM_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P9:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_3P9_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_3P1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_3P1_EINT);
-	case WM8350_IRQ_CHG_VBATT_LT_2P85:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK,
-					 WM8350_IM_CHG_VBATT_LT_2P85_EINT);
-	case WM8350_IRQ_CS1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_CS1_EINT);
-	case WM8350_IRQ_CS2:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_CS2_EINT);
-	case WM8350_IRQ_USB_LIMIT:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_USB_LIMIT_EINT);
-	case WM8350_IRQ_AUXADC_DATARDY:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DATARDY_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP4:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP4_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP3:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP3_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP2:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP2_EINT);
-	case WM8350_IRQ_AUXADC_DCOMP1:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_AUXADC_DCOMP1_EINT);
-	case WM8350_IRQ_SYS_HYST_COMP_FAIL:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_HYST_COMP_FAIL_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT115:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_CHIP_GT115_EINT);
-	case WM8350_IRQ_SYS_CHIP_GT140:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_CHIP_GT140_EINT);
-	case WM8350_IRQ_SYS_WDOG_TO:
-		return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_2_MASK,
-					 WM8350_IM_SYS_WDOG_TO_EINT);
-	case WM8350_IRQ_UV_LDO4:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO4_EINT);
-	case WM8350_IRQ_UV_LDO3:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO3_EINT);
-	case WM8350_IRQ_UV_LDO2:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO2_EINT);
-	case WM8350_IRQ_UV_LDO1:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_LDO1_EINT);
-	case WM8350_IRQ_UV_DC6:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC6_EINT);
-	case WM8350_IRQ_UV_DC5:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC5_EINT);
-	case WM8350_IRQ_UV_DC4:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC4_EINT);
-	case WM8350_IRQ_UV_DC3:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC3_EINT);
-	case WM8350_IRQ_UV_DC2:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC2_EINT);
-	case WM8350_IRQ_UV_DC1:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_UNDER_VOLTAGE_INT_STATUS_MASK,
-					 WM8350_IM_UV_DC1_EINT);
-	case WM8350_IRQ_OC_LS:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_OVER_CURRENT_INT_STATUS_MASK,
-					 WM8350_IM_OC_LS_EINT);
-	case WM8350_IRQ_EXT_USB_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_USB_FB_EINT);
-	case WM8350_IRQ_EXT_WALL_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_WALL_FB_EINT);
-	case WM8350_IRQ_EXT_BAT_FB:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_EXT_BAT_FB_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_L:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_JCK_DET_L_EINT);
-	case WM8350_IRQ_CODEC_JCK_DET_R:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_JCK_DET_R_EINT);
-	case WM8350_IRQ_CODEC_MICSCD:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_MICSCD_EINT);
-	case WM8350_IRQ_CODEC_MICD:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_CODEC_MICD_EINT);
-	case WM8350_IRQ_WKUP_OFF_STATE:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_HIB_STATE:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_HIB_STATE_EINT);
-	case WM8350_IRQ_WKUP_CONV_FAULT:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_CONV_FAULT_EINT);
-	case WM8350_IRQ_WKUP_WDOG_RST:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_OFF_STATE_EINT);
-	case WM8350_IRQ_WKUP_GP_PWR_ON:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_GP_PWR_ON_EINT);
-	case WM8350_IRQ_WKUP_ONKEY:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_ONKEY_EINT);
-	case WM8350_IRQ_WKUP_GP_WAKEUP:
-		return wm8350_clear_bits(wm8350,
-					 WM8350_COMPARATOR_INT_STATUS_MASK,
-					 WM8350_IM_WKUP_GP_WAKEUP_EINT);
-	case WM8350_IRQ_GPIO(0):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP0_EINT);
-	case WM8350_IRQ_GPIO(1):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP1_EINT);
-	case WM8350_IRQ_GPIO(2):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP2_EINT);
-	case WM8350_IRQ_GPIO(3):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP3_EINT);
-	case WM8350_IRQ_GPIO(4):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP4_EINT);
-	case WM8350_IRQ_GPIO(5):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP5_EINT);
-	case WM8350_IRQ_GPIO(6):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP6_EINT);
-	case WM8350_IRQ_GPIO(7):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP7_EINT);
-	case WM8350_IRQ_GPIO(8):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP8_EINT);
-	case WM8350_IRQ_GPIO(9):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP9_EINT);
-	case WM8350_IRQ_GPIO(10):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP10_EINT);
-	case WM8350_IRQ_GPIO(11):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP11_EINT);
-	case WM8350_IRQ_GPIO(12):
-		return wm8350_clear_bits(wm8350,
-					 WM8350_GPIO_INT_STATUS_MASK,
-					 WM8350_IM_GP12_EINT);
-	default:
-		dev_warn(wm8350->dev, "Attempting to unmask unknown IRQ %d\n",
-			 irq);
-		return -EINVAL;
-	}
-	return 0;
+	return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK +
+				 wm8350_irqs[irq].reg,
+				 wm8350_irqs[irq].mask);
 }
 EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
 
diff --git a/include/linux/mfd/wm8350/gpio.h b/include/linux/mfd/wm8350/gpio.h
index ed91e8f5d298..71af3d6ebe9d 100644
--- a/include/linux/mfd/wm8350/gpio.h
+++ b/include/linux/mfd/wm8350/gpio.h
@@ -172,6 +172,24 @@
 #define WM8350_GPIO_DEBOUNCE_OFF		0
 #define WM8350_GPIO_DEBOUNCE_ON			1
 
+/*
+ * R30 (0x1E) - GPIO Interrupt Status
+ */
+#define WM8350_GP12_EINT                        0x1000
+#define WM8350_GP11_EINT                        0x0800
+#define WM8350_GP10_EINT                        0x0400
+#define WM8350_GP9_EINT                         0x0200
+#define WM8350_GP8_EINT                         0x0100
+#define WM8350_GP7_EINT                         0x0080
+#define WM8350_GP6_EINT                         0x0040
+#define WM8350_GP5_EINT                         0x0020
+#define WM8350_GP4_EINT                         0x0010
+#define WM8350_GP3_EINT                         0x0008
+#define WM8350_GP2_EINT                         0x0004
+#define WM8350_GP1_EINT                         0x0002
+#define WM8350_GP0_EINT                         0x0001
+
+
 /*
  * R128 (0x80) - GPIO Debounce
  */
-- 
cgit v1.2.3


From 68d641efd86d901d000b888eeab5481257d49f12 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 14 Oct 2009 02:12:33 +0400
Subject: mfd: Fix memleak in pcf50633_client_dev_register

Since platform_device_add_data copies the passed data, the allocated
subdev_pdata is never freed. A simple fix would be to either free subdev_pdata
or put it onto the stack. But since the pcf50633 child devices can rely on
beeing children of the pcf50633 core device it's much more elegant to get access
to pcf50633 core structure through that link. This allows to get completly rid
of pcf5033_subdev_pdata.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/misc/pcf50633-input.c |  7 +++----
 drivers/mfd/pcf50633-adc.c          |  5 ++---
 drivers/mfd/pcf50633-core.c         | 10 ----------
 drivers/power/pcf50633-charger.c    |  3 +--
 drivers/rtc/rtc-pcf50633.c          |  5 +----
 include/linux/mfd/pcf50633/core.h   | 10 +++++-----
 6 files changed, 12 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/misc/pcf50633-input.c b/drivers/input/misc/pcf50633-input.c
index 039dcb00ebd9..008de0c5834b 100644
--- a/drivers/input/misc/pcf50633-input.c
+++ b/drivers/input/misc/pcf50633-input.c
@@ -55,7 +55,6 @@ pcf50633_input_irq(int irq, void *data)
 static int __devinit pcf50633_input_probe(struct platform_device *pdev)
 {
 	struct pcf50633_input *input;
-	struct pcf50633_subdev_pdata *pdata = pdev->dev.platform_data;
 	struct input_dev *input_dev;
 	int ret;
 
@@ -71,7 +70,7 @@ static int __devinit pcf50633_input_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, input);
-	input->pcf = pdata->pcf;
+	input->pcf = dev_to_pcf50633(pdev->dev.parent);
 	input->input_dev = input_dev;
 
 	input_dev->name = "PCF50633 PMU events";
@@ -85,9 +84,9 @@ static int __devinit pcf50633_input_probe(struct platform_device *pdev)
 		kfree(input);
 		return ret;
 	}
-	pcf50633_register_irq(pdata->pcf, PCF50633_IRQ_ONKEYR,
+	pcf50633_register_irq(input->pcf, PCF50633_IRQ_ONKEYR,
 				pcf50633_input_irq, input);
-	pcf50633_register_irq(pdata->pcf, PCF50633_IRQ_ONKEYF,
+	pcf50633_register_irq(input->pcf, PCF50633_IRQ_ONKEYF,
 				pcf50633_input_irq, input);
 
 	return 0;
diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c
index 3d31e97d6a45..6d2e8466df1d 100644
--- a/drivers/mfd/pcf50633-adc.c
+++ b/drivers/mfd/pcf50633-adc.c
@@ -209,17 +209,16 @@ static void pcf50633_adc_irq(int irq, void *data)
 
 static int __devinit pcf50633_adc_probe(struct platform_device *pdev)
 {
-	struct pcf50633_subdev_pdata *pdata = pdev->dev.platform_data;
 	struct pcf50633_adc *adc;
 
 	adc = kzalloc(sizeof(*adc), GFP_KERNEL);
 	if (!adc)
 		return -ENOMEM;
 
-	adc->pcf = pdata->pcf;
+	adc->pcf = dev_to_pcf50633(pdev->dev.parent);
 	platform_set_drvdata(pdev, adc);
 
-	pcf50633_register_irq(pdata->pcf, PCF50633_IRQ_ADCRDY,
+	pcf50633_register_irq(adc->pcf, PCF50633_IRQ_ADCRDY,
 					pcf50633_adc_irq, adc);
 
 	mutex_init(&adc->queue_mutex);
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index 48776d3018ed..69cdbdcd2e82 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -456,7 +456,6 @@ static void
 pcf50633_client_dev_register(struct pcf50633 *pcf, const char *name,
 						struct platform_device **pdev)
 {
-	struct pcf50633_subdev_pdata *subdev_pdata;
 	int ret;
 
 	*pdev = platform_device_alloc(name, -1);
@@ -465,15 +464,6 @@ pcf50633_client_dev_register(struct pcf50633 *pcf, const char *name,
 		return;
 	}
 
-	subdev_pdata = kmalloc(sizeof(*subdev_pdata), GFP_KERNEL);
-	if (!subdev_pdata) {
-		dev_err(pcf->dev, "Error allocating subdev pdata\n");
-		platform_device_put(*pdev);
-	}
-
-	subdev_pdata->pcf = pcf;
-	platform_device_add_data(*pdev, subdev_pdata, sizeof(*subdev_pdata));
-
 	(*pdev)->dev.parent = pcf->dev;
 
 	ret = platform_device_add(*pdev);
diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c
index e8b278f71781..6a84a8eb8d7a 100644
--- a/drivers/power/pcf50633-charger.c
+++ b/drivers/power/pcf50633-charger.c
@@ -303,7 +303,6 @@ static const u8 mbc_irq_handlers[] = {
 static int __devinit pcf50633_mbc_probe(struct platform_device *pdev)
 {
 	struct pcf50633_mbc *mbc;
-	struct pcf50633_subdev_pdata *pdata = pdev->dev.platform_data;
 	int ret;
 	int i;
 	u8 mbcs1;
@@ -313,7 +312,7 @@ static int __devinit pcf50633_mbc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, mbc);
-	mbc->pcf = pdata->pcf;
+	mbc->pcf = dev_to_pcf50633(pdev->dev.parent);
 
 	/* Set up IRQ handlers */
 	for (i = 0; i < ARRAY_SIZE(mbc_irq_handlers); i++)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 4c5d5d0c4cfc..9b74e9c9151c 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -277,16 +277,13 @@ static void pcf50633_rtc_irq(int irq, void *data)
 
 static int __devinit pcf50633_rtc_probe(struct platform_device *pdev)
 {
-	struct pcf50633_subdev_pdata *pdata;
 	struct pcf50633_rtc *rtc;
 
-
 	rtc = kzalloc(sizeof(*rtc), GFP_KERNEL);
 	if (!rtc)
 		return -ENOMEM;
 
-	pdata = pdev->dev.platform_data;
-	rtc->pcf = pdata->pcf;
+	rtc->pcf = dev_to_pcf50633(pdev->dev.parent);
 	platform_set_drvdata(pdev, rtc);
 	rtc->rtc_dev = rtc_device_register("pcf50633-rtc", &pdev->dev,
 				&pcf50633_rtc_ops, THIS_MODULE);
diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h
index 9aba7b779fbc..d9034cc87f18 100644
--- a/include/linux/mfd/pcf50633/core.h
+++ b/include/linux/mfd/pcf50633/core.h
@@ -40,10 +40,6 @@ struct pcf50633_platform_data {
 	u8 resumers[5];
 };
 
-struct pcf50633_subdev_pdata {
-	struct pcf50633 *pcf;
-};
-
 struct pcf50633_irq {
 	void (*handler) (int, void *);
 	void *data;
@@ -217,5 +213,9 @@ enum pcf50633_reg_int5 {
 #define PCF50633_REG_LEDCTL 0x2a
 #define PCF50633_REG_LEDDIM 0x2b
 
-#endif
+static inline struct pcf50633 *dev_to_pcf50633(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
 
+#endif
-- 
cgit v1.2.3


From b4ead61e570d7b7bcf20a5a1733dd0bc37236c99 Mon Sep 17 00:00:00 2001
From: Amit Kucheria <amit.kucheria@verdurent.com>
Date: Mon, 19 Oct 2009 15:11:00 +0300
Subject: mfd: Add support for remapping twl4030-power power states

The <RESOURCE>_REMAP register allows configuration of the <RESOURCE> in case
of a sleep or off transition.

Allow this property of resources to be configured (through twl4030_resconfig)
and add code to parse these values to program the registers accordingly.

Signed-off-by: Amit Kucheria <amit.kucheria@verdurent.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-power.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c/twl4030.h |  3 +++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 2c38ac17ab64..3e41e0c0e4c6 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -75,6 +75,8 @@ static u8 twl4030_start_script_address = 0x2b;
 */
 #define DEV_GRP_OFFSET		0
 #define TYPE_OFFSET		1
+#define REMAP_OFFSET		2
+#define DEDICATED_OFFSET	3
 
 /* Bit positions in the registers */
 
@@ -88,6 +90,12 @@ static u8 twl4030_start_script_address = 0x2b;
 #define TYPE2_SHIFT		3
 #define TYPE2_MASK		(3 << TYPE2_SHIFT)
 
+/* <RESOURCE>_REMAP */
+#define SLEEP_STATE_SHIFT	0
+#define SLEEP_STATE_MASK	(0xf << SLEEP_STATE_SHIFT)
+#define OFF_STATE_SHIFT		4
+#define OFF_STATE_MASK		(0xf << OFF_STATE_SHIFT)
+
 static u8 res_config_addrs[] = {
 	[RES_VAUX1]	= 0x17,
 	[RES_VAUX2]	= 0x1b,
@@ -325,6 +333,7 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 	int err;
 	u8 type;
 	u8 grp;
+	u8 remap;
 
 	if (rconfig->resource > TOTAL_RESOURCES) {
 		pr_err("TWL4030 Resource %d does not exist\n",
@@ -380,6 +389,33 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 		return err;
 	}
 
+	/* Set remap states */
+	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &remap,
+				rconfig_addr + REMAP_OFFSET);
+	if (err < 0) {
+		pr_err("TWL4030 Resource %d remap could not be read\n",
+			rconfig->resource);
+		return err;
+	}
+
+	if (rconfig->remap_off >= 0) {
+		remap &= ~OFF_STATE_MASK;
+		remap |= rconfig->remap_off << OFF_STATE_SHIFT;
+	}
+
+	if (rconfig->remap_sleep >= 0) {
+		remap &= ~SLEEP_STATE_MASK;
+		remap |= rconfig->remap_off << SLEEP_STATE_SHIFT;
+	}
+
+	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+				remap,
+				rconfig_addr + REMAP_OFFSET);
+	if (err < 0) {
+		pr_err("TWL4030 failed to program remap\n");
+		return err;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 5306a759cbde..e87cb270d8a1 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -250,6 +250,7 @@ int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 #define RES_TYPE_ALL		0x7
 
+/* Resource states */
 #define RES_STATE_WRST		0xF
 #define RES_STATE_ACTIVE	0xE
 #define RES_STATE_SLEEP		0x8
@@ -391,6 +392,8 @@ struct twl4030_resconfig {
 	u8 devgroup;	/* Processor group that Power resource belongs to */
 	u8 type;	/* Power resource addressed, 6 / broadcast message */
 	u8 type2;	/* Power resource addressed, 3 / broadcast message */
+	u8 remap_off;	/* off state remapping */
+	u8 remap_sleep;	/* sleep state remapping */
 };
 
 struct twl4030_power_data {
-- 
cgit v1.2.3


From 56baa667973e53d6d38af2ad3731d558566d818b Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nokia.com>
Date: Mon, 19 Oct 2009 21:24:02 +0200
Subject: mfd: fix undefined twl4030-power resconfig value checks

The code tries to skip values initialized with -1, but since the values
are unsigned the comparison is always true.

The patch eliminates the following compiler warnings:

drivers/mfd/twl4030-power.c: In function 'twl4030_configure_resource':
drivers/mfd/twl4030-power.c:338: warning: comparison is always true due to
limited range of data type
drivers/mfd/twl4030-power.c:358: warning: comparison is always true due to
limited range of data type
drivers/mfd/twl4030-power.c:363: warning: comparison is always true due to
limited range of data type

Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-power.c | 6 +++---
 include/linux/i2c/twl4030.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 3e41e0c0e4c6..9f98c36273d8 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -352,7 +352,7 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 		return err;
 	}
 
-	if (rconfig->devgroup >= 0) {
+	if (rconfig->devgroup != TWL4030_RESCONFIG_UNDEF) {
 		grp &= ~DEV_GRP_MASK;
 		grp |= rconfig->devgroup << DEV_GRP_SHIFT;
 		err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
@@ -372,12 +372,12 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 		return err;
 	}
 
-	if (rconfig->type >= 0) {
+	if (rconfig->type != TWL4030_RESCONFIG_UNDEF) {
 		type &= ~TYPE_MASK;
 		type |= rconfig->type << TYPE_SHIFT;
 	}
 
-	if (rconfig->type2 >= 0) {
+	if (rconfig->type2 != TWL4030_RESCONFIG_UNDEF) {
 		type &= ~TYPE2_MASK;
 		type |= rconfig->type2 << TYPE2_SHIFT;
 	}
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index e87cb270d8a1..8a4a58ff8dad 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -400,6 +400,7 @@ struct twl4030_power_data {
 	struct twl4030_script **scripts;
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
+#define TWL4030_RESCONFIG_UNDEF	((u8)-1)
 };
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
-- 
cgit v1.2.3


From 75b75722b4eb1032b3fe2e56b7015f23c6080529 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 21 Oct 2009 19:11:34 +0100
Subject: mfd: Allow platforms to specify an IRQ base for WM8350

This is currently unused by the wm8350 drivers but getting it merged
now will reduce merge issues in the future when implementing wm8350
genirq support.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8350/core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 32197fde904d..ffce508a9109 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -646,10 +646,12 @@ struct wm8350 {
  * @init: Function called during driver initialisation.  Should be
  *        used by the platform to configure GPIO functions and similar.
  * @irq_high: Set if WM8350 IRQ is active high.
+ * @irq_base: Base IRQ for genirq (not currently used).
  */
 struct wm8350_platform_data {
 	int (*init)(struct wm8350 *wm8350);
 	int irq_high;
+	int irq_base;
 };
 
 
-- 
cgit v1.2.3


From 38a684963f619eb9117cb898b92bde92cdd09127 Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Date: Thu, 22 Oct 2009 14:14:09 +0300
Subject: mfd: Enable twl4030 32kHz oscillator low-power mode

Allows TWL's 32kHz oscillator to go in low-power mode when
main battery voltage is running low.

Signed-off-by: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-core.c  | 9 +++++++--
 include/linux/i2c/twl4030.h | 5 +++++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index e3abbf66cc5c..90a38e43c313 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -183,6 +183,7 @@
 #define HFCLK_FREQ_26_MHZ		(2 << 0)
 #define HFCLK_FREQ_38p4_MHZ		(3 << 0)
 #define HIGH_PERF_SQ			(1 << 3)
+#define CK32K_LOWPWR_EN			(1 << 7)
 
 
 /* chip-specific feature flags, for i2c_device_id.driver_data */
@@ -695,7 +696,8 @@ static inline int __init unprotect_pm_master(void)
 	return e;
 }
 
-static void clocks_init(struct device *dev)
+static void clocks_init(struct device *dev,
+			struct twl4030_clock_init_data *clock)
 {
 	int e = 0;
 	struct clk *osc;
@@ -742,6 +744,9 @@ static void clocks_init(struct device *dev)
 	}
 
 	ctrl |= HIGH_PERF_SQ;
+	if (clock && clock->ck32k_lowpwr_enable)
+		ctrl |= CK32K_LOWPWR_EN;
+
 	e |= unprotect_pm_master();
 	/* effect->MADC+USB ck en */
 	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
@@ -820,7 +825,7 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	inuse = true;
 
 	/* setup clock framework */
-	clocks_init(&client->dev);
+	clocks_init(&client->dev, pdata->clock);
 
 	/* load power event scripts */
 	if (twl_has_power() && pdata->power)
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 8a4a58ff8dad..0c84cfa059e9 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -313,6 +313,10 @@ int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 /*----------------------------------------------------------------------*/
 
+struct twl4030_clock_init_data {
+	bool ck32k_lowpwr_enable;
+};
+
 struct twl4030_bci_platform_data {
 	int *battery_tmp_tbl;
 	unsigned int tblsize;
@@ -425,6 +429,7 @@ struct twl4030_codec_data {
 
 struct twl4030_platform_data {
 	unsigned				irq_base, irq_end;
+	struct twl4030_clock_init_data		*clock;
 	struct twl4030_bci_platform_data	*bci;
 	struct twl4030_gpio_platform_data	*gpio;
 	struct twl4030_madc_platform_data	*madc;
-- 
cgit v1.2.3


From b45440c33a0bdd824b98e4a4c56767c50d3275eb Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 2 Nov 2009 16:52:30 +0000
Subject: mfd: Allow configuration of VDCDC2 for tps65010

Add function to allow the configuation fo the VDCDC2 register by
external users, to allow changing of the standard and low-power
running modes.

This is needed, for example, for the Simtec IM2440D20 where we need
to use the low-power mode to shutdown the LDO/DCDC that are not needed
during suspend (saving substantial power) and the runtime use of the
low-power mode to change VCore.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65010.c       | 28 ++++++++++++++++++++++++++++
 include/linux/i2c/tps65010.h | 19 +++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65010.c b/drivers/mfd/tps65010.c
index 755c4030ea31..e5955306c2fa 100644
--- a/drivers/mfd/tps65010.c
+++ b/drivers/mfd/tps65010.c
@@ -964,6 +964,34 @@ int tps65010_config_vregs1(unsigned value)
 }
 EXPORT_SYMBOL(tps65010_config_vregs1);
 
+int tps65010_config_vdcdc2(unsigned value)
+{
+	struct i2c_client *c;
+	int	 status;
+
+	if (!the_tps)
+		return -ENODEV;
+
+	c = the_tps->client;
+	mutex_lock(&the_tps->lock);
+
+	pr_debug("%s: vdcdc2 0x%02x\n", DRIVER_NAME,
+		 i2c_smbus_read_byte_data(c, TPS_VDCDC2));
+
+	status = i2c_smbus_write_byte_data(c, TPS_VDCDC2, value);
+
+	if (status != 0)
+		printk(KERN_ERR "%s: Failed to write vdcdc2 register\n",
+			DRIVER_NAME);
+	else
+		pr_debug("%s: vregs1 0x%02x\n", DRIVER_NAME,
+			 i2c_smbus_read_byte_data(c, TPS_VDCDC2));
+
+	mutex_unlock(&the_tps->lock);
+	return status;
+}
+EXPORT_SYMBOL(tps65010_config_vdcdc2);
+
 /*-------------------------------------------------------------------------*/
 /* tps65013_set_low_pwr parameter:
  * mode: ON or OFF
diff --git a/include/linux/i2c/tps65010.h b/include/linux/i2c/tps65010.h
index 918c5354d9b8..08aa92278d71 100644
--- a/include/linux/i2c/tps65010.h
+++ b/include/linux/i2c/tps65010.h
@@ -72,6 +72,21 @@
 #define	TPS_VDCDC1		0x0c
 #	define	TPS_ENABLE_LP		(1 << 3)
 #define	TPS_VDCDC2		0x0d
+#	define	TPS_LP_COREOFF	(1 << 7)
+#	define 	TPS_VCORE_1_8V	(7<<4)
+#	define 	TPS_VCORE_1_5V	(6 << 4)
+#	define 	TPS_VCORE_1_4V	(5 << 4)
+#	define 	TPS_VCORE_1_3V	(4 << 4)
+#	define 	TPS_VCORE_1_2V	(3 << 4)
+#	define 	TPS_VCORE_1_1V	(2 << 4)
+#	define 	TPS_VCORE_1_0V	(1 << 4)
+#	define 	TPS_VCORE_0_85V	(0 << 4)
+#	define	TPS_VCORE_LP_1_2V (3 << 2)
+#	define	TPS_VCORE_LP_1_1V (2 << 2)
+#	define	TPS_VCORE_LP_1_0V (1 << 2)
+#	define	TPS_VCORE_LP_0_85V (0 << 2)
+#	define	TPS_VIB		(1 << 1)
+#	define	TPS_VCORE_DISCH	(1 << 0)
 #define	TPS_VREGS1		0x0e
 #	define	TPS_LDO2_ENABLE	(1 << 7)
 #	define	TPS_LDO2_OFF	(1 << 6)
@@ -152,6 +167,10 @@ extern int tps65010_config_vregs1(unsigned value);
  */
 extern int tps65013_set_low_pwr(unsigned mode);
 
+/* tps65010_set_vdcdc2
+ *  value to be written to VDCDC2
+ */
+extern int tps65010_config_vdcdc2(unsigned value);
 
 struct i2c_client;
 
-- 
cgit v1.2.3


From 5a65edbc12b6b34ef912114f1fc8215786f85b25 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 4 Nov 2009 16:10:51 +0000
Subject: mfd: Convert wm8350 IRQ handlers to irq_handler_t

This is done as simple code transformation, the semantics of the
IRQ API provided by the core are are still very different to those
of genirq (mainly with regard to masking).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-irq.c             |  6 +++---
 drivers/power/wm8350_power.c         | 39 +++++++++++++++++++++++-------------
 drivers/regulator/wm8350-regulator.c |  7 +++++--
 drivers/rtc/rtc-wm8350.c             | 18 +++++++++++------
 include/linux/mfd/wm8350/core.h      |  8 ++++----
 sound/soc/codecs/wm8350.c            | 15 +++++++++-----
 6 files changed, 59 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index d9abfc94c685..2ea2b8b4c72a 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c
@@ -371,7 +371,7 @@ static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
 	mutex_lock(&wm8350->irq_mutex);
 
 	if (wm8350->irq[irq].handler)
-		wm8350->irq[irq].handler(wm8350, irq, wm8350->irq[irq].data);
+		wm8350->irq[irq].handler(irq, wm8350->irq[irq].data);
 	else {
 		dev_err(wm8350->dev, "irq %d nobody cared. now masked.\n",
 			irq);
@@ -431,8 +431,8 @@ static irqreturn_t wm8350_irq(int irq, void *irq_data)
 }
 
 int wm8350_register_irq(struct wm8350 *wm8350, int irq,
-			void (*handler) (struct wm8350 *, int, void *),
-			void *data)
+			irq_handler_t handler, unsigned long flags,
+			const char *name, void *data)
 {
 	if (irq < 0 || irq > WM8350_NUM_IRQ || !handler)
 		return -EINVAL;
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 28b0299c0043..6e634cf7fc14 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -184,8 +184,9 @@ static ssize_t charger_state_show(struct device *dev,
 
 static DEVICE_ATTR(charger_state, 0444, charger_state_show, NULL);
 
-static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
+static irqreturn_t wm8350_charger_handler(int irq, void *data)
 {
+	struct wm8350 *wm8350 = data;
 	struct wm8350_power *power = &wm8350->power;
 	struct wm8350_charger_policy *policy = power->policy;
 
@@ -238,6 +239,8 @@ static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
 	default:
 		dev_err(wm8350->dev, "Unknown interrupt %d\n", irq);
 	}
+
+	return IRQ_HANDLED;
 }
 
 /*********************************************************************
@@ -387,45 +390,53 @@ static void wm8350_init_charger(struct wm8350 *wm8350)
 {
 	/* register our interest in charger events */
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "Battery hot", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "Battery cold", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "Battery fail", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Charger timeout", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_TO);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Charge end", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_END);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Charge start", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_START);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Fast charge ready", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Battery <3.9V", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Battery <3.1V", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0,
+			    "Battery <2.85V", wm8350);
+
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
 
 	/* and supply change events */
 	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "USB", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
 	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "Wall", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
 	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
-			    wm8350_charger_handler, NULL);
+			    wm8350_charger_handler, 0, "Battery", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
 }
 
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 768bd0e5b48b..8c289fd4add2 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1330,9 +1330,10 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
 	 },
 };
 
-static void pmic_uv_handler(struct wm8350 *wm8350, int irq, void *data)
+static irqreturn_t pmic_uv_handler(int irq, void *data)
 {
 	struct regulator_dev *rdev = (struct regulator_dev *)data;
+	struct wm8350 *wm8350 = rdev_get_drvdata(rdev);
 
 	mutex_lock(&rdev->mutex);
 	if (irq == WM8350_IRQ_CS1 || irq == WM8350_IRQ_CS2)
@@ -1344,6 +1345,8 @@ static void pmic_uv_handler(struct wm8350 *wm8350, int irq, void *data)
 					      REGULATOR_EVENT_UNDER_VOLTAGE,
 					      wm8350);
 	mutex_unlock(&rdev->mutex);
+
+	return IRQ_HANDLED;
 }
 
 static int wm8350_regulator_probe(struct platform_device *pdev)
@@ -1388,7 +1391,7 @@ static int wm8350_regulator_probe(struct platform_device *pdev)
 
 	/* register regulator IRQ */
 	ret = wm8350_register_irq(wm8350, wm8350_reg[pdev->id].irq,
-				  pmic_uv_handler, rdev);
+				  pmic_uv_handler, 0, "UV", rdev);
 	if (ret < 0) {
 		regulator_unregister(rdev);
 		dev_err(&pdev->dev, "failed to register regulator %s IRQ\n",
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index c91edc572eb6..56e56e552813 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -315,9 +315,9 @@ static int wm8350_rtc_update_irq_enable(struct device *dev,
 	return 0;
 }
 
-static void wm8350_rtc_alarm_handler(struct wm8350 *wm8350, int irq,
-				     void *data)
+static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data)
 {
+	struct wm8350 *wm8350 = data;
 	struct rtc_device *rtc = wm8350->rtc.rtc;
 	int ret;
 
@@ -330,14 +330,18 @@ static void wm8350_rtc_alarm_handler(struct wm8350 *wm8350, int irq,
 		dev_err(&(wm8350->rtc.pdev->dev),
 			"Failed to disable alarm: %d\n", ret);
 	}
+
+	return IRQ_HANDLED;
 }
 
-static void wm8350_rtc_update_handler(struct wm8350 *wm8350, int irq,
-				      void *data)
+static irqreturn_t wm8350_rtc_update_handler(int irq, void *data)
 {
+	struct wm8350 *wm8350 = data;
 	struct rtc_device *rtc = wm8350->rtc.rtc;
 
 	rtc_update_irq(rtc, 1, RTC_IRQF | RTC_UF);
+
+	return IRQ_HANDLED;
 }
 
 static const struct rtc_class_ops wm8350_rtc_ops = {
@@ -459,10 +463,12 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
 	wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_PER);
 
 	wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
-			    wm8350_rtc_update_handler, NULL);
+			    wm8350_rtc_update_handler, 0,
+			    "RTC Seconds", wm8350);
 
 	wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM,
-			    wm8350_rtc_alarm_handler, NULL);
+			    wm8350_rtc_alarm_handler, 0,
+			    "RTC Alarm", wm8350);
 	wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_ALM);
 
 	return 0;
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index ffce508a9109..43868899bf49 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -15,7 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
+#include <linux/interrupt.h>
 
 #include <linux/mfd/wm8350/audio.h>
 #include <linux/mfd/wm8350/gpio.h>
@@ -601,7 +601,7 @@ extern const u16 wm8352_mode3_defaults[];
 struct wm8350;
 
 struct wm8350_irq {
-	void (*handler) (struct wm8350 *, int, void *);
+	irq_handler_t handler;
 	void *data;
 };
 
@@ -678,8 +678,8 @@ int wm8350_block_write(struct wm8350 *wm8350, int reg, int size, u16 *src);
  * WM8350 internal interrupts
  */
 int wm8350_register_irq(struct wm8350 *wm8350, int irq,
-			void (*handler) (struct wm8350 *, int, void *),
-			void *data);
+			irq_handler_t handler, unsigned long flags,
+			const char *name, void *data);
 int wm8350_free_irq(struct wm8350 *wm8350, int irq);
 int wm8350_mask_irq(struct wm8350 *wm8350, int irq);
 int wm8350_unmask_irq(struct wm8350 *wm8350, int irq);
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index f82125d9e85a..17a327d67fd5 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -1340,9 +1340,10 @@ static int wm8350_resume(struct platform_device *pdev)
 	return 0;
 }
 
-static void wm8350_hp_jack_handler(struct wm8350 *wm8350, int irq, void *data)
+static irqreturn_t wm8350_hp_jack_handler(int irq, void *data)
 {
 	struct wm8350_data *priv = data;
+	struct wm8350 *wm8350 = priv->codec.control_data;
 	u16 reg;
 	int report;
 	int mask;
@@ -1365,7 +1366,7 @@ static void wm8350_hp_jack_handler(struct wm8350 *wm8350, int irq, void *data)
 
 	if (!jack->jack) {
 		dev_warn(wm8350->dev, "Jack interrupt called with no jack\n");
-		return;
+		return IRQ_NONE;
 	}
 
 	/* Debounce */
@@ -1378,6 +1379,8 @@ static void wm8350_hp_jack_handler(struct wm8350 *wm8350, int irq, void *data)
 		report = 0;
 
 	snd_soc_jack_report(jack->jack, report, jack->report);
+
+	return IRQ_HANDLED;
 }
 
 /**
@@ -1421,7 +1424,7 @@ int wm8350_hp_jack_detect(struct snd_soc_codec *codec, enum wm8350_jack which,
 	wm8350_set_bits(wm8350, WM8350_JACK_DETECT, ena);
 
 	/* Sync status */
-	wm8350_hp_jack_handler(wm8350, irq, priv);
+	wm8350_hp_jack_handler(irq, priv);
 
 	wm8350_unmask_irq(wm8350, irq);
 
@@ -1485,9 +1488,11 @@ static int wm8350_probe(struct platform_device *pdev)
 	wm8350_mask_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L);
 	wm8350_mask_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L,
-			    wm8350_hp_jack_handler, priv);
+			    wm8350_hp_jack_handler, 0, "Left jack detect",
+			    priv);
 	wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R,
-			    wm8350_hp_jack_handler, priv);
+			    wm8350_hp_jack_handler, 0, "Right jack detect",
+			    priv);
 
 	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
 	if (ret < 0) {
-- 
cgit v1.2.3


From 1920a61e208fac73d1a30a7cf4005701802fe69f Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Date: Tue, 10 Nov 2009 17:26:15 +0200
Subject: mfd: Initial support for twl5031

TWL5031 introduces two new interrupts in PIH. Moreover, BCI
has changed remarkably and, thus, it's disabled when TWL5031
is in use.

Signed-off-by: Ilkka Koskinen <ilkka.koskinen@nokia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-core.c  |  13 ++++-
 drivers/mfd/twl4030-irq.c   | 131 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/i2c/twl4030.h |  47 ++++++++++++++--
 3 files changed, 180 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 90a38e43c313..334c86fccede 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -158,6 +158,10 @@
 #define TWL4030_BASEADD_PWMB		0x00F1
 #define TWL4030_BASEADD_KEYPAD		0x00D2
 
+#define TWL5031_BASEADD_ACCESSORY	0x0074 /* Replaces Main Charge */
+#define TWL5031_BASEADD_INTERRUPTS	0x00B9 /* Different than TWL4030's
+						  one */
+
 /* subchip/slave 3 - POWER ID */
 #define TWL4030_BASEADD_BACKUP		0x0014
 #define TWL4030_BASEADD_INT		0x002E
@@ -189,6 +193,7 @@
 /* chip-specific feature flags, for i2c_device_id.driver_data */
 #define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
 #define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
+#define TWL5031			BIT(2)  /* twl5031 has different registers */
 
 /*----------------------------------------------------------------------*/
 
@@ -241,6 +246,8 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
 	{ 2, TWL4030_BASEADD_PWM1 },
 	{ 2, TWL4030_BASEADD_PWMA },
 	{ 2, TWL4030_BASEADD_PWMB },
+	{ 2, TWL5031_BASEADD_ACCESSORY },
+	{ 2, TWL5031_BASEADD_INTERRUPTS },
 
 	{ 3, TWL4030_BASEADD_BACKUP },
 	{ 3, TWL4030_BASEADD_INT },
@@ -488,7 +495,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 {
 	struct device	*child;
 
-	if (twl_has_bci() && pdata->bci && !(features & TPS_SUBSET)) {
+	if (twl_has_bci() && pdata->bci &&
+	    !(features & (TPS_SUBSET | TWL5031))) {
 		child = add_child(3, "twl4030_bci",
 				pdata->bci, sizeof(*pdata->bci),
 				false,
@@ -760,6 +768,7 @@ static void clocks_init(struct device *dev,
 
 int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
 int twl_exit_irq(void);
+int twl_init_chip_irq(const char *chip);
 
 static int twl4030_remove(struct i2c_client *client)
 {
@@ -835,6 +844,7 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (client->irq
 			&& pdata->irq_base
 			&& pdata->irq_end > pdata->irq_base) {
+		twl_init_chip_irq(id->name);
 		status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
 		if (status < 0)
 			goto fail;
@@ -850,6 +860,7 @@ fail:
 static const struct i2c_device_id twl4030_ids[] = {
 	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
 	{ "twl5030", 0 },		/* T2 updated */
+	{ "twl5031", TWL5031 },		/* TWL5030 updated */
 	{ "tps65950", 0 },		/* catalog version of twl5030 */
 	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
 	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index fb194fe244c1..0b733028828f 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -74,6 +74,8 @@ struct sih {
 	u8	edr_offset;
 	u8	bytes_edr;		/* bytelen of EDR */
 
+	u8	irq_lines;		/* number of supported irq lines */
+
 	/* SIR ignored -- set interrupt, for testing only */
 	struct irq_data {
 		u8	isr_offset;
@@ -82,6 +84,9 @@ struct sih {
 	/* + 2 bytes padding */
 };
 
+static const struct sih *sih_modules;
+static int nr_sih_modules;
+
 #define SIH_INITIALIZER(modname, nbits) \
 	.module		= TWL4030_MODULE_ ## modname, \
 	.control_offset = TWL4030_ ## modname ## _SIH_CTRL, \
@@ -89,6 +94,7 @@ struct sih {
 	.bytes_ixr	= DIV_ROUND_UP(nbits, 8), \
 	.edr_offset	= TWL4030_ ## modname ## _EDR, \
 	.bytes_edr	= DIV_ROUND_UP((2*(nbits)), 8), \
+	.irq_lines	= 2, \
 	.mask = { { \
 		.isr_offset	= TWL4030_ ## modname ## _ISR1, \
 		.imr_offset	= TWL4030_ ## modname ## _IMR1, \
@@ -107,7 +113,8 @@ struct sih {
 /* Order in this table matches order in PIH_ISR.  That is,
  * BIT(n) in PIH_ISR is sih_modules[n].
  */
-static const struct sih sih_modules[6] = {
+/* sih_modules_twl4030 is used both in twl4030 and twl5030 */
+static const struct sih sih_modules_twl4030[6] = {
 	[0] = {
 		.name		= "gpio",
 		.module		= TWL4030_MODULE_GPIO,
@@ -118,6 +125,7 @@ static const struct sih sih_modules[6] = {
 		/* Note: *all* of these IRQs default to no-trigger */
 		.edr_offset	= REG_GPIO_EDR1,
 		.bytes_edr	= 5,
+		.irq_lines	= 2,
 		.mask = { {
 			.isr_offset	= REG_GPIO_ISR1A,
 			.imr_offset	= REG_GPIO_IMR1A,
@@ -140,6 +148,7 @@ static const struct sih sih_modules[6] = {
 		.edr_offset	= TWL4030_INTERRUPTS_BCIEDR1,
 		/* Note: most of these IRQs default to no-trigger */
 		.bytes_edr	= 3,
+		.irq_lines	= 2,
 		.mask = { {
 			.isr_offset	= TWL4030_INTERRUPTS_BCIISR1A,
 			.imr_offset	= TWL4030_INTERRUPTS_BCIIMR1A,
@@ -164,6 +173,99 @@ static const struct sih sih_modules[6] = {
 		/* there are no SIH modules #6 or #7 ... */
 };
 
+static const struct sih sih_modules_twl5031[8] = {
+	[0] = {
+		.name		= "gpio",
+		.module		= TWL4030_MODULE_GPIO,
+		.control_offset	= REG_GPIO_SIH_CTRL,
+		.set_cor	= true,
+		.bits		= TWL4030_GPIO_MAX,
+		.bytes_ixr	= 3,
+		/* Note: *all* of these IRQs default to no-trigger */
+		.edr_offset	= REG_GPIO_EDR1,
+		.bytes_edr	= 5,
+		.irq_lines	= 2,
+		.mask = { {
+			.isr_offset	= REG_GPIO_ISR1A,
+			.imr_offset	= REG_GPIO_IMR1A,
+		}, {
+			.isr_offset	= REG_GPIO_ISR1B,
+			.imr_offset	= REG_GPIO_IMR1B,
+		}, },
+	},
+	[1] = {
+		.name		= "keypad",
+		.set_cor	= true,
+		SIH_INITIALIZER(KEYPAD_KEYP, 4)
+	},
+	[2] = {
+		.name		= "bci",
+		.module		= TWL5031_MODULE_INTERRUPTS,
+		.control_offset	= TWL5031_INTERRUPTS_BCISIHCTRL,
+		.bits		= 7,
+		.bytes_ixr	= 1,
+		.edr_offset	= TWL5031_INTERRUPTS_BCIEDR1,
+		/* Note: most of these IRQs default to no-trigger */
+		.bytes_edr	= 2,
+		.irq_lines	= 2,
+		.mask = { {
+			.isr_offset	= TWL5031_INTERRUPTS_BCIISR1,
+			.imr_offset	= TWL5031_INTERRUPTS_BCIIMR1,
+		}, {
+			.isr_offset	= TWL5031_INTERRUPTS_BCIISR2,
+			.imr_offset	= TWL5031_INTERRUPTS_BCIIMR2,
+		}, },
+	},
+	[3] = {
+		.name		= "madc",
+		SIH_INITIALIZER(MADC, 4)
+	},
+	[4] = {
+		/* USB doesn't use the same SIH organization */
+		.name		= "usb",
+	},
+	[5] = {
+		.name		= "power",
+		.set_cor	= true,
+		SIH_INITIALIZER(INT_PWR, 8)
+	},
+	[6] = {
+		/*
+		 * ACI doesn't use the same SIH organization.
+		 * For example, it supports only one interrupt line
+		 */
+		.name		= "aci",
+		.module		= TWL5031_MODULE_ACCESSORY,
+		.bits		= 9,
+		.bytes_ixr	= 2,
+		.irq_lines	= 1,
+		.mask = { {
+			.isr_offset	= TWL5031_ACIIDR_LSB,
+			.imr_offset	= TWL5031_ACIIMR_LSB,
+		}, },
+
+	},
+	[7] = {
+		/* Accessory */
+		.name		= "acc",
+		.module		= TWL5031_MODULE_ACCESSORY,
+		.control_offset	= TWL5031_ACCSIHCTRL,
+		.bits		= 2,
+		.bytes_ixr	= 1,
+		.edr_offset	= TWL5031_ACCEDR1,
+		/* Note: most of these IRQs default to no-trigger */
+		.bytes_edr	= 1,
+		.irq_lines	= 2,
+		.mask = { {
+			.isr_offset	= TWL5031_ACCISR1,
+			.imr_offset	= TWL5031_ACCIMR1,
+		}, {
+			.isr_offset	= TWL5031_ACCISR2,
+			.imr_offset	= TWL5031_ACCIMR2,
+		}, },
+	},
+};
+
 #undef TWL4030_MODULE_KEYPAD_KEYP
 #undef TWL4030_MODULE_INT_PWR
 #undef TWL4030_INT_PWR_EDR
@@ -284,12 +386,16 @@ static int twl4030_init_sih_modules(unsigned line)
 	/* disable all interrupts on our line */
 	memset(buf, 0xff, sizeof buf);
 	sih = sih_modules;
-	for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) {
+	for (i = 0; i < nr_sih_modules; i++, sih++) {
 
 		/* skip USB -- it's funky */
 		if (!sih->bytes_ixr)
 			continue;
 
+		/* Not all the SIH modules support multiple interrupt lines */
+		if (sih->irq_lines <= line)
+			continue;
+
 		status = twl4030_i2c_write(sih->module, buf,
 				sih->mask[line].imr_offset, sih->bytes_ixr);
 		if (status < 0)
@@ -314,7 +420,7 @@ static int twl4030_init_sih_modules(unsigned line)
 	}
 
 	sih = sih_modules;
-	for (i = 0; i < ARRAY_SIZE(sih_modules); i++, sih++) {
+	for (i = 0; i < nr_sih_modules; i++, sih++) {
 		u8 rxbuf[4];
 		int j;
 
@@ -322,6 +428,10 @@ static int twl4030_init_sih_modules(unsigned line)
 		if (!sih->bytes_ixr)
 			continue;
 
+		/* Not all the SIH modules support multiple interrupt lines */
+		if (sih->irq_lines <= line)
+			continue;
+
 		/* Clear pending interrupt status.  Either the read was
 		 * enough, or we need to write those bits.  Repeat, in
 		 * case an IRQ is pending (PENDDIS=0) ... that's not
@@ -611,7 +721,7 @@ int twl4030_sih_setup(int module)
 
 	/* only support modules with standard clear-on-read for now */
 	for (sih_mod = 0, sih = sih_modules;
-			sih_mod < ARRAY_SIZE(sih_modules);
+			sih_mod < nr_sih_modules;
 			sih_mod++, sih++) {
 		if (sih->module == module && sih->set_cor) {
 			if (!WARN((irq_base + sih->bits) > NR_IRQS,
@@ -756,3 +866,16 @@ int twl_exit_irq(void)
 	}
 	return 0;
 }
+
+int twl_init_chip_irq(const char *chip)
+{
+	if (!strcmp(chip, "twl5031")) {
+		sih_modules = sih_modules_twl5031;
+		nr_sih_modules = ARRAY_SIZE(sih_modules_twl5031);
+	} else {
+		sih_modules = sih_modules_twl4030;
+		nr_sih_modules = ARRAY_SIZE(sih_modules_twl4030);
+	}
+
+	return 0;
+}
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index 0c84cfa059e9..efa62eb497b8 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -61,13 +61,16 @@
 #define TWL4030_MODULE_PWMA		0x0E
 #define TWL4030_MODULE_PWMB		0x0F
 
+#define TWL5031_MODULE_ACCESSORY	0x10
+#define TWL5031_MODULE_INTERRUPTS	0x11
+
 /* Slave 3 (i2c address 0x4b) */
-#define TWL4030_MODULE_BACKUP		0x10
-#define TWL4030_MODULE_INT		0x11
-#define TWL4030_MODULE_PM_MASTER	0x12
-#define TWL4030_MODULE_PM_RECEIVER	0x13
-#define TWL4030_MODULE_RTC		0x14
-#define TWL4030_MODULE_SECURED_REG	0x15
+#define TWL4030_MODULE_BACKUP		0x12
+#define TWL4030_MODULE_INT		0x13
+#define TWL4030_MODULE_PM_MASTER	0x14
+#define TWL4030_MODULE_PM_RECEIVER	0x15
+#define TWL4030_MODULE_RTC		0x16
+#define TWL4030_MODULE_SECURED_REG	0x17
 
 /*
  * Read and write single 8-bit registers
@@ -221,6 +224,38 @@ int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 /*----------------------------------------------------------------------*/
 
+/*
+ * Accessory Interrupts
+ */
+#define TWL5031_ACIIMR_LSB		0x05
+#define TWL5031_ACIIMR_MSB		0x06
+#define TWL5031_ACIIDR_LSB		0x07
+#define TWL5031_ACIIDR_MSB		0x08
+#define TWL5031_ACCISR1			0x0F
+#define TWL5031_ACCIMR1			0x10
+#define TWL5031_ACCISR2			0x11
+#define TWL5031_ACCIMR2			0x12
+#define TWL5031_ACCSIR			0x13
+#define TWL5031_ACCEDR1			0x14
+#define TWL5031_ACCSIHCTRL		0x15
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Battery Charger Controller
+ */
+
+#define TWL5031_INTERRUPTS_BCIISR1	0x0
+#define TWL5031_INTERRUPTS_BCIIMR1	0x1
+#define TWL5031_INTERRUPTS_BCIISR2	0x2
+#define TWL5031_INTERRUPTS_BCIIMR2	0x3
+#define TWL5031_INTERRUPTS_BCISIR	0x4
+#define TWL5031_INTERRUPTS_BCIEDR1	0x5
+#define TWL5031_INTERRUPTS_BCIEDR2	0x6
+#define TWL5031_INTERRUPTS_BCISIHCTRL	0x7
+
+/*----------------------------------------------------------------------*/
+
 /* Power bus message definitions */
 
 /* The TWL4030/5030 splits its power-management resources (the various
-- 
cgit v1.2.3


From 5fb4d38b19d95a5f980f0a10adba798f5b92128c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 11 Nov 2009 16:10:22 +0000
Subject: mfd: Move WM831x to generic IRQ

Replace the wm831x-local IRQ infrastructure with genirq, allowing access
to the diagnostic infrastructure of genirq and allowing us to implement
interrupt support for the GPIOs.  The switchover is done within the
wm831x specific IRQ API, further patches will convert the individual
drivers to use genirq directly.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm831x-core.c        |   9 +-
 drivers/mfd/wm831x-irq.c         | 209 ++++++++++++++++-----------------------
 include/linux/mfd/wm831x/core.h  |  40 +++++---
 include/linux/mfd/wm831x/pdata.h |   1 +
 4 files changed, 122 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 163029f06185..223a90c7492f 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1504,19 +1504,19 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8310:
 		ret = mfd_add_devices(wm831x->dev, -1,
 				      wm8310_devs, ARRAY_SIZE(wm8310_devs),
-				      NULL, 0);
+				      NULL, wm831x->irq_base);
 		break;
 
 	case WM8311:
 		ret = mfd_add_devices(wm831x->dev, -1,
 				      wm8311_devs, ARRAY_SIZE(wm8311_devs),
-				      NULL, 0);
+				      NULL, wm831x->irq_base);
 		break;
 
 	case WM8312:
 		ret = mfd_add_devices(wm831x->dev, -1,
 				      wm8312_devs, ARRAY_SIZE(wm8312_devs),
-				      NULL, 0);
+				      NULL, wm831x->irq_base);
 		break;
 
 	case WM8320:
@@ -1538,7 +1538,8 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	if (pdata && pdata->backlight) {
 		/* Treat errors as non-critical */
 		ret = mfd_add_devices(wm831x->dev, -1, backlight_devs,
-				      ARRAY_SIZE(backlight_devs), NULL, 0);
+				      ARRAY_SIZE(backlight_devs), NULL,
+				      wm831x->irq_base);
 		if (ret < 0)
 			dev_err(wm831x->dev, "Failed to add backlight: %d\n",
 				ret);
diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index ac056ea6b66e..301327697117 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
+#include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/interrupt.h>
 
@@ -339,110 +340,71 @@ static inline int irq_data_to_mask_reg(struct wm831x_irq_data *irq_data)
 	return WM831X_INTERRUPT_STATUS_1_MASK - 1 + irq_data->reg;
 }
 
-static void __wm831x_enable_irq(struct wm831x *wm831x, int irq)
+static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x,
+							int irq)
 {
-	struct wm831x_irq_data *irq_data = &wm831x_irqs[irq];
-
-	wm831x->irq_masks[irq_data->reg - 1] &= ~irq_data->mask;
-	wm831x_reg_write(wm831x, irq_data_to_mask_reg(irq_data),
-			 wm831x->irq_masks[irq_data->reg - 1]);
+	return &wm831x_irqs[irq - wm831x->irq_base];
 }
 
-void wm831x_enable_irq(struct wm831x *wm831x, int irq)
+static void wm831x_irq_lock(unsigned int irq)
 {
-	mutex_lock(&wm831x->irq_lock);
-	__wm831x_enable_irq(wm831x, irq);
-	mutex_unlock(&wm831x->irq_lock);
-}
-EXPORT_SYMBOL_GPL(wm831x_enable_irq);
+	struct wm831x *wm831x = get_irq_chip_data(irq);
 
-static void __wm831x_disable_irq(struct wm831x *wm831x, int irq)
-{
-	struct wm831x_irq_data *irq_data = &wm831x_irqs[irq];
-
-	wm831x->irq_masks[irq_data->reg - 1] |= irq_data->mask;
-	wm831x_reg_write(wm831x, irq_data_to_mask_reg(irq_data),
-			 wm831x->irq_masks[irq_data->reg - 1]);
-}
-
-void wm831x_disable_irq(struct wm831x *wm831x, int irq)
-{
 	mutex_lock(&wm831x->irq_lock);
-	__wm831x_disable_irq(wm831x, irq);
-	mutex_unlock(&wm831x->irq_lock);
 }
-EXPORT_SYMBOL_GPL(wm831x_disable_irq);
 
-int wm831x_request_irq(struct wm831x *wm831x,
-		       unsigned int irq, irq_handler_t handler,
-		       unsigned long flags, const char *name,
-		       void *dev)
+static void wm831x_irq_sync_unlock(unsigned int irq)
 {
-	int ret = 0;
-
-	if (irq < 0 || irq >= WM831X_NUM_IRQS)
-		return -EINVAL;
-
-	mutex_lock(&wm831x->irq_lock);
-
-	if (wm831x_irqs[irq].handler) {
-		dev_err(wm831x->dev, "Already have handler for IRQ %d\n", irq);
-		ret = -EINVAL;
-		goto out;
+	struct wm831x *wm831x = get_irq_chip_data(irq);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wm831x->irq_masks_cur); i++) {
+		/* If there's been a change in the mask write it back
+		 * to the hardware. */
+		if (wm831x->irq_masks_cur[i] != wm831x->irq_masks_cache[i]) {
+			wm831x->irq_masks_cache[i] = wm831x->irq_masks_cur[i];
+			wm831x_reg_write(wm831x,
+					 WM831X_INTERRUPT_STATUS_1_MASK + i,
+					 wm831x->irq_masks_cur[i]);
+		}
 	}
 
-	wm831x_irqs[irq].handler = handler;
-	wm831x_irqs[irq].handler_data = dev;
-
-	__wm831x_enable_irq(wm831x, irq);
-
-out:
 	mutex_unlock(&wm831x->irq_lock);
-
-	return ret;
 }
-EXPORT_SYMBOL_GPL(wm831x_request_irq);
 
-void wm831x_free_irq(struct wm831x *wm831x, unsigned int irq, void *data)
+static void wm831x_irq_unmask(unsigned int irq)
 {
-	if (irq < 0 || irq >= WM831X_NUM_IRQS)
-		return;
-
-	mutex_lock(&wm831x->irq_lock);
+	struct wm831x *wm831x = get_irq_chip_data(irq);
+	struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x, irq);
 
-	wm831x_irqs[irq].handler = NULL;
-	wm831x_irqs[irq].handler_data = NULL;
-
-	__wm831x_disable_irq(wm831x, irq);
-
-	mutex_unlock(&wm831x->irq_lock);
+	wm831x->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
 }
-EXPORT_SYMBOL_GPL(wm831x_free_irq);
 
-
-static void wm831x_handle_irq(struct wm831x *wm831x, int irq, int status)
+static void wm831x_irq_mask(unsigned int irq)
 {
-	struct wm831x_irq_data *irq_data = &wm831x_irqs[irq];
-
-	if (irq_data->handler) {
-		irq_data->handler(irq, irq_data->handler_data);
-		wm831x_reg_write(wm831x, irq_data_to_status_reg(irq_data),
-				 irq_data->mask);
-	} else {
-		dev_err(wm831x->dev, "Unhandled IRQ %d, masking\n", irq);
-		__wm831x_disable_irq(wm831x, irq);
-	}
+	struct wm831x *wm831x = get_irq_chip_data(irq);
+	struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x, irq);
+
+	wm831x->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
 }
 
-/* Main interrupt handling occurs in a workqueue since we need
- * interrupts enabled to interact with the chip. */
-static void wm831x_irq_worker(struct work_struct *work)
+static struct irq_chip wm831x_irq_chip = {
+	.name = "wm831x",
+	.bus_lock = wm831x_irq_lock,
+	.bus_sync_unlock = wm831x_irq_sync_unlock,
+	.mask = wm831x_irq_mask,
+	.unmask = wm831x_irq_unmask,
+};
+
+/* The processing of the primary interrupt occurs in a thread so that
+ * we can interact with the device over I2C or SPI. */
+static irqreturn_t wm831x_irq_thread(int irq, void *data)
 {
-	struct wm831x *wm831x = container_of(work, struct wm831x, irq_work);
+	struct wm831x *wm831x = data;
 	unsigned int i;
 	int primary;
-	int status_regs[5];
-	int read[5] = { 0 };
+	int status_regs[WM831X_NUM_IRQ_REGS] = { 0 };
+	int read[WM831X_NUM_IRQ_REGS] = { 0 };
 	int *status;
 
 	primary = wm831x_reg_read(wm831x, WM831X_SYSTEM_INTERRUPTS);
@@ -452,8 +414,6 @@ static void wm831x_irq_worker(struct work_struct *work)
 		goto out;
 	}
 
-	mutex_lock(&wm831x->irq_lock);
-
 	for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) {
 		int offset = wm831x_irqs[i].reg - 1;
 
@@ -471,41 +431,34 @@ static void wm831x_irq_worker(struct work_struct *work)
 				dev_err(wm831x->dev,
 					"Failed to read IRQ status: %d\n",
 					*status);
-				goto out_lock;
+				goto out;
 			}
 
-			/* Mask out the disabled IRQs */
-			*status &= ~wm831x->irq_masks[offset];
 			read[offset] = 1;
 		}
 
-		if (*status & wm831x_irqs[i].mask)
-			wm831x_handle_irq(wm831x, i, *status);
+		/* Report it if it isn't masked, or forget the status. */
+		if ((*status & ~wm831x->irq_masks_cur[offset])
+		    & wm831x_irqs[i].mask)
+			handle_nested_irq(wm831x->irq_base + i);
+		else
+			*status &= ~wm831x_irqs[i].mask;
 	}
 
-out_lock:
-	mutex_unlock(&wm831x->irq_lock);
 out:
-	enable_irq(wm831x->irq);
-}
-
-
-static irqreturn_t wm831x_cpu_irq(int irq, void *data)
-{
-	struct wm831x *wm831x = data;
-
-	/* Shut the interrupt to the CPU up and schedule the actual
-	 * handler; we can't check that the IRQ is asserted. */
-	disable_irq_nosync(irq);
-
-	queue_work(wm831x->irq_wq, &wm831x->irq_work);
+	for (i = 0; i < ARRAY_SIZE(status_regs); i++) {
+		if (status_regs[i])
+			wm831x_reg_write(wm831x, WM831X_INTERRUPT_STATUS_1 + i,
+					 status_regs[i]);
+	}
 
 	return IRQ_HANDLED;
 }
 
 int wm831x_irq_init(struct wm831x *wm831x, int irq)
 {
-	int i, ret;
+	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
+	int i, cur_irq, ret;
 
 	mutex_init(&wm831x->irq_lock);
 
@@ -515,41 +468,53 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
 		return 0;
 	}
 
-
-	wm831x->irq_wq = create_singlethread_workqueue("wm831x-irq");
-	if (!wm831x->irq_wq) {
-		dev_err(wm831x->dev, "Failed to allocate IRQ worker\n");
-		return -ESRCH;
+	if (!pdata || !pdata->irq_base) {
+		dev_err(wm831x->dev,
+			"No interrupt base specified, no interrupts\n");
+		return 0;
 	}
 
 	wm831x->irq = irq;
-	INIT_WORK(&wm831x->irq_work, wm831x_irq_worker);
+	wm831x->irq_base = pdata->irq_base;
 
 	/* Mask the individual interrupt sources */
-	for (i = 0; i < ARRAY_SIZE(wm831x->irq_masks); i++) {
-		wm831x->irq_masks[i] = 0xffff;
+	for (i = 0; i < ARRAY_SIZE(wm831x->irq_masks_cur); i++) {
+		wm831x->irq_masks_cur[i] = 0xffff;
+		wm831x->irq_masks_cache[i] = 0xffff;
 		wm831x_reg_write(wm831x, WM831X_INTERRUPT_STATUS_1_MASK + i,
 				 0xffff);
 	}
 
-	/* Enable top level interrupts, we mask at secondary level */
-	wm831x_reg_write(wm831x, WM831X_SYSTEM_INTERRUPTS_MASK, 0);
+	/* Register them with genirq */
+	for (cur_irq = wm831x->irq_base;
+	     cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base;
+	     cur_irq++) {
+		set_irq_chip_data(cur_irq, wm831x);
+		set_irq_chip_and_handler(cur_irq, &wm831x_irq_chip,
+					 handle_edge_irq);
+		set_irq_nested_thread(cur_irq, 1);
+
+		/* ARM needs us to explicitly flag the IRQ as valid
+		 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+		set_irq_flags(cur_irq, IRQF_VALID);
+#else
+		set_irq_noprobe(cur_irq);
+#endif
+	}
 
-	/* We're good to go.  We set IRQF_SHARED since there's a
-	 * chance the driver will interoperate with another driver but
-	 * the need to disable the IRQ while handing via I2C/SPI means
-	 * that this may break and performance will be impacted.  If
-	 * this does happen it's a hardware design issue and the only
-	 * other alternative would be polling.
-	 */
-	ret = request_irq(irq, wm831x_cpu_irq, IRQF_TRIGGER_LOW | IRQF_SHARED,
-			  "wm831x", wm831x);
+	ret = request_threaded_irq(irq, NULL, wm831x_irq_thread,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   "wm831x", wm831x);
 	if (ret != 0) {
 		dev_err(wm831x->dev, "Failed to request IRQ %d: %d\n",
 			irq, ret);
 		return ret;
 	}
 
+	/* Enable top level interrupts, we mask at secondary level */
+	wm831x_reg_write(wm831x, WM831X_SYSTEM_INTERRUPTS_MASK, 0);
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index d01d293a6b25..5184b79c700b 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -16,7 +16,6 @@
 #define __MFD_WM831X_CORE_H__
 
 #include <linux/interrupt.h>
-#include <linux/workqueue.h>
 
 /*
  * Register values.
@@ -236,6 +235,8 @@
 
 struct regulator_dev;
 
+#define WM831X_NUM_IRQ_REGS 5
+
 struct wm831x {
 	struct mutex io_lock;
 
@@ -249,10 +250,9 @@ struct wm831x {
 
 	int irq;  /* Our chip IRQ */
 	struct mutex irq_lock;
-	struct workqueue_struct *irq_wq;
-	struct work_struct irq_work;
 	unsigned int irq_base;
-	int irq_masks[5];
+	int irq_masks_cur[WM831X_NUM_IRQ_REGS];   /* Currently active value */
+	int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */
 
 	int num_gpio;
 
@@ -281,12 +281,30 @@ int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg,
 int wm831x_irq_init(struct wm831x *wm831x, int irq);
 void wm831x_irq_exit(struct wm831x *wm831x);
 
-int __must_check wm831x_request_irq(struct wm831x *wm831x,
-				    unsigned int irq, irq_handler_t handler,
-				    unsigned long flags, const char *name,
-				    void *dev);
-void wm831x_free_irq(struct wm831x *wm831x, unsigned int, void *);
-void wm831x_disable_irq(struct wm831x *wm831x, int irq);
-void wm831x_enable_irq(struct wm831x *wm831x, int irq);
+static inline int __must_check wm831x_request_irq(struct wm831x *wm831x,
+						  unsigned int irq,
+						  irq_handler_t handler,
+						  unsigned long flags,
+						  const char *name,
+						  void *dev)
+{
+	return request_threaded_irq(irq, NULL, handler, flags, name, dev);
+}
+
+static inline void wm831x_free_irq(struct wm831x *wm831x,
+				   unsigned int irq, void *dev)
+{
+	free_irq(irq, dev);
+}
+
+static inline void wm831x_disable_irq(struct wm831x *wm831x, int irq)
+{
+	disable_irq(irq);
+}
+
+static inline void wm831x_enable_irq(struct wm831x *wm831x, int irq)
+{
+	enable_irq(irq);
+}
 
 #endif
diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h
index 90d820260aad..415c228743d5 100644
--- a/include/linux/mfd/wm831x/pdata.h
+++ b/include/linux/mfd/wm831x/pdata.h
@@ -91,6 +91,7 @@ struct wm831x_pdata {
 	/** Called after subdevices are set up */
 	int (*post_init)(struct wm831x *wm831x);
 
+	int irq_base;
 	int gpio_base;
 	struct wm831x_backlight_pdata *backlight;
 	struct wm831x_backup_pdata *backup;
-- 
cgit v1.2.3


From 9e2726776d45b1e383625b3ce4f8b511456e09ad Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 30 Nov 2009 00:53:17 +0100
Subject: mfd: Near complete mc13783 rewrite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes several things while still providing the old API:

 - simplify and fix locking
 - better error handling
 - don't ack all irqs making it impossible to detect a reset of the
   rtc
 - use a timeout variant to wait for completion of ADC conversion
 - provide platform-data to regulator subdevice (This allows making
   struct mc13783 opaque for other drivers after the regulator driver is
   updated to use its platform_data.)
 - expose all interrupts
 - use threaded irq

After all users in mainline are converted to the new API, some things
(e.g. mc13783-private.h) can go away.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/mc13783-core.c          | 755 ++++++++++++++++++++++++------------
 include/linux/mfd/mc13783-private.h | 208 +---------
 include/linux/mfd/mc13783.h         | 120 ++++--
 3 files changed, 617 insertions(+), 466 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c
index e354d2912ef1..dc1add0c4949 100644
--- a/drivers/mfd/mc13783-core.c
+++ b/drivers/mfd/mc13783-core.c
@@ -1,286 +1,549 @@
 /*
- * Copyright 2009 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
- *
- * This code is in parts based on wm8350-core.c and pcf50633-core.c
- *
- * Initial development of this code was funded by
- * Phytec Messtechnik GmbH, http://www.phytec.de
+ * Copyright 2009 Pengutronix
+ * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * loosely based on an earlier driver that has
+ * Copyright 2009 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
  */
-
-#include <linux/mfd/mc13783-private.h>
-#include <linux/platform_device.h>
-#include <linux/mfd/mc13783.h>
-#include <linux/completion.h>
-#include <linux/interrupt.h>
-#include <linux/mfd/core.h>
-#include <linux/spi/spi.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
+#include <linux/spi/spi.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/mc13783-private.h>
+
+#define MC13783_IRQSTAT0	0
+#define MC13783_IRQSTAT0_ADCDONEI	(1 << 0)
+#define MC13783_IRQSTAT0_ADCBISDONEI	(1 << 1)
+#define MC13783_IRQSTAT0_TSI		(1 << 2)
+#define MC13783_IRQSTAT0_WHIGHI		(1 << 3)
+#define MC13783_IRQSTAT0_WLOWI		(1 << 4)
+#define MC13783_IRQSTAT0_CHGDETI	(1 << 6)
+#define MC13783_IRQSTAT0_CHGOVI		(1 << 7)
+#define MC13783_IRQSTAT0_CHGREVI	(1 << 8)
+#define MC13783_IRQSTAT0_CHGSHORTI	(1 << 9)
+#define MC13783_IRQSTAT0_CCCVI		(1 << 10)
+#define MC13783_IRQSTAT0_CHGCURRI	(1 << 11)
+#define MC13783_IRQSTAT0_BPONI		(1 << 12)
+#define MC13783_IRQSTAT0_LOBATLI	(1 << 13)
+#define MC13783_IRQSTAT0_LOBATHI	(1 << 14)
+#define MC13783_IRQSTAT0_UDPI		(1 << 15)
+#define MC13783_IRQSTAT0_USBI		(1 << 16)
+#define MC13783_IRQSTAT0_IDI		(1 << 19)
+#define MC13783_IRQSTAT0_SE1I		(1 << 21)
+#define MC13783_IRQSTAT0_CKDETI		(1 << 22)
+#define MC13783_IRQSTAT0_UDMI		(1 << 23)
+
+#define MC13783_IRQMASK0	1
+#define MC13783_IRQMASK0_ADCDONEM	MC13783_IRQSTAT0_ADCDONEI
+#define MC13783_IRQMASK0_ADCBISDONEM	MC13783_IRQSTAT0_ADCBISDONEI
+#define MC13783_IRQMASK0_TSM		MC13783_IRQSTAT0_TSI
+#define MC13783_IRQMASK0_WHIGHM		MC13783_IRQSTAT0_WHIGHI
+#define MC13783_IRQMASK0_WLOWM		MC13783_IRQSTAT0_WLOWI
+#define MC13783_IRQMASK0_CHGDETM	MC13783_IRQSTAT0_CHGDETI
+#define MC13783_IRQMASK0_CHGOVM		MC13783_IRQSTAT0_CHGOVI
+#define MC13783_IRQMASK0_CHGREVM	MC13783_IRQSTAT0_CHGREVI
+#define MC13783_IRQMASK0_CHGSHORTM	MC13783_IRQSTAT0_CHGSHORTI
+#define MC13783_IRQMASK0_CCCVM		MC13783_IRQSTAT0_CCCVI
+#define MC13783_IRQMASK0_CHGCURRM	MC13783_IRQSTAT0_CHGCURRI
+#define MC13783_IRQMASK0_BPONM		MC13783_IRQSTAT0_BPONI
+#define MC13783_IRQMASK0_LOBATLM	MC13783_IRQSTAT0_LOBATLI
+#define MC13783_IRQMASK0_LOBATHM	MC13783_IRQSTAT0_LOBATHI
+#define MC13783_IRQMASK0_UDPM		MC13783_IRQSTAT0_UDPI
+#define MC13783_IRQMASK0_USBM		MC13783_IRQSTAT0_USBI
+#define MC13783_IRQMASK0_IDM		MC13783_IRQSTAT0_IDI
+#define MC13783_IRQMASK0_SE1M		MC13783_IRQSTAT0_SE1I
+#define MC13783_IRQMASK0_CKDETM		MC13783_IRQSTAT0_CKDETI
+#define MC13783_IRQMASK0_UDMM		MC13783_IRQSTAT0_UDMI
+
+#define MC13783_IRQSTAT1	3
+#define MC13783_IRQSTAT1_1HZI		(1 << 0)
+#define MC13783_IRQSTAT1_TODAI		(1 << 1)
+#define MC13783_IRQSTAT1_ONOFD1I	(1 << 3)
+#define MC13783_IRQSTAT1_ONOFD2I	(1 << 4)
+#define MC13783_IRQSTAT1_ONOFD3I	(1 << 5)
+#define MC13783_IRQSTAT1_SYSRSTI	(1 << 6)
+#define MC13783_IRQSTAT1_RTCRSTI	(1 << 7)
+#define MC13783_IRQSTAT1_PCI		(1 << 8)
+#define MC13783_IRQSTAT1_WARMI		(1 << 9)
+#define MC13783_IRQSTAT1_MEMHLDI	(1 << 10)
+#define MC13783_IRQSTAT1_PWRRDYI	(1 << 11)
+#define MC13783_IRQSTAT1_THWARNLI	(1 << 12)
+#define MC13783_IRQSTAT1_THWARNHI	(1 << 13)
+#define MC13783_IRQSTAT1_CLKI		(1 << 14)
+#define MC13783_IRQSTAT1_SEMAFI		(1 << 15)
+#define MC13783_IRQSTAT1_MC2BI		(1 << 17)
+#define MC13783_IRQSTAT1_HSDETI		(1 << 18)
+#define MC13783_IRQSTAT1_HSLI		(1 << 19)
+#define MC13783_IRQSTAT1_ALSPTHI	(1 << 20)
+#define MC13783_IRQSTAT1_AHSSHORTI	(1 << 21)
+
+#define MC13783_IRQMASK1	4
+#define MC13783_IRQMASK1_1HZM		MC13783_IRQSTAT1_1HZI
+#define MC13783_IRQMASK1_TODAM		MC13783_IRQSTAT1_TODAI
+#define MC13783_IRQMASK1_ONOFD1M	MC13783_IRQSTAT1_ONOFD1I
+#define MC13783_IRQMASK1_ONOFD2M	MC13783_IRQSTAT1_ONOFD2I
+#define MC13783_IRQMASK1_ONOFD3M	MC13783_IRQSTAT1_ONOFD3I
+#define MC13783_IRQMASK1_SYSRSTM	MC13783_IRQSTAT1_SYSRSTI
+#define MC13783_IRQMASK1_RTCRSTM	MC13783_IRQSTAT1_RTCRSTI
+#define MC13783_IRQMASK1_PCM		MC13783_IRQSTAT1_PCI
+#define MC13783_IRQMASK1_WARMM		MC13783_IRQSTAT1_WARMI
+#define MC13783_IRQMASK1_MEMHLDM	MC13783_IRQSTAT1_MEMHLDI
+#define MC13783_IRQMASK1_PWRRDYM	MC13783_IRQSTAT1_PWRRDYI
+#define MC13783_IRQMASK1_THWARNLM	MC13783_IRQSTAT1_THWARNLI
+#define MC13783_IRQMASK1_THWARNHM	MC13783_IRQSTAT1_THWARNHI
+#define MC13783_IRQMASK1_CLKM		MC13783_IRQSTAT1_CLKI
+#define MC13783_IRQMASK1_SEMAFM		MC13783_IRQSTAT1_SEMAFI
+#define MC13783_IRQMASK1_MC2BM		MC13783_IRQSTAT1_MC2BI
+#define MC13783_IRQMASK1_HSDETM		MC13783_IRQSTAT1_HSDETI
+#define MC13783_IRQMASK1_HSLM		MC13783_IRQSTAT1_HSLI
+#define MC13783_IRQMASK1_ALSPTHM	MC13783_IRQSTAT1_ALSPTHI
+#define MC13783_IRQMASK1_AHSSHORTM	MC13783_IRQSTAT1_AHSSHORTI
+
+#define MC13783_ADC1		44
+#define MC13783_ADC1_ADEN		(1 << 0)
+#define MC13783_ADC1_RAND		(1 << 1)
+#define MC13783_ADC1_ADSEL		(1 << 3)
+#define MC13783_ADC1_ASC		(1 << 20)
+#define MC13783_ADC1_ADTRIGIGN		(1 << 21)
+
+#define MC13783_NUMREGS 0x3f
+
+void mc13783_lock(struct mc13783 *mc13783)
+{
+	if (!mutex_trylock(&mc13783->lock)) {
+		dev_dbg(&mc13783->spidev->dev, "wait for %s from %pf\n",
+				__func__, __builtin_return_address(0));
+
+		mutex_lock(&mc13783->lock);
+	}
+	dev_dbg(&mc13783->spidev->dev, "%s from %pf\n",
+			__func__, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(mc13783_lock);
 
-#define MC13783_MAX_REG_NUM	0x3f
-#define MC13783_FRAME_MASK	0x00ffffff
-#define MC13783_MAX_REG_NUM	0x3f
-#define MC13783_REG_NUM_SHIFT	0x19
-#define MC13783_WRITE_BIT_SHIFT	31
+void mc13783_unlock(struct mc13783 *mc13783)
+{
+	dev_dbg(&mc13783->spidev->dev, "%s from %pf\n",
+			__func__, __builtin_return_address(0));
+	mutex_unlock(&mc13783->lock);
+}
+EXPORT_SYMBOL(mc13783_unlock);
 
-static inline int spi_rw(struct spi_device *spi, u8 * buf, size_t len)
+#define MC13783_REGOFFSET_SHIFT 25
+int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val)
 {
-	struct spi_transfer t = {
-		.tx_buf = (const void *)buf,
-		.rx_buf = buf,
-		.len = len,
-		.cs_change = 0,
-		.delay_usecs = 0,
-	};
+	struct spi_transfer t;
 	struct spi_message m;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&mc13783->lock));
+
+	if (offset > MC13783_NUMREGS)
+		return -EINVAL;
+
+	*val = offset << MC13783_REGOFFSET_SHIFT;
+
+	memset(&t, 0, sizeof(t));
+
+	t.tx_buf = val;
+	t.rx_buf = val;
+	t.len = sizeof(u32);
 
 	spi_message_init(&m);
 	spi_message_add_tail(&t, &m);
-	if (spi_sync(spi, &m) != 0 || m.status != 0)
-		return -EINVAL;
-	return len - m.actual_length;
-}
 
-static int mc13783_read(struct mc13783 *mc13783, int reg_num, u32 *reg_val)
-{
-	unsigned int frame = 0;
-	int ret = 0;
+	ret = spi_sync(mc13783->spidev, &m);
 
-	if (reg_num > MC13783_MAX_REG_NUM)
-		return -EINVAL;
+	/* error in message.status implies error return from spi_sync */
+	BUG_ON(!ret && m.status);
 
-	frame |= reg_num << MC13783_REG_NUM_SHIFT;
+	if (ret)
+		return ret;
 
-	ret = spi_rw(mc13783->spi_device, (u8 *)&frame, 4);
+	*val &= 0xffffff;
 
-	*reg_val = frame & MC13783_FRAME_MASK;
+	dev_vdbg(&mc13783->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val);
 
-	return ret;
+	return 0;
 }
+EXPORT_SYMBOL(mc13783_reg_read);
 
-static int mc13783_write(struct mc13783 *mc13783, int reg_num, u32 reg_val)
+int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val)
 {
-	unsigned int frame = 0;
+	u32 buf;
+	struct spi_transfer t;
+	struct spi_message m;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&mc13783->lock));
 
-	if (reg_num > MC13783_MAX_REG_NUM)
+	dev_vdbg(&mc13783->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val);
+
+	if (offset > MC13783_NUMREGS || val > 0xffffff)
 		return -EINVAL;
 
-	frame |= (1 << MC13783_WRITE_BIT_SHIFT);
-	frame |= reg_num << MC13783_REG_NUM_SHIFT;
-	frame |= reg_val & MC13783_FRAME_MASK;
+	buf = 1 << 31 | offset << MC13783_REGOFFSET_SHIFT | val;
+
+	memset(&t, 0, sizeof(t));
 
-	return spi_rw(mc13783->spi_device, (u8 *)&frame, 4);
+	t.tx_buf = &buf;
+	t.rx_buf = &buf;
+	t.len = sizeof(u32);
+
+	spi_message_init(&m);
+	spi_message_add_tail(&t, &m);
+
+	ret = spi_sync(mc13783->spidev, &m);
+
+	BUG_ON(!ret && m.status);
+
+	if (ret)
+		return ret;
+
+	return 0;
 }
+EXPORT_SYMBOL(mc13783_reg_write);
 
-int mc13783_reg_read(struct mc13783 *mc13783, int reg_num, u32 *reg_val)
+int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset,
+		u32 mask, u32 val)
 {
 	int ret;
+	u32 valread;
 
-	mutex_lock(&mc13783->io_lock);
-	ret = mc13783_read(mc13783, reg_num, reg_val);
-	mutex_unlock(&mc13783->io_lock);
+	BUG_ON(val & ~mask);
 
-	return ret;
+	ret = mc13783_reg_read(mc13783, offset, &valread);
+	if (ret)
+		return ret;
+
+	valread = (valread & ~mask) | val;
+
+	return mc13783_reg_write(mc13783, offset, valread);
 }
-EXPORT_SYMBOL_GPL(mc13783_reg_read);
+EXPORT_SYMBOL(mc13783_reg_rmw);
 
-int mc13783_reg_write(struct mc13783 *mc13783, int reg_num, u32 reg_val)
+int mc13783_mask(struct mc13783 *mc13783, int irq)
 {
 	int ret;
+	unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1;
+	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
+	u32 mask;
 
-	mutex_lock(&mc13783->io_lock);
-	ret = mc13783_write(mc13783, reg_num, reg_val);
-	mutex_unlock(&mc13783->io_lock);
+	if (irq < 0 || irq >= MC13783_NUM_IRQ)
+		return -EINVAL;
 
-	return ret;
+	ret = mc13783_reg_read(mc13783, offmask, &mask);
+	if (ret)
+		return ret;
+
+	if (mask & irqbit)
+		/* already masked */
+		return 0;
+
+	return mc13783_reg_write(mc13783, offmask, mask | irqbit);
 }
-EXPORT_SYMBOL_GPL(mc13783_reg_write);
+EXPORT_SYMBOL(mc13783_mask);
 
-/**
- * mc13783_set_bits - Bitmask write
- *
- * @mc13783: Pointer to mc13783 control structure
- * @reg:    Register to access
- * @mask:   Mask of bits to change
- * @val:    Value to set for masked bits
- */
-int mc13783_set_bits(struct mc13783 *mc13783, int reg, u32 mask, u32 val)
+int mc13783_unmask(struct mc13783 *mc13783, int irq)
 {
-	u32 tmp;
 	int ret;
+	unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1;
+	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
+	u32 mask;
 
-	mutex_lock(&mc13783->io_lock);
+	if (irq < 0 || irq >= MC13783_NUM_IRQ)
+		return -EINVAL;
 
-	ret = mc13783_read(mc13783, reg, &tmp);
-	tmp = (tmp & ~mask) | val;
-	if (ret == 0)
-		ret = mc13783_write(mc13783, reg, tmp);
+	ret = mc13783_reg_read(mc13783, offmask, &mask);
+	if (ret)
+		return ret;
 
-	mutex_unlock(&mc13783->io_lock);
+	if (!(mask & irqbit))
+		/* already unmasked */
+		return 0;
 
-	return ret;
+	return mc13783_reg_write(mc13783, offmask, mask & ~irqbit);
 }
-EXPORT_SYMBOL_GPL(mc13783_set_bits);
+EXPORT_SYMBOL(mc13783_unmask);
 
-int mc13783_register_irq(struct mc13783 *mc13783, int irq,
-		void (*handler) (int, void *), void *data)
+int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq,
+		irq_handler_t handler, const char *name, void *dev)
 {
-	if (irq < 0 || irq > MC13783_NUM_IRQ || !handler)
+	BUG_ON(!mutex_is_locked(&mc13783->lock));
+	BUG_ON(!handler);
+
+	if (irq < 0 || irq >= MC13783_NUM_IRQ)
 		return -EINVAL;
 
-	if (WARN_ON(mc13783->irq_handler[irq].handler))
+	if (mc13783->irqhandler[irq])
 		return -EBUSY;
 
-	mutex_lock(&mc13783->io_lock);
-	mc13783->irq_handler[irq].handler = handler;
-	mc13783->irq_handler[irq].data = data;
-	mutex_unlock(&mc13783->io_lock);
+	mc13783->irqhandler[irq] = handler;
+	mc13783->irqdata[irq] = dev;
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mc13783_register_irq);
+EXPORT_SYMBOL(mc13783_irq_request_nounmask);
 
-int mc13783_free_irq(struct mc13783 *mc13783, int irq)
+int mc13783_irq_request(struct mc13783 *mc13783, int irq,
+		irq_handler_t handler, const char *name, void *dev)
 {
-	if (irq < 0 || irq > MC13783_NUM_IRQ)
+	int ret;
+
+	ret = mc13783_irq_request_nounmask(mc13783, irq, handler, name, dev);
+	if (ret)
+		return ret;
+
+	ret = mc13783_unmask(mc13783, irq);
+	if (ret) {
+		mc13783->irqhandler[irq] = NULL;
+		mc13783->irqdata[irq] = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mc13783_irq_request);
+
+int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev)
+{
+	int ret;
+	BUG_ON(!mutex_is_locked(&mc13783->lock));
+
+	if (irq < 0 || irq >= MC13783_NUM_IRQ || !mc13783->irqhandler[irq] ||
+			mc13783->irqdata[irq] != dev)
 		return -EINVAL;
 
-	mutex_lock(&mc13783->io_lock);
-	mc13783->irq_handler[irq].handler = NULL;
-	mutex_unlock(&mc13783->io_lock);
+	ret = mc13783_mask(mc13783, irq);
+	if (ret)
+		return ret;
+
+	mc13783->irqhandler[irq] = NULL;
+	mc13783->irqdata[irq] = NULL;
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mc13783_free_irq);
+EXPORT_SYMBOL(mc13783_irq_free);
 
-static void mc13783_irq_work(struct work_struct *work)
+static inline irqreturn_t mc13783_irqhandler(struct mc13783 *mc13783, int irq)
 {
-	struct mc13783 *mc13783 = container_of(work, struct mc13783, work);
-	int i;
-	unsigned int adc_sts;
-
-	/* check if the adc has finished any completion */
-	mc13783_reg_read(mc13783, MC13783_REG_INTERRUPT_STATUS_0, &adc_sts);
-	mc13783_reg_write(mc13783, MC13783_REG_INTERRUPT_STATUS_0,
-			adc_sts & MC13783_INT_STAT_ADCDONEI);
-
-	if (adc_sts & MC13783_INT_STAT_ADCDONEI)
-		complete_all(&mc13783->adc_done);
-
-	for (i = 0; i < MC13783_NUM_IRQ; i++)
-		if (mc13783->irq_handler[i].handler)
-			mc13783->irq_handler[i].handler(i,
-					mc13783->irq_handler[i].data);
-	enable_irq(mc13783->irq);
+	return mc13783->irqhandler[irq](irq, mc13783->irqdata[irq]);
 }
 
-static irqreturn_t mc13783_interrupt(int irq, void *dev_id)
+int mc13783_ackirq(struct mc13783 *mc13783, int irq)
 {
-	struct mc13783 *mc13783 = dev_id;
+	unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1;
+	unsigned int val = 1 << (irq < 24 ? irq : irq - 24);
 
-	disable_irq_nosync(irq);
+	BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ);
 
-	schedule_work(&mc13783->work);
-	return IRQ_HANDLED;
+	return mc13783_reg_write(mc13783, offstat, val);
 }
+EXPORT_SYMBOL(mc13783_ackirq);
 
-/* set adc to ts interrupt mode, which generates touchscreen wakeup interrupt */
-static inline void mc13783_adc_set_ts_irq_mode(struct mc13783 *mc13783)
+/*
+ * returns: number of handled irqs or negative error
+ * locking: holds mc13783->lock
+ */
+static int mc13783_irq_handle(struct mc13783 *mc13783,
+		unsigned int offstat, unsigned int offmask, int baseirq)
 {
-	unsigned int reg_adc0, reg_adc1;
+	u32 stat, mask;
+	int ret = mc13783_reg_read(mc13783, offstat, &stat);
+	int num_handled = 0;
+
+	if (ret)
+		return ret;
+
+	ret = mc13783_reg_read(mc13783, offmask, &mask);
+	if (ret)
+		return ret;
+
+	while (stat & ~mask) {
+		int irq = __ffs(stat & ~mask);
+
+		stat &= ~(1 << irq);
+
+		if (likely(mc13783->irqhandler[baseirq + irq])) {
+			irqreturn_t handled;
 
-	reg_adc0 = MC13783_ADC0_ADREFEN | MC13783_ADC0_ADREFMODE
-			| MC13783_ADC0_TSMOD0;
-	reg_adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN;
+			handled = mc13783_irqhandler(mc13783, baseirq + irq);
+			if (handled == IRQ_HANDLED)
+				num_handled++;
+		} else {
+			dev_err(&mc13783->spidev->dev,
+					"BUG: irq %u but no handler\n",
+					baseirq + irq);
 
-	mc13783_reg_write(mc13783, MC13783_REG_ADC_0, reg_adc0);
-	mc13783_reg_write(mc13783, MC13783_REG_ADC_1, reg_adc1);
+			mask |= 1 << irq;
+
+			ret = mc13783_reg_write(mc13783, offmask, mask);
+		}
+	}
+
+	return num_handled;
 }
 
+static irqreturn_t mc13783_irq_thread(int irq, void *data)
+{
+	struct mc13783 *mc13783 = data;
+	irqreturn_t ret;
+	int handled = 0;
+
+	mc13783_lock(mc13783);
+
+	ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT0,
+			MC13783_IRQMASK0, MC13783_IRQ_ADCDONE);
+	if (ret > 0)
+		handled = 1;
+
+	ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT1,
+			MC13783_IRQMASK1, MC13783_IRQ_1HZ);
+	if (ret > 0)
+		handled = 1;
+
+	mc13783_unlock(mc13783);
+
+	return IRQ_RETVAL(handled);
+}
+
+#define MC13783_ADC1_CHAN0_SHIFT	5
+#define MC13783_ADC1_CHAN1_SHIFT	8
+
+struct mc13783_adcdone_data {
+	struct mc13783 *mc13783;
+	struct completion done;
+};
+
+static irqreturn_t mc13783_handler_adcdone(int irq, void *data)
+{
+	struct mc13783_adcdone_data *adcdone_data = data;
+
+	mc13783_ackirq(adcdone_data->mc13783, irq);
+
+	complete_all(&adcdone_data->done);
+
+	return IRQ_HANDLED;
+}
+
+#define MC13783_ADC_WORKING (1 << 16)
+
 int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 		unsigned int channel, unsigned int *sample)
 {
-	unsigned int reg_adc0, reg_adc1;
-	int i;
+	u32 adc0, adc1, old_adc0;
+	int i, ret;
+	struct mc13783_adcdone_data adcdone_data = {
+		.mc13783 = mc13783,
+	};
+	init_completion(&adcdone_data.done);
+
+	dev_dbg(&mc13783->spidev->dev, "%s\n", __func__);
+
+	mc13783_lock(mc13783);
+
+	if (mc13783->flags & MC13783_ADC_WORKING) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	mc13783->flags |= MC13783_ADC_WORKING;
 
-	mutex_lock(&mc13783->adc_conv_lock);
+	mc13783_reg_read(mc13783, MC13783_ADC0, &old_adc0);
 
-	/* set up auto incrementing anyway to make quick read */
-	reg_adc0 =  MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2;
-	/* enable the adc, ignore external triggering and set ASC to trigger
-	 * conversion */
-	reg_adc1 =  MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN
-		| MC13783_ADC1_ASC;
+	adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2;
+	adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC;
 
-	/* setup channel number */
 	if (channel > 7)
-		reg_adc1 |= MC13783_ADC1_ADSEL;
+		adc1 |= MC13783_ADC1_ADSEL;
 
 	switch (mode) {
 	case MC13783_ADC_MODE_TS:
-		/* enables touch screen reference mode and set touchscreen mode
-		 * to position mode */
-		reg_adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_ADREFMODE
+		adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_ADREFMODE
 			| MC13783_ADC0_TSMOD0 | MC13783_ADC0_TSMOD1;
-		reg_adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
+		adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
 		break;
+
 	case MC13783_ADC_MODE_SINGLE_CHAN:
-		reg_adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT;
-		reg_adc1 |= MC13783_ADC1_RAND;
+		adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK;
+		adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT;
+		adc1 |= MC13783_ADC1_RAND;
 		break;
+
 	case MC13783_ADC_MODE_MULT_CHAN:
-		reg_adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
+		adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK;
+		adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
 		break;
+
 	default:
+		mc13783_unlock(mc13783);
 		return -EINVAL;
 	}
 
-	mc13783_reg_write(mc13783, MC13783_REG_ADC_0, reg_adc0);
-	mc13783_reg_write(mc13783, MC13783_REG_ADC_1, reg_adc1);
+	dev_dbg(&mc13783->spidev->dev, "%s: request irq\n", __func__);
+	mc13783_irq_request(mc13783, MC13783_IRQ_ADCDONE,
+			mc13783_handler_adcdone, __func__, &adcdone_data);
+	mc13783_ackirq(mc13783, MC13783_IRQ_ADCDONE);
 
-	wait_for_completion_interruptible(&mc13783->adc_done);
+	mc13783_reg_write(mc13783, MC13783_REG_ADC_0, adc0);
+	mc13783_reg_write(mc13783, MC13783_REG_ADC_1, adc1);
 
-	for (i = 0; i < 4; i++)
-		mc13783_reg_read(mc13783, MC13783_REG_ADC_2, &sample[i]);
+	mc13783_unlock(mc13783);
 
-	if (mc13783->ts_active)
-		mc13783_adc_set_ts_irq_mode(mc13783);
+	ret = wait_for_completion_interruptible_timeout(&adcdone_data.done, HZ);
 
-	mutex_unlock(&mc13783->adc_conv_lock);
+	if (!ret)
+		ret = -ETIMEDOUT;
 
-	return 0;
+	mc13783_lock(mc13783);
+
+	mc13783_irq_free(mc13783, MC13783_IRQ_ADCDONE, &adcdone_data);
+
+	if (ret > 0)
+		for (i = 0; i < 4; ++i) {
+			ret = mc13783_reg_read(mc13783,
+					MC13783_REG_ADC_2, &sample[i]);
+			if (ret)
+				break;
+		}
+
+	if (mode == MC13783_ADC_MODE_TS)
+		/* restore TSMOD */
+		mc13783_reg_write(mc13783, MC13783_REG_ADC_0, old_adc0);
+
+	mc13783->flags &= ~MC13783_ADC_WORKING;
+out:
+	mc13783_unlock(mc13783);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion);
 
-void mc13783_adc_set_ts_status(struct mc13783 *mc13783, unsigned int status)
+static int mc13783_add_subdevice_pdata(struct mc13783 *mc13783,
+		const char *name, void *pdata, size_t pdata_size)
 {
-	mc13783->ts_active = status;
+	struct mfd_cell cell = {
+		.name = name,
+		.platform_data = pdata,
+		.data_size = pdata_size,
+	};
+
+	return mfd_add_devices(&mc13783->spidev->dev, -1, &cell, 1, NULL, 0);
+}
+
+static int mc13783_add_subdevice(struct mc13783 *mc13783, const char *name)
+{
+	return mc13783_add_subdevice_pdata(mc13783, name, NULL, 0);
 }
-EXPORT_SYMBOL_GPL(mc13783_adc_set_ts_status);
 
 static int mc13783_check_revision(struct mc13783 *mc13783)
 {
 	u32 rev_id, rev1, rev2, finid, icid;
 
-	mc13783_read(mc13783, MC13783_REG_REVISION, &rev_id);
+	mc13783_reg_read(mc13783, MC13783_REG_REVISION, &rev_id);
 
 	rev1 = (rev_id & 0x018) >> 3;
 	rev2 = (rev_id & 0x007);
@@ -292,38 +555,24 @@ static int mc13783_check_revision(struct mc13783 *mc13783)
 		rev1 = 3;
 
 	if (rev1 == 0 || icid != 2) {
-		dev_err(mc13783->dev, "No MC13783 detected.\n");
+		dev_err(&mc13783->spidev->dev, "No MC13783 detected.\n");
 		return -ENODEV;
 	}
 
-	mc13783->revision = ((rev1 * 10) + rev2);
-	dev_info(mc13783->dev, "MC13783 Rev %d.%d FinVer %x detected\n", rev1,
-	       rev2, finid);
+	dev_info(&mc13783->spidev->dev,
+			"MC13783 Rev %d.%d FinVer %x detected\n",
+			rev1, rev2, finid);
 
 	return 0;
 }
 
-/*
- * Register a client device.  This is non-fatal since there is no need to
- * fail the entire device init due to a single platform device failing.
- */
-static void mc13783_client_dev_register(struct mc13783 *mc13783,
-				       const char *name)
-{
-	struct mfd_cell cell = {};
-
-	cell.name = name;
-
-	mfd_add_devices(mc13783->dev, -1, &cell, 1, NULL, 0);
-}
-
-static int __devinit mc13783_probe(struct spi_device *spi)
+static int mc13783_probe(struct spi_device *spi)
 {
 	struct mc13783 *mc13783;
-	struct mc13783_platform_data *pdata = spi->dev.platform_data;
+	struct mc13783_platform_data *pdata = dev_get_platdata(&spi->dev);
 	int ret;
 
-	mc13783 = kzalloc(sizeof(struct mc13783), GFP_KERNEL);
+	mc13783 = kzalloc(sizeof(*mc13783), GFP_KERNEL);
 	if (!mc13783)
 		return -ENOMEM;
 
@@ -332,96 +581,104 @@ static int __devinit mc13783_probe(struct spi_device *spi)
 	spi->bits_per_word = 32;
 	spi_setup(spi);
 
-	mc13783->spi_device = spi;
-	mc13783->dev = &spi->dev;
-	mc13783->irq = spi->irq;
+	mc13783->spidev = spi;
+
+	mutex_init(&mc13783->lock);
+	mc13783_lock(mc13783);
+
+	ret = mc13783_check_revision(mc13783);
+	if (ret)
+		goto err_revision;
+
+	/* mask all irqs */
+	ret = mc13783_reg_write(mc13783, MC13783_IRQMASK0, 0x00ffffff);
+	if (ret)
+		goto err_mask;
 
-	INIT_WORK(&mc13783->work, mc13783_irq_work);
-	mutex_init(&mc13783->io_lock);
-	mutex_init(&mc13783->adc_conv_lock);
-	init_completion(&mc13783->adc_done);
+	ret = mc13783_reg_write(mc13783, MC13783_IRQMASK1, 0x00ffffff);
+	if (ret)
+		goto err_mask;
+
+	ret = request_threaded_irq(spi->irq, NULL, mc13783_irq_thread,
+			IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13783", mc13783);
+
+	if (ret) {
+err_mask:
+err_revision:
+		mutex_unlock(&mc13783->lock);
+		dev_set_drvdata(&spi->dev, NULL);
+		kfree(mc13783);
+		return ret;
+	}
 
+	/* This should go away (BEGIN) */
 	if (pdata) {
 		mc13783->flags = pdata->flags;
 		mc13783->regulators = pdata->regulators;
 		mc13783->num_regulators = pdata->num_regulators;
 	}
+	/* This should go away (END) */
 
-	if (mc13783_check_revision(mc13783)) {
-		ret = -ENODEV;
-		goto err_out;
+	if (pdata->flags & MC13783_USE_ADC)
+		mc13783_add_subdevice(mc13783, "mc13783-adc");
+
+	if (pdata->flags & MC13783_USE_CODEC)
+		mc13783_add_subdevice(mc13783, "mc13783-codec");
+
+	if (pdata->flags & MC13783_USE_REGULATOR) {
+		struct mc13783_regulator_platform_data regulator_pdata = {
+			.num_regulators = pdata->num_regulators,
+			.regulators = pdata->regulators,
+		};
+
+		mc13783_add_subdevice_pdata(mc13783, "mc13783-regulator",
+				&regulator_pdata, sizeof(regulator_pdata));
 	}
 
-	/* clear and mask all interrupts */
-	mc13783_reg_write(mc13783, MC13783_REG_INTERRUPT_STATUS_0, 0x00ffffff);
-	mc13783_reg_write(mc13783, MC13783_REG_INTERRUPT_MASK_0, 0x00ffffff);
-	mc13783_reg_write(mc13783, MC13783_REG_INTERRUPT_STATUS_1, 0x00ffffff);
-	mc13783_reg_write(mc13783, MC13783_REG_INTERRUPT_MASK_1, 0x00ffffff);
+	if (pdata->flags & MC13783_USE_RTC)
+		mc13783_add_subdevice(mc13783, "mc13783-rtc");
 
-	/* unmask adcdone interrupts */
-	mc13783_set_bits(mc13783, MC13783_REG_INTERRUPT_MASK_0,
-			MC13783_INT_MASK_ADCDONEM, 0);
+	if (pdata->flags & MC13783_USE_TOUCHSCREEN)
+		mc13783_add_subdevice(mc13783, "mc13783-ts");
 
-	ret = request_irq(mc13783->irq, mc13783_interrupt,
-			IRQF_DISABLED | IRQF_TRIGGER_HIGH, "mc13783",
-			mc13783);
-	if (ret)
-		goto err_out;
-
-	if (mc13783->flags & MC13783_USE_CODEC)
-		mc13783_client_dev_register(mc13783, "mc13783-codec");
-	if (mc13783->flags & MC13783_USE_ADC)
-		mc13783_client_dev_register(mc13783, "mc13783-adc");
-	if (mc13783->flags & MC13783_USE_RTC)
-		mc13783_client_dev_register(mc13783, "mc13783-rtc");
-	if (mc13783->flags & MC13783_USE_REGULATOR)
-		mc13783_client_dev_register(mc13783, "mc13783-regulator");
-	if (mc13783->flags & MC13783_USE_TOUCHSCREEN)
-		mc13783_client_dev_register(mc13783, "mc13783-ts");
+	mc13783_unlock(mc13783);
 
 	return 0;
-
-err_out:
-	kfree(mc13783);
-	return ret;
 }
 
 static int __devexit mc13783_remove(struct spi_device *spi)
 {
-	struct mc13783 *mc13783;
+	struct mc13783 *mc13783 = dev_get_drvdata(&spi->dev);
 
-	mc13783 = dev_get_drvdata(&spi->dev);
-
-	free_irq(mc13783->irq, mc13783);
+	free_irq(mc13783->spidev->irq, mc13783);
 
 	mfd_remove_devices(&spi->dev);
 
 	return 0;
 }
 
-static struct spi_driver pmic_driver = {
+static struct spi_driver mc13783_driver = {
 	.driver = {
-		   .name = "mc13783",
-		   .bus = &spi_bus_type,
-		   .owner = THIS_MODULE,
+		.name = "mc13783",
+		.bus = &spi_bus_type,
+		.owner = THIS_MODULE,
 	},
 	.probe = mc13783_probe,
 	.remove = __devexit_p(mc13783_remove),
 };
 
-static int __init pmic_init(void)
+static int __init mc13783_init(void)
 {
-	return spi_register_driver(&pmic_driver);
+	return spi_register_driver(&mc13783_driver);
 }
-subsys_initcall(pmic_init);
+subsys_initcall(mc13783_init);
 
-static void __exit pmic_exit(void)
+static void __exit mc13783_exit(void)
 {
-	spi_unregister_driver(&pmic_driver);
+	spi_unregister_driver(&mc13783_driver);
 }
-module_exit(pmic_exit);
-
-MODULE_DESCRIPTION("Core/Protocol driver for Freescale MC13783 PMIC");
-MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
-MODULE_LICENSE("GPL");
+module_exit(mc13783_exit);
 
+MODULE_DESCRIPTION("Core driver for Freescale MC13783 PMIC");
+MODULE_AUTHOR("Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/mc13783-private.h b/include/linux/mfd/mc13783-private.h
index 47e698cb0f16..95cf9360553f 100644
--- a/include/linux/mfd/mc13783-private.h
+++ b/include/linux/mfd/mc13783-private.h
@@ -24,52 +24,23 @@
 
 #include <linux/platform_device.h>
 #include <linux/mfd/mc13783.h>
-#include <linux/workqueue.h>
 #include <linux/mutex.h>
-
-struct mc13783_irq {
-	void (*handler)(int, void *);
-	void *data;
-};
-
-#define MC13783_NUM_IRQ		2
-#define MC13783_IRQ_TS		0
-#define MC13783_IRQ_REGULATOR	1
-
-#define MC13783_ADC_MODE_TS		1
-#define MC13783_ADC_MODE_SINGLE_CHAN	2
-#define MC13783_ADC_MODE_MULT_CHAN	3
+#include <linux/interrupt.h>
 
 struct mc13783 {
-	int revision;
-	struct device *dev;
-	struct spi_device *spi_device;
-
-	int (*read_dev)(void *data, char reg, int count, u32 *dst);
-	int (*write_dev)(void *data, char reg, int count, const u32 *src);
-
-	struct mutex io_lock;
-	void *io_data;
+	struct spi_device *spidev;
+	struct mutex lock;
 	int irq;
-	unsigned int flags;
+	int flags;
 
-	struct mc13783_irq irq_handler[MC13783_NUM_IRQ];
-	struct work_struct work;
-	struct completion adc_done;
-	unsigned int ts_active;
-	struct mutex adc_conv_lock;
+	irq_handler_t irqhandler[MC13783_NUM_IRQ];
+	void *irqdata[MC13783_NUM_IRQ];
 
+	/* XXX these should go as platformdata to the regulator subdevice */
 	struct mc13783_regulator_init_data *regulators;
 	int num_regulators;
 };
 
-int mc13783_reg_read(struct mc13783 *, int reg_num, u32 *);
-int mc13783_reg_write(struct mc13783 *, int, u32);
-int mc13783_set_bits(struct mc13783 *, int, u32, u32);
-int mc13783_free_irq(struct mc13783 *mc13783, int irq);
-int mc13783_register_irq(struct mc13783 *mc13783, int irq,
-		void (*handler) (int, void *), void *data);
-
 #define MC13783_REG_INTERRUPT_STATUS_0		 0
 #define MC13783_REG_INTERRUPT_MASK_0		 1
 #define MC13783_REG_INTERRUPT_SENSE_0		 2
@@ -136,55 +107,6 @@ int mc13783_register_irq(struct mc13783 *mc13783, int irq,
 #define MC13783_REG_TEST_3			63
 #define MC13783_REG_NB				64
 
-
-/*
- * Interrupt Status
- */
-#define MC13783_INT_STAT_ADCDONEI	(1 << 0)
-#define MC13783_INT_STAT_ADCBISDONEI	(1 << 1)
-#define MC13783_INT_STAT_TSI		(1 << 2)
-#define MC13783_INT_STAT_WHIGHI		(1 << 3)
-#define MC13783_INT_STAT_WLOWI		(1 << 4)
-#define MC13783_INT_STAT_CHGDETI	(1 << 6)
-#define MC13783_INT_STAT_CHGOVI		(1 << 7)
-#define MC13783_INT_STAT_CHGREVI	(1 << 8)
-#define MC13783_INT_STAT_CHGSHORTI	(1 << 9)
-#define MC13783_INT_STAT_CCCVI		(1 << 10)
-#define MC13783_INT_STAT_CHGCURRI	(1 << 11)
-#define MC13783_INT_STAT_BPONI		(1 << 12)
-#define MC13783_INT_STAT_LOBATLI	(1 << 13)
-#define MC13783_INT_STAT_LOBATHI	(1 << 14)
-#define MC13783_INT_STAT_UDPI		(1 << 15)
-#define MC13783_INT_STAT_USBI		(1 << 16)
-#define MC13783_INT_STAT_IDI		(1 << 19)
-#define MC13783_INT_STAT_Unused		(1 << 20)
-#define MC13783_INT_STAT_SE1I		(1 << 21)
-#define MC13783_INT_STAT_CKDETI		(1 << 22)
-#define MC13783_INT_STAT_UDMI		(1 << 23)
-
-/*
- * Interrupt Mask
- */
-#define MC13783_INT_MASK_ADCDONEM	(1 << 0)
-#define MC13783_INT_MASK_ADCBISDONEM	(1 << 1)
-#define MC13783_INT_MASK_TSM		(1 << 2)
-#define MC13783_INT_MASK_WHIGHM		(1 << 3)
-#define MC13783_INT_MASK_WLOWM		(1 << 4)
-#define MC13783_INT_MASK_CHGDETM	(1 << 6)
-#define MC13783_INT_MASK_CHGOVM		(1 << 7)
-#define MC13783_INT_MASK_CHGREVM	(1 << 8)
-#define MC13783_INT_MASK_CHGSHORTM	(1 << 9)
-#define MC13783_INT_MASK_CCCVM		(1 << 10)
-#define MC13783_INT_MASK_CHGCURRM	(1 << 11)
-#define MC13783_INT_MASK_BPONM		(1 << 12)
-#define MC13783_INT_MASK_LOBATLM	(1 << 13)
-#define MC13783_INT_MASK_LOBATHM	(1 << 14)
-#define MC13783_INT_MASK_UDPM		(1 << 15)
-#define MC13783_INT_MASK_USBM		(1 << 16)
-#define MC13783_INT_MASK_IDM		(1 << 19)
-#define MC13783_INT_MASK_SE1M		(1 << 21)
-#define MC13783_INT_MASK_CKDETM		(1 << 22)
-
 /*
  * Reg Regulator Mode 0
  */
@@ -284,113 +206,15 @@ int mc13783_register_irq(struct mc13783 *mc13783, int irq,
 #define MC13783_SWCTRL_SW3_STBY		(1 << 21)
 #define MC13783_SWCTRL_SW3_MODE		(1 << 22)
 
-/*
- * ADC/Touch
- */
-#define MC13783_ADC0_LICELLCON		(1 << 0)
-#define MC13783_ADC0_CHRGICON		(1 << 1)
-#define MC13783_ADC0_BATICON		(1 << 2)
-#define MC13783_ADC0_RTHEN 		(1 << 3)
-#define MC13783_ADC0_DTHEN		(1 << 4)
-#define MC13783_ADC0_UIDEN		(1 << 5)
-#define MC13783_ADC0_ADOUTEN 		(1 << 6)
-#define MC13783_ADC0_ADOUTPER		(1 << 7)
-#define MC13783_ADC0_ADREFEN		(1 << 10)
-#define MC13783_ADC0_ADREFMODE		(1 << 11)
-#define MC13783_ADC0_TSMOD0		(1 << 12)
-#define MC13783_ADC0_TSMOD1		(1 << 13)
-#define MC13783_ADC0_TSMOD2		(1 << 14)
-#define MC13783_ADC0_CHRGRAWDIV		(1 << 15)
-#define MC13783_ADC0_ADINC1		(1 << 16)
-#define MC13783_ADC0_ADINC2		(1 << 17)
-#define MC13783_ADC0_WCOMP		(1 << 18)
-#define MC13783_ADC0_ADCBIS0		(1 << 23)
-
-#define MC13783_ADC1_ADEN		(1 << 0)
-#define MC13783_ADC1_RAND		(1 << 1)
-#define MC13783_ADC1_ADSEL		(1 << 3)
-#define MC13783_ADC1_TRIGMASK		(1 << 4)
-#define MC13783_ADC1_ADA10		(1 << 5)
-#define MC13783_ADC1_ADA11		(1 << 6)
-#define MC13783_ADC1_ADA12		(1 << 7)
-#define MC13783_ADC1_ADA20		(1 << 8)
-#define MC13783_ADC1_ADA21		(1 << 9)
-#define MC13783_ADC1_ADA22		(1 << 10)
-#define MC13783_ADC1_ATO0		(1 << 11)
-#define MC13783_ADC1_ATO1		(1 << 12)
-#define MC13783_ADC1_ATO2		(1 << 13)
-#define MC13783_ADC1_ATO3		(1 << 14)
-#define MC13783_ADC1_ATO4		(1 << 15)
-#define MC13783_ADC1_ATO5		(1 << 16)
-#define MC13783_ADC1_ATO6		(1 << 17)
-#define MC13783_ADC1_ATO7		(1 << 18)
-#define MC13783_ADC1_ATOX		(1 << 19)
-#define MC13783_ADC1_ASC		(1 << 20)
-#define MC13783_ADC1_ADTRIGIGN		(1 << 21)
-#define MC13783_ADC1_ADONESHOT		(1 << 22)
-#define MC13783_ADC1_ADCBIS1		(1 << 23)
-
-#define MC13783_ADC1_CHAN0_SHIFT	5
-#define MC13783_ADC1_CHAN1_SHIFT	8
-
-#define MC13783_ADC2_ADD10		(1 << 2)
-#define MC13783_ADC2_ADD11		(1 << 3)
-#define MC13783_ADC2_ADD12		(1 << 4)
-#define MC13783_ADC2_ADD13		(1 << 5)
-#define MC13783_ADC2_ADD14		(1 << 6)
-#define MC13783_ADC2_ADD15		(1 << 7)
-#define MC13783_ADC2_ADD16		(1 << 8)
-#define MC13783_ADC2_ADD17		(1 << 9)
-#define MC13783_ADC2_ADD18		(1 << 10)
-#define MC13783_ADC2_ADD19		(1 << 11)
-#define MC13783_ADC2_ADD20		(1 << 14)
-#define MC13783_ADC2_ADD21		(1 << 15)
-#define MC13783_ADC2_ADD22		(1 << 16)
-#define MC13783_ADC2_ADD23		(1 << 17)
-#define MC13783_ADC2_ADD24		(1 << 18)
-#define MC13783_ADC2_ADD25		(1 << 19)
-#define MC13783_ADC2_ADD26		(1 << 20)
-#define MC13783_ADC2_ADD27		(1 << 21)
-#define MC13783_ADC2_ADD28		(1 << 22)
-#define MC13783_ADC2_ADD29		(1 << 23)
+static inline int mc13783_set_bits(struct mc13783 *mc13783, unsigned int offset,
+		u32 mask, u32 val)
+{
+	int ret;
+	mc13783_lock(mc13783);
+	ret = mc13783_reg_rmw(mc13783, offset, mask, val);
+	mc13783_unlock(mc13783);
 
-#define MC13783_ADC3_WHIGH0		(1 << 0)
-#define MC13783_ADC3_WHIGH1		(1 << 1)
-#define MC13783_ADC3_WHIGH2		(1 << 2)
-#define MC13783_ADC3_WHIGH3		(1 << 3)
-#define MC13783_ADC3_WHIGH4		(1 << 4)
-#define MC13783_ADC3_WHIGH5		(1 << 5)
-#define MC13783_ADC3_ICID0		(1 << 6)
-#define MC13783_ADC3_ICID1		(1 << 7)
-#define MC13783_ADC3_ICID2		(1 << 8)
-#define MC13783_ADC3_WLOW0		(1 << 9)
-#define MC13783_ADC3_WLOW1		(1 << 10)
-#define MC13783_ADC3_WLOW2		(1 << 11)
-#define MC13783_ADC3_WLOW3		(1 << 12)
-#define MC13783_ADC3_WLOW4		(1 << 13)
-#define MC13783_ADC3_WLOW5		(1 << 14)
-#define MC13783_ADC3_ADCBIS2		(1 << 23)
-
-#define MC13783_ADC4_ADDBIS10		(1 << 2)
-#define MC13783_ADC4_ADDBIS11		(1 << 3)
-#define MC13783_ADC4_ADDBIS12		(1 << 4)
-#define MC13783_ADC4_ADDBIS13		(1 << 5)
-#define MC13783_ADC4_ADDBIS14		(1 << 6)
-#define MC13783_ADC4_ADDBIS15		(1 << 7)
-#define MC13783_ADC4_ADDBIS16		(1 << 8)
-#define MC13783_ADC4_ADDBIS17		(1 << 9)
-#define MC13783_ADC4_ADDBIS18		(1 << 10)
-#define MC13783_ADC4_ADDBIS19		(1 << 11)
-#define MC13783_ADC4_ADDBIS20		(1 << 14)
-#define MC13783_ADC4_ADDBIS21		(1 << 15)
-#define MC13783_ADC4_ADDBIS22		(1 << 16)
-#define MC13783_ADC4_ADDBIS23		(1 << 17)
-#define MC13783_ADC4_ADDBIS24		(1 << 18)
-#define MC13783_ADC4_ADDBIS25		(1 << 19)
-#define MC13783_ADC4_ADDBIS26		(1 << 20)
-#define MC13783_ADC4_ADDBIS27		(1 << 21)
-#define MC13783_ADC4_ADDBIS28		(1 << 22)
-#define MC13783_ADC4_ADDBIS29		(1 << 23)
+	return ret;
+}
 
 #endif /* __LINUX_MFD_MC13783_PRIV_H */
-
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index b3a2a7243573..35680409b8cf 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -1,28 +1,50 @@
 /*
- * Copyright 2009 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright 2009 Pengutronix
+ * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
  *
- * Initial development of this code was funded by
- * Phytec Messtechnik GmbH, http://www.phytec.de
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
  */
+#ifndef __LINUX_MFD_MC13783_H
+#define __LINUX_MFD_MC13783_H
 
-#ifndef __INCLUDE_LINUX_MFD_MC13783_H
-#define __INCLUDE_LINUX_MFD_MC13783_H
+#include <linux/interrupt.h>
 
 struct mc13783;
+
+void mc13783_lock(struct mc13783 *mc13783);
+void mc13783_unlock(struct mc13783 *mc13783);
+
+int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val);
+int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val);
+int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset,
+		u32 mask, u32 val);
+
+int mc13783_irq_request(struct mc13783 *mc13783, int irq,
+		irq_handler_t handler, const char *name, void *dev);
+int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq,
+		irq_handler_t handler, const char *name, void *dev);
+int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev);
+int mc13783_ackirq(struct mc13783 *mc13783, int irq);
+
+int mc13783_mask(struct mc13783 *mc13783, int irq);
+int mc13783_unmask(struct mc13783 *mc13783, int irq);
+
+#define MC13783_ADC0		43
+#define MC13783_ADC0_ADREFEN		(1 << 10)
+#define MC13783_ADC0_ADREFMODE		(1 << 11)
+#define MC13783_ADC0_TSMOD0		(1 << 12)
+#define MC13783_ADC0_TSMOD1		(1 << 13)
+#define MC13783_ADC0_TSMOD2		(1 << 14)
+#define MC13783_ADC0_ADINC1		(1 << 16)
+#define MC13783_ADC0_ADINC2		(1 << 17)
+
+#define MC13783_ADC0_TSMOD_MASK		(MC13783_ADC0_TSMOD0 | \
+					MC13783_ADC0_TSMOD1 | \
+					MC13783_ADC0_TSMOD2)
+
+/* to be cleaned up */
 struct regulator_init_data;
 
 struct mc13783_regulator_init_data {
@@ -30,23 +52,30 @@ struct mc13783_regulator_init_data {
 	struct regulator_init_data *init_data;
 };
 
-struct mc13783_platform_data {
-	struct mc13783_regulator_init_data *regulators;
+struct mc13783_regulator_platform_data {
 	int num_regulators;
-	unsigned int flags;
+	struct mc13783_regulator_init_data *regulators;
 };
 
-/* mc13783_platform_data flags */
+struct mc13783_platform_data {
+	int num_regulators;
+	struct mc13783_regulator_init_data *regulators;
+
 #define MC13783_USE_TOUCHSCREEN (1 << 0)
 #define MC13783_USE_CODEC	(1 << 1)
 #define MC13783_USE_ADC		(1 << 2)
 #define MC13783_USE_RTC		(1 << 3)
 #define MC13783_USE_REGULATOR	(1 << 4)
+	unsigned int flags;
+};
+
+#define MC13783_ADC_MODE_TS		1
+#define MC13783_ADC_MODE_SINGLE_CHAN	2
+#define MC13783_ADC_MODE_MULT_CHAN	3
 
 int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 		unsigned int channel, unsigned int *sample);
 
-void mc13783_adc_set_ts_status(struct mc13783 *mc13783, unsigned int status);
 
 #define	MC13783_SW_SW1A		0
 #define	MC13783_SW_SW1B		1
@@ -80,5 +109,46 @@ void mc13783_adc_set_ts_status(struct mc13783 *mc13783, unsigned int status);
 #define	MC13783_REGU_V3		29
 #define	MC13783_REGU_V4		30
 
-#endif /* __INCLUDE_LINUX_MFD_MC13783_H */
+#define MC13783_IRQ_ADCDONE	0
+#define MC13783_IRQ_ADCBISDONE	1
+#define MC13783_IRQ_TS		2
+#define MC13783_IRQ_WHIGH	3
+#define MC13783_IRQ_WLOW	4
+#define MC13783_IRQ_CHGDET	6
+#define MC13783_IRQ_CHGOV	7
+#define MC13783_IRQ_CHGREV	8
+#define MC13783_IRQ_CHGSHORT	9
+#define MC13783_IRQ_CCCV	10
+#define MC13783_IRQ_CHGCURR	11
+#define MC13783_IRQ_BPON	12
+#define MC13783_IRQ_LOBATL	13
+#define MC13783_IRQ_LOBATH	14
+#define MC13783_IRQ_UDP		15
+#define MC13783_IRQ_USB		16
+#define MC13783_IRQ_ID		19
+#define MC13783_IRQ_SE1		21
+#define MC13783_IRQ_CKDET	22
+#define MC13783_IRQ_UDM		23
+#define MC13783_IRQ_1HZ		24
+#define MC13783_IRQ_TODA	25
+#define MC13783_IRQ_ONOFD1	27
+#define MC13783_IRQ_ONOFD2	28
+#define MC13783_IRQ_ONOFD3	29
+#define MC13783_IRQ_SYSRST	30
+#define MC13783_IRQ_RTCRST	31
+#define MC13783_IRQ_PC		32
+#define MC13783_IRQ_WARM	33
+#define MC13783_IRQ_MEMHLD	34
+#define MC13783_IRQ_PWRRDY	35
+#define MC13783_IRQ_THWARNL	36
+#define MC13783_IRQ_THWARNH	37
+#define MC13783_IRQ_CLK		38
+#define MC13783_IRQ_SEMAF	39
+#define MC13783_IRQ_MC2B	41
+#define MC13783_IRQ_HSDET	42
+#define MC13783_IRQ_HSL		43
+#define MC13783_IRQ_ALSPTH	44
+#define MC13783_IRQ_AHSSHORT	45
+#define MC13783_NUM_IRQ		46
 
+#endif /* __LINUX_MFD_MC13783_H */
-- 
cgit v1.2.3


From ed89a7551c65e1db44f1a6b7be6efce178fc87d0 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sun, 29 Nov 2009 13:36:10 +0100
Subject: mfd: Remove ezx-pcap defines for custom led gpio encoding

We used these, in a first version of leds-pcap driver, in order to encode gpio
enabling and gpio inversion for a led inside the variable used for the gpio
number. In the new leds-pcap driver we rely on gpio_is_valid() to derive if a
led is gpio enabled and we have a dedicated flag to tell if the gpio value has
to be inverted.

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/ezx-pcap.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h
index 3402042ddc31..40c372165f3e 100644
--- a/include/linux/mfd/ezx-pcap.h
+++ b/include/linux/mfd/ezx-pcap.h
@@ -231,9 +231,6 @@ void pcap_set_ts_bits(struct pcap_chip *, u32);
 #define PCAP_LED_4MA		1
 #define PCAP_LED_5MA		2
 #define PCAP_LED_9MA		3
-#define PCAP_LED_GPIO_VAL_MASK	0x00ffffff
-#define PCAP_LED_GPIO_EN	0x01000000
-#define PCAP_LED_GPIO_INVERT	0x02000000
 #define PCAP_LED_T_MASK		0xf
 #define PCAP_LED_C_MASK		0x3
 #define PCAP_BL_MASK		0x1f
-- 
cgit v1.2.3


From ab4abe056d8828341d2a7d6463b13eafaf210181 Mon Sep 17 00:00:00 2001
From: Juha Keski-Saari <ext-juha.1.keski-saari@nokia.com>
Date: Fri, 11 Dec 2009 11:12:15 +0100
Subject: mfd: Add all twl4030 regulators to the twl4030 mfd driver

Add all twl4030 regulators to the twl4030 mfd driver and
twl4030_platform_data

Signed-off-by: Juha Keski-Saari <ext-juha.1.keski-saari@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-core.c  | 26 ++++++++++++++++++++++++--
 include/linux/i2c/twl4030.h |  8 ++++++--
 2 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 334c86fccede..e4a5d489b8e7 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -625,11 +625,21 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 	}
 
 	if (twl_has_regulator()) {
-		/*
 		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
-		*/
+
+		child = add_regulator(TWL4030_REG_VIO, pdata->vio);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDD1, pdata->vdd1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDD2, pdata->vdd2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 
 		child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1);
 		if (IS_ERR(child))
@@ -645,6 +655,18 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 				pdata->vaux2);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTANA1, pdata->vintana1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTANA2, pdata->vintana2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTDIG, pdata->vintdig);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	/* maybe add LDOs that are omitted on cost-reduced parts */
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index efa62eb497b8..a50bcf8a4048 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -484,8 +484,12 @@ struct twl4030_platform_data {
 	struct regulator_init_data		*vaux2;
 	struct regulator_init_data		*vaux3;
 	struct regulator_init_data		*vaux4;
-
-	/* REVISIT more to come ... _nothing_ should be hard-wired */
+	struct regulator_init_data		*vio;
+	struct regulator_init_data		*vdd1;
+	struct regulator_init_data		*vdd2;
+	struct regulator_init_data		*vintana1;
+	struct regulator_init_data		*vintana2;
+	struct regulator_init_data		*vintdig;
 };
 
 /*----------------------------------------------------------------------*/
-- 
cgit v1.2.3


From b07682b6056eb6701f8cb86aa5800e6f2ea7919b Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Sun, 13 Dec 2009 20:05:51 +0100
Subject: mfd: Rename twl4030* driver files to enable re-use

The upcoming TWL6030 is companion chip for OMAP4 like the current TWL4030
for OMAP3. The common modules like RTC, Regulator creates opportunity
to re-use the most of the code from twl4030.

This patch renames few common drivers twl4030* files to twl* to enable
the code re-use.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/mach-omap2/board-2430sdp.c      |   2 +-
 arch/arm/mach-omap2/board-3430sdp.c      |   2 +-
 arch/arm/mach-omap2/board-ldp.c          |   2 +-
 arch/arm/mach-omap2/board-omap3beagle.c  |   2 +-
 arch/arm/mach-omap2/board-omap3pandora.c |   2 +-
 arch/arm/mach-omap2/board-overo.c        |   2 +-
 drivers/gpio/twl4030-gpio.c              |   2 +-
 drivers/input/keyboard/twl4030_keypad.c  |   2 +-
 drivers/input/misc/twl4030-pwrbutton.c   |   2 +-
 drivers/mfd/Makefile                     |   2 +-
 drivers/mfd/twl-core.c                   | 929 +++++++++++++++++++++++++++++++
 drivers/mfd/twl4030-core.c               | 929 -------------------------------
 drivers/mfd/twl4030-irq.c                |   2 +-
 drivers/mfd/twl4030-power.c              |   2 +-
 drivers/regulator/Makefile               |   2 +-
 drivers/regulator/twl-regulator.c        | 500 +++++++++++++++++
 drivers/regulator/twl4030-regulator.c    | 500 -----------------
 drivers/rtc/Makefile                     |   2 +-
 drivers/rtc/rtc-twl.c                    | 540 ++++++++++++++++++
 drivers/rtc/rtc-twl4030.c                | 540 ------------------
 drivers/usb/otg/twl4030-usb.c            |   2 +-
 drivers/video/omap/lcd_2430sdp.c         |   2 +-
 drivers/watchdog/twl4030_wdt.c           |   2 +-
 include/linux/i2c/twl.h                  | 550 ++++++++++++++++++
 include/linux/i2c/twl4030.h              | 550 ------------------
 sound/soc/codecs/twl4030.c               |   2 +-
 26 files changed, 2537 insertions(+), 2537 deletions(-)
 create mode 100644 drivers/mfd/twl-core.c
 delete mode 100644 drivers/mfd/twl4030-core.c
 create mode 100644 drivers/regulator/twl-regulator.c
 delete mode 100644 drivers/regulator/twl4030-regulator.c
 create mode 100644 drivers/rtc/rtc-twl.c
 delete mode 100644 drivers/rtc/rtc-twl4030.c
 create mode 100644 include/linux/i2c/twl.h
 delete mode 100644 include/linux/i2c/twl4030.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index db9374bc528b..e508904fb67e 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -19,7 +19,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/delay.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 4cfb7b68dfad..c90b0d0b1927 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -20,7 +20,7 @@
 #include <linux/input/matrix_keypad.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/regulator/machine.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index 37431738f1c2..995d4a2b2dfd 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -24,7 +24,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <linux/regulator/machine.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/io.h>
 #include <linux/smsc911x.h>
 
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 6ada8029f9a8..231cb4ec1847 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -29,7 +29,7 @@
 #include <linux/mtd/nand.h>
 
 #include <linux/regulator/machine.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 6f6c601eeab7..ef17cf1ab6d7 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -24,7 +24,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <linux/regulator/machine.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/leds.h>
 #include <linux/input.h>
 #include <linux/input/matrix_keypad.h>
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index 5b78a87217e0..d192dd98a591 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -26,7 +26,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/regulator/machine.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
index 49384a7c5492..a320d7bfe67c 100644
--- a/drivers/gpio/twl4030-gpio.c
+++ b/drivers/gpio/twl4030-gpio.c
@@ -34,7 +34,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 
 /*
diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c
index 9a2977c21696..1d1536a2fe46 100644
--- a/drivers/input/keyboard/twl4030_keypad.c
+++ b/drivers/input/keyboard/twl4030_keypad.c
@@ -31,7 +31,7 @@
 #include <linux/interrupt.h>
 #include <linux/input.h>
 #include <linux/platform_device.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 
 /*
diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c
index f5fc9974a111..a73b889fff79 100644
--- a/drivers/input/misc/twl4030-pwrbutton.c
+++ b/drivers/input/misc/twl4030-pwrbutton.c
@@ -27,7 +27,7 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 #define PWR_PWRON_IRQ (1 << 0)
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 75638ca8288b..f4d14b7589bf 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
-obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
+obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
 obj-$(CONFIG_TWL4030_CODEC)	+= twl4030-codec.o
 
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
new file mode 100644
index 000000000000..44714f5cf495
--- /dev/null
+++ b/drivers/mfd/twl-core.c
@@ -0,0 +1,929 @@
+/*
+ * twl4030_core.c - driver for TWL4030/TPS659x0 PM and audio CODEC devices
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Modifications to defer interrupt handling to a kernel thread:
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Code cleanup and modifications to IRQ handler.
+ * by syed khasim <x0khasim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+
+#include <linux/regulator/machine.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/twl.h>
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#include <plat/cpu.h>
+#endif
+
+/*
+ * The TWL4030 "Triton 2" is one of a family of a multi-function "Power
+ * Management and System Companion Device" chips originally designed for
+ * use in OMAP2 and OMAP 3 based systems.  Its control interfaces use I2C,
+ * often at around 3 Mbit/sec, including for interrupt handling.
+ *
+ * This driver core provides genirq support for the interrupts emitted,
+ * by the various modules, and exports register access primitives.
+ *
+ * FIXME this driver currently requires use of the first interrupt line
+ * (and associated registers).
+ */
+
+#define DRIVER_NAME			"twl4030"
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+#define twl_has_bci()		true
+#else
+#define twl_has_bci()		false
+#endif
+
+#if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE)
+#define twl_has_keypad()	true
+#else
+#define twl_has_keypad()	false
+#endif
+
+#if defined(CONFIG_GPIO_TWL4030) || defined(CONFIG_GPIO_TWL4030_MODULE)
+#define twl_has_gpio()	true
+#else
+#define twl_has_gpio()	false
+#endif
+
+#if defined(CONFIG_REGULATOR_TWL4030) \
+	|| defined(CONFIG_REGULATOR_TWL4030_MODULE)
+#define twl_has_regulator()	true
+#else
+#define twl_has_regulator()	false
+#endif
+
+#if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
+#define twl_has_madc()	true
+#else
+#define twl_has_madc()	false
+#endif
+
+#ifdef CONFIG_TWL4030_POWER
+#define twl_has_power()        true
+#else
+#define twl_has_power()        false
+#endif
+
+#if defined(CONFIG_RTC_DRV_TWL4030) || defined(CONFIG_RTC_DRV_TWL4030_MODULE)
+#define twl_has_rtc()	true
+#else
+#define twl_has_rtc()	false
+#endif
+
+#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE)
+#define twl_has_usb()	true
+#else
+#define twl_has_usb()	false
+#endif
+
+#if defined(CONFIG_TWL4030_WATCHDOG) || \
+	defined(CONFIG_TWL4030_WATCHDOG_MODULE)
+#define twl_has_watchdog()        true
+#else
+#define twl_has_watchdog()        false
+#endif
+
+#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
+#define twl_has_codec()	true
+#else
+#define twl_has_codec()	false
+#endif
+
+/* Triton Core internal information (BEGIN) */
+
+/* Last - for index max*/
+#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
+
+#define TWL4030_NUM_SLAVES		4
+
+#if defined(CONFIG_INPUT_TWL4030_PWRBUTTON) \
+	|| defined(CONFIG_INPUT_TWL4030_PWBUTTON_MODULE)
+#define twl_has_pwrbutton()	true
+#else
+#define twl_has_pwrbutton()	false
+#endif
+
+/* Base Address defns for twl4030_map[] */
+
+/* subchip/slave 0 - USB ID */
+#define TWL4030_BASEADD_USB		0x0000
+
+/* subchip/slave 1 - AUD ID */
+#define TWL4030_BASEADD_AUDIO_VOICE	0x0000
+#define TWL4030_BASEADD_GPIO		0x0098
+#define TWL4030_BASEADD_INTBR		0x0085
+#define TWL4030_BASEADD_PIH		0x0080
+#define TWL4030_BASEADD_TEST		0x004C
+
+/* subchip/slave 2 - AUX ID */
+#define TWL4030_BASEADD_INTERRUPTS	0x00B9
+#define TWL4030_BASEADD_LED		0x00EE
+#define TWL4030_BASEADD_MADC		0x0000
+#define TWL4030_BASEADD_MAIN_CHARGE	0x0074
+#define TWL4030_BASEADD_PRECHARGE	0x00AA
+#define TWL4030_BASEADD_PWM0		0x00F8
+#define TWL4030_BASEADD_PWM1		0x00FB
+#define TWL4030_BASEADD_PWMA		0x00EF
+#define TWL4030_BASEADD_PWMB		0x00F1
+#define TWL4030_BASEADD_KEYPAD		0x00D2
+
+#define TWL5031_BASEADD_ACCESSORY	0x0074 /* Replaces Main Charge */
+#define TWL5031_BASEADD_INTERRUPTS	0x00B9 /* Different than TWL4030's
+						  one */
+
+/* subchip/slave 3 - POWER ID */
+#define TWL4030_BASEADD_BACKUP		0x0014
+#define TWL4030_BASEADD_INT		0x002E
+#define TWL4030_BASEADD_PM_MASTER	0x0036
+#define TWL4030_BASEADD_PM_RECEIVER	0x005B
+#define TWL4030_BASEADD_RTC		0x001C
+#define TWL4030_BASEADD_SECURED_REG	0x0000
+
+/* Triton Core internal information (END) */
+
+
+/* Few power values */
+#define R_CFG_BOOT			0x05
+#define R_PROTECT_KEY			0x0E
+
+/* access control values for R_PROTECT_KEY */
+#define KEY_UNLOCK1			0xce
+#define KEY_UNLOCK2			0xec
+#define KEY_LOCK			0x00
+
+/* some fields in R_CFG_BOOT */
+#define HFCLK_FREQ_19p2_MHZ		(1 << 0)
+#define HFCLK_FREQ_26_MHZ		(2 << 0)
+#define HFCLK_FREQ_38p4_MHZ		(3 << 0)
+#define HIGH_PERF_SQ			(1 << 3)
+#define CK32K_LOWPWR_EN			(1 << 7)
+
+
+/* chip-specific feature flags, for i2c_device_id.driver_data */
+#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
+#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
+#define TWL5031			BIT(2)  /* twl5031 has different registers */
+
+/*----------------------------------------------------------------------*/
+
+/* is driver active, bound to a chip? */
+static bool inuse;
+
+/* Structure for each TWL4030 Slave */
+struct twl4030_client {
+	struct i2c_client *client;
+	u8 address;
+
+	/* max numb of i2c_msg required is for read =2 */
+	struct i2c_msg xfer_msg[2];
+
+	/* To lock access to xfer_msg */
+	struct mutex xfer_lock;
+};
+
+static struct twl4030_client twl4030_modules[TWL4030_NUM_SLAVES];
+
+
+/* mapping the module id to slave id and base address */
+struct twl4030mapping {
+	unsigned char sid;	/* Slave ID */
+	unsigned char base;	/* base address */
+};
+
+static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+	/*
+	 * NOTE:  don't change this table without updating the
+	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
+	 * so they continue to match the order in this table.
+	 */
+
+	{ 0, TWL4030_BASEADD_USB },
+
+	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
+	{ 1, TWL4030_BASEADD_GPIO },
+	{ 1, TWL4030_BASEADD_INTBR },
+	{ 1, TWL4030_BASEADD_PIH },
+	{ 1, TWL4030_BASEADD_TEST },
+
+	{ 2, TWL4030_BASEADD_KEYPAD },
+	{ 2, TWL4030_BASEADD_MADC },
+	{ 2, TWL4030_BASEADD_INTERRUPTS },
+	{ 2, TWL4030_BASEADD_LED },
+	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
+	{ 2, TWL4030_BASEADD_PRECHARGE },
+	{ 2, TWL4030_BASEADD_PWM0 },
+	{ 2, TWL4030_BASEADD_PWM1 },
+	{ 2, TWL4030_BASEADD_PWMA },
+	{ 2, TWL4030_BASEADD_PWMB },
+	{ 2, TWL5031_BASEADD_ACCESSORY },
+	{ 2, TWL5031_BASEADD_INTERRUPTS },
+
+	{ 3, TWL4030_BASEADD_BACKUP },
+	{ 3, TWL4030_BASEADD_INT },
+	{ 3, TWL4030_BASEADD_PM_MASTER },
+	{ 3, TWL4030_BASEADD_PM_RECEIVER },
+	{ 3, TWL4030_BASEADD_RTC },
+	{ 3, TWL4030_BASEADD_SECURED_REG },
+};
+
+/*----------------------------------------------------------------------*/
+
+/* Exported Functions */
+
+/**
+ * twl4030_i2c_write - Writes a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes+1 containing data to write
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * IMPORTANT: for 'value' parameter: Allocate value num_bytes+1 and
+ * valid data starts at Offset 1.
+ *
+ * Returns the result of operation - 0 is success
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
+{
+	int ret;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/*
+	 * [MSG1]: fill the register address data
+	 * fill the data Tx buffer
+	 */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = num_bytes + 1;
+	msg->flags = 0;
+	msg->buf = value;
+	/* over write the first byte of buffer with the register address */
+	*value = twl4030_map[mod_no].base + reg;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2c_transfer returns number of messages transferred */
+	if (ret != 1) {
+		pr_err("%s: i2c_write failed to transfer all messages\n",
+			DRIVER_NAME);
+		if (ret < 0)
+			return ret;
+		else
+			return -EIO;
+	} else {
+		return 0;
+	}
+}
+EXPORT_SYMBOL(twl4030_i2c_write);
+
+/**
+ * twl4030_i2c_read - Reads a n bit register in TWL4030
+ * @mod_no: module number
+ * @value: an array of num_bytes containing data to be read
+ * @reg: register address (just offset will do)
+ * @num_bytes: number of bytes to transfer
+ *
+ * Returns result of operation - num_bytes is success else failure.
+ */
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
+{
+	int ret;
+	u8 val;
+	int sid;
+	struct twl4030_client *twl;
+	struct i2c_msg *msg;
+
+	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
+		return -EPERM;
+	}
+	sid = twl4030_map[mod_no].sid;
+	twl = &twl4030_modules[sid];
+
+	if (unlikely(!inuse)) {
+		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		return -EPERM;
+	}
+	mutex_lock(&twl->xfer_lock);
+	/* [MSG1] fill the register address data */
+	msg = &twl->xfer_msg[0];
+	msg->addr = twl->address;
+	msg->len = 1;
+	msg->flags = 0;	/* Read the register value */
+	val = twl4030_map[mod_no].base + reg;
+	msg->buf = &val;
+	/* [MSG2] fill the data rx buffer */
+	msg = &twl->xfer_msg[1];
+	msg->addr = twl->address;
+	msg->flags = I2C_M_RD;	/* Read the register value */
+	msg->len = num_bytes;	/* only n bytes */
+	msg->buf = value;
+	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 2);
+	mutex_unlock(&twl->xfer_lock);
+
+	/* i2c_transfer returns number of messages transferred */
+	if (ret != 2) {
+		pr_err("%s: i2c_read failed to transfer all messages\n",
+			DRIVER_NAME);
+		if (ret < 0)
+			return ret;
+		else
+			return -EIO;
+	} else {
+		return 0;
+	}
+}
+EXPORT_SYMBOL(twl4030_i2c_read);
+
+/**
+ * twl4030_i2c_write_u8 - Writes a 8 bit register in TWL4030
+ * @mod_no: module number
+ * @value: the value to be written 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
+{
+
+	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
+	u8 temp_buffer[2] = { 0 };
+	/* offset 1 contains the data */
+	temp_buffer[1] = value;
+	return twl4030_i2c_write(mod_no, temp_buffer, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_write_u8);
+
+/**
+ * twl4030_i2c_read_u8 - Reads a 8 bit register from TWL4030
+ * @mod_no: module number
+ * @value: the value read 8 bit
+ * @reg: register address (just offset will do)
+ *
+ * Returns result of operation - 0 is success
+ */
+int twl4030_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
+{
+	return twl4030_i2c_read(mod_no, value, reg, 1);
+}
+EXPORT_SYMBOL(twl4030_i2c_read_u8);
+
+/*----------------------------------------------------------------------*/
+
+static struct device *
+add_numbered_child(unsigned chip, const char *name, int num,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq0, int irq1)
+{
+	struct platform_device	*pdev;
+	struct twl4030_client	*twl = &twl4030_modules[chip];
+	int			status;
+
+	pdev = platform_device_alloc(name, num);
+	if (!pdev) {
+		dev_dbg(&twl->client->dev, "can't alloc dev\n");
+		status = -ENOMEM;
+		goto err;
+	}
+
+	device_init_wakeup(&pdev->dev, can_wakeup);
+	pdev->dev.parent = &twl->client->dev;
+
+	if (pdata) {
+		status = platform_device_add_data(pdev, pdata, pdata_len);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add platform_data\n");
+			goto err;
+		}
+	}
+
+	if (irq0) {
+		struct resource r[2] = {
+			{ .start = irq0, .flags = IORESOURCE_IRQ, },
+			{ .start = irq1, .flags = IORESOURCE_IRQ, },
+		};
+
+		status = platform_device_add_resources(pdev, r, irq1 ? 2 : 1);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add irqs\n");
+			goto err;
+		}
+	}
+
+	status = platform_device_add(pdev);
+
+err:
+	if (status < 0) {
+		platform_device_put(pdev);
+		dev_err(&twl->client->dev, "can't add %s dev\n", name);
+		return ERR_PTR(status);
+	}
+	return &pdev->dev;
+}
+
+static inline struct device *add_child(unsigned chip, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq0, int irq1)
+{
+	return add_numbered_child(chip, name, -1, pdata, pdata_len,
+		can_wakeup, irq0, irq1);
+}
+
+static struct device *
+add_regulator_linked(int num, struct regulator_init_data *pdata,
+		struct regulator_consumer_supply *consumers,
+		unsigned num_consumers)
+{
+	/* regulator framework demands init_data ... */
+	if (!pdata)
+		return NULL;
+
+	if (consumers) {
+		pdata->consumer_supplies = consumers;
+		pdata->num_consumer_supplies = num_consumers;
+	}
+
+	/* NOTE:  we currently ignore regulator IRQs, e.g. for short circuits */
+	return add_numbered_child(3, "twl4030_reg", num,
+		pdata, sizeof(*pdata), false, 0, 0);
+}
+
+static struct device *
+add_regulator(int num, struct regulator_init_data *pdata)
+{
+	return add_regulator_linked(num, pdata, NULL, 0);
+}
+
+/*
+ * NOTE:  We know the first 8 IRQs after pdata->base_irq are
+ * for the PIH, and the next are for the PWR_INT SIH, since
+ * that's how twl_init_irq() sets things up.
+ */
+
+static int
+add_children(struct twl4030_platform_data *pdata, unsigned long features)
+{
+	struct device	*child;
+
+	if (twl_has_bci() && pdata->bci &&
+	    !(features & (TPS_SUBSET | TWL5031))) {
+		child = add_child(3, "twl4030_bci",
+				pdata->bci, sizeof(*pdata->bci),
+				false,
+				/* irq0 = CHG_PRES, irq1 = BCI */
+				pdata->irq_base + 8 + 1, pdata->irq_base + 2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_gpio() && pdata->gpio) {
+		child = add_child(1, "twl4030_gpio",
+				pdata->gpio, sizeof(*pdata->gpio),
+				false, pdata->irq_base + 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_keypad() && pdata->keypad) {
+		child = add_child(2, "twl4030_keypad",
+				pdata->keypad, sizeof(*pdata->keypad),
+				true, pdata->irq_base + 1, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_madc() && pdata->madc) {
+		child = add_child(2, "twl4030_madc",
+				pdata->madc, sizeof(*pdata->madc),
+				true, pdata->irq_base + 3, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_rtc()) {
+		/*
+		 * REVISIT platform_data here currently might expose the
+		 * "msecure" line ... but for now we just expect board
+		 * setup to tell the chip "it's always ok to SET_TIME".
+		 * Eventually, Linux might become more aware of such
+		 * HW security concerns, and "least privilege".
+		 */
+		child = add_child(3, "twl4030_rtc",
+				NULL, 0,
+				true, pdata->irq_base + 8 + 3, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_usb() && pdata->usb) {
+
+		static struct regulator_consumer_supply usb1v5 = {
+			.supply =	"usb1v5",
+		};
+		static struct regulator_consumer_supply usb1v8 = {
+			.supply =	"usb1v8",
+		};
+		static struct regulator_consumer_supply usb3v1 = {
+			.supply =	"usb3v1",
+		};
+
+	/* First add the regulators so that they can be used by transceiver */
+		if (twl_has_regulator()) {
+			/* this is a template that gets copied */
+			struct regulator_init_data usb_fixed = {
+				.constraints.valid_modes_mask =
+					REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+				.constraints.valid_ops_mask =
+					REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+			};
+
+			child = add_regulator_linked(TWL4030_REG_VUSB1V5,
+						      &usb_fixed, &usb1v5, 1);
+			if (IS_ERR(child))
+				return PTR_ERR(child);
+
+			child = add_regulator_linked(TWL4030_REG_VUSB1V8,
+						      &usb_fixed, &usb1v8, 1);
+			if (IS_ERR(child))
+				return PTR_ERR(child);
+
+			child = add_regulator_linked(TWL4030_REG_VUSB3V1,
+						      &usb_fixed, &usb3v1, 1);
+			if (IS_ERR(child))
+				return PTR_ERR(child);
+
+		}
+
+		child = add_child(0, "twl4030_usb",
+				pdata->usb, sizeof(*pdata->usb),
+				true,
+				/* irq0 = USB_PRES, irq1 = USB */
+				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
+
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		/* we need to connect regulators to this transceiver */
+		if (twl_has_regulator() && child) {
+			usb1v5.dev = child;
+			usb1v8.dev = child;
+			usb3v1.dev = child;
+		}
+	}
+
+	if (twl_has_watchdog()) {
+		child = add_child(0, "twl4030_wdt", NULL, 0, false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_pwrbutton()) {
+		child = add_child(1, "twl4030_pwrbutton",
+				NULL, 0, true, pdata->irq_base + 8 + 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_codec() && pdata->codec) {
+		child = add_child(1, "twl4030_codec",
+				pdata->codec, sizeof(*pdata->codec),
+				false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_regulator()) {
+		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VIO, pdata->vio);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDD1, pdata->vdd1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDD2, pdata->vdd2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDAC, pdata->vdac);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator((features & TWL4030_VAUX2)
+					? TWL4030_REG_VAUX2_4030
+					: TWL4030_REG_VAUX2,
+				pdata->vaux2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTANA1, pdata->vintana1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTANA2, pdata->vintana2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VINTDIG, pdata->vintdig);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* maybe add LDOs that are omitted on cost-reduced parts */
+	if (twl_has_regulator() && !(features & TPS_SUBSET)) {
+		child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VSIM, pdata->vsim);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * These three functions initialize the on-chip clock framework,
+ * letting it generate the right frequencies for USB, MADC, and
+ * other purposes.
+ */
+static inline int __init protect_pm_master(void)
+{
+	int e = 0;
+
+	e = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_LOCK,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static inline int __init unprotect_pm_master(void)
+{
+	int e = 0;
+
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK1,
+			R_PROTECT_KEY);
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK2,
+			R_PROTECT_KEY);
+	return e;
+}
+
+static void clocks_init(struct device *dev,
+			struct twl4030_clock_init_data *clock)
+{
+	int e = 0;
+	struct clk *osc;
+	u32 rate;
+	u8 ctrl = HFCLK_FREQ_26_MHZ;
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+	if (cpu_is_omap2430())
+		osc = clk_get(dev, "osc_ck");
+	else
+		osc = clk_get(dev, "osc_sys_ck");
+
+	if (IS_ERR(osc)) {
+		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+				"using bootloader value (unknown osc rate)\n");
+		return;
+	}
+
+	rate = clk_get_rate(osc);
+	clk_put(osc);
+
+#else
+	/* REVISIT for non-OMAP systems, pass the clock rate from
+	 * board init code, using platform_data.
+	 */
+	osc = ERR_PTR(-EIO);
+
+	printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+	       "using bootloader value (unknown osc rate)\n");
+
+	return;
+#endif
+
+	switch (rate) {
+	case 19200000:
+		ctrl = HFCLK_FREQ_19p2_MHZ;
+		break;
+	case 26000000:
+		ctrl = HFCLK_FREQ_26_MHZ;
+		break;
+	case 38400000:
+		ctrl = HFCLK_FREQ_38p4_MHZ;
+		break;
+	}
+
+	ctrl |= HIGH_PERF_SQ;
+	if (clock && clock->ck32k_lowpwr_enable)
+		ctrl |= CK32K_LOWPWR_EN;
+
+	e |= unprotect_pm_master();
+	/* effect->MADC+USB ck en */
+	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
+	e |= protect_pm_master();
+
+	if (e < 0)
+		pr_err("%s: clock init err [%d]\n", DRIVER_NAME, e);
+}
+
+/*----------------------------------------------------------------------*/
+
+int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
+int twl_exit_irq(void);
+int twl_init_chip_irq(const char *chip);
+
+static int twl4030_remove(struct i2c_client *client)
+{
+	unsigned i;
+	int status;
+
+	status = twl_exit_irq();
+	if (status < 0)
+		return status;
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		if (twl->client && twl->client != client)
+			i2c_unregister_device(twl->client);
+		twl4030_modules[i].client = NULL;
+	}
+	inuse = false;
+	return 0;
+}
+
+/* NOTE:  this driver only handles a single twl4030/tps659x0 chip */
+static int __init
+twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int				status;
+	unsigned			i;
+	struct twl4030_platform_data	*pdata = client->dev.platform_data;
+
+	if (!pdata) {
+		dev_dbg(&client->dev, "no platform data?\n");
+		return -EINVAL;
+	}
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) {
+		dev_dbg(&client->dev, "can't talk I2C?\n");
+		return -EIO;
+	}
+
+	if (inuse) {
+		dev_dbg(&client->dev, "driver is already in use\n");
+		return -EBUSY;
+	}
+
+	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
+		struct twl4030_client	*twl = &twl4030_modules[i];
+
+		twl->address = client->addr + i;
+		if (i == 0)
+			twl->client = client;
+		else {
+			twl->client = i2c_new_dummy(client->adapter,
+					twl->address);
+			if (!twl->client) {
+				dev_err(&client->dev,
+					"can't attach client %d\n", i);
+				status = -ENOMEM;
+				goto fail;
+			}
+		}
+		mutex_init(&twl->xfer_lock);
+	}
+	inuse = true;
+
+	/* setup clock framework */
+	clocks_init(&client->dev, pdata->clock);
+
+	/* load power event scripts */
+	if (twl_has_power() && pdata->power)
+		twl4030_power_init(pdata->power);
+
+	/* Maybe init the T2 Interrupt subsystem */
+	if (client->irq
+			&& pdata->irq_base
+			&& pdata->irq_end > pdata->irq_base) {
+		twl_init_chip_irq(id->name);
+		status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
+		if (status < 0)
+			goto fail;
+	}
+
+	status = add_children(pdata, id->driver_data);
+fail:
+	if (status < 0)
+		twl4030_remove(client);
+	return status;
+}
+
+static const struct i2c_device_id twl4030_ids[] = {
+	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
+	{ "twl5030", 0 },		/* T2 updated */
+	{ "twl5031", TWL5031 },		/* TWL5030 updated */
+	{ "tps65950", 0 },		/* catalog version of twl5030 */
+	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
+	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(i2c, twl4030_ids);
+
+/* One Client Driver , 4 Clients */
+static struct i2c_driver twl4030_driver = {
+	.driver.name	= DRIVER_NAME,
+	.id_table	= twl4030_ids,
+	.probe		= twl4030_probe,
+	.remove		= twl4030_remove,
+};
+
+static int __init twl4030_init(void)
+{
+	return i2c_add_driver(&twl4030_driver);
+}
+subsys_initcall(twl4030_init);
+
+static void __exit twl4030_exit(void)
+{
+	i2c_del_driver(&twl4030_driver);
+}
+module_exit(twl4030_exit);
+
+MODULE_AUTHOR("Texas Instruments, Inc.");
+MODULE_DESCRIPTION("I2C Core interface for TWL4030");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
deleted file mode 100644
index 7c2ec0aa9d87..000000000000
--- a/drivers/mfd/twl4030-core.c
+++ /dev/null
@@ -1,929 +0,0 @@
-/*
- * twl4030_core.c - driver for TWL4030/TPS659x0 PM and audio CODEC devices
- *
- * Copyright (C) 2005-2006 Texas Instruments, Inc.
- *
- * Modifications to defer interrupt handling to a kernel thread:
- * Copyright (C) 2006 MontaVista Software, Inc.
- *
- * Based on tlv320aic23.c:
- * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
- *
- * Code cleanup and modifications to IRQ handler.
- * by syed khasim <x0khasim@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/init.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-
-#include <linux/regulator/machine.h>
-
-#include <linux/i2c.h>
-#include <linux/i2c/twl4030.h>
-
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
-#include <plat/cpu.h>
-#endif
-
-/*
- * The TWL4030 "Triton 2" is one of a family of a multi-function "Power
- * Management and System Companion Device" chips originally designed for
- * use in OMAP2 and OMAP 3 based systems.  Its control interfaces use I2C,
- * often at around 3 Mbit/sec, including for interrupt handling.
- *
- * This driver core provides genirq support for the interrupts emitted,
- * by the various modules, and exports register access primitives.
- *
- * FIXME this driver currently requires use of the first interrupt line
- * (and associated registers).
- */
-
-#define DRIVER_NAME			"twl4030"
-
-#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
-	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
-#define twl_has_bci()		true
-#else
-#define twl_has_bci()		false
-#endif
-
-#if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE)
-#define twl_has_keypad()	true
-#else
-#define twl_has_keypad()	false
-#endif
-
-#if defined(CONFIG_GPIO_TWL4030) || defined(CONFIG_GPIO_TWL4030_MODULE)
-#define twl_has_gpio()	true
-#else
-#define twl_has_gpio()	false
-#endif
-
-#if defined(CONFIG_REGULATOR_TWL4030) \
-	|| defined(CONFIG_REGULATOR_TWL4030_MODULE)
-#define twl_has_regulator()	true
-#else
-#define twl_has_regulator()	false
-#endif
-
-#if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
-#define twl_has_madc()	true
-#else
-#define twl_has_madc()	false
-#endif
-
-#ifdef CONFIG_TWL4030_POWER
-#define twl_has_power()        true
-#else
-#define twl_has_power()        false
-#endif
-
-#if defined(CONFIG_RTC_DRV_TWL4030) || defined(CONFIG_RTC_DRV_TWL4030_MODULE)
-#define twl_has_rtc()	true
-#else
-#define twl_has_rtc()	false
-#endif
-
-#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE)
-#define twl_has_usb()	true
-#else
-#define twl_has_usb()	false
-#endif
-
-#if defined(CONFIG_TWL4030_WATCHDOG) || \
-	defined(CONFIG_TWL4030_WATCHDOG_MODULE)
-#define twl_has_watchdog()        true
-#else
-#define twl_has_watchdog()        false
-#endif
-
-#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
-#define twl_has_codec()	true
-#else
-#define twl_has_codec()	false
-#endif
-
-/* Triton Core internal information (BEGIN) */
-
-/* Last - for index max*/
-#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
-
-#define TWL4030_NUM_SLAVES		4
-
-#if defined(CONFIG_INPUT_TWL4030_PWRBUTTON) \
-	|| defined(CONFIG_INPUT_TWL4030_PWBUTTON_MODULE)
-#define twl_has_pwrbutton()	true
-#else
-#define twl_has_pwrbutton()	false
-#endif
-
-/* Base Address defns for twl4030_map[] */
-
-/* subchip/slave 0 - USB ID */
-#define TWL4030_BASEADD_USB		0x0000
-
-/* subchip/slave 1 - AUD ID */
-#define TWL4030_BASEADD_AUDIO_VOICE	0x0000
-#define TWL4030_BASEADD_GPIO		0x0098
-#define TWL4030_BASEADD_INTBR		0x0085
-#define TWL4030_BASEADD_PIH		0x0080
-#define TWL4030_BASEADD_TEST		0x004C
-
-/* subchip/slave 2 - AUX ID */
-#define TWL4030_BASEADD_INTERRUPTS	0x00B9
-#define TWL4030_BASEADD_LED		0x00EE
-#define TWL4030_BASEADD_MADC		0x0000
-#define TWL4030_BASEADD_MAIN_CHARGE	0x0074
-#define TWL4030_BASEADD_PRECHARGE	0x00AA
-#define TWL4030_BASEADD_PWM0		0x00F8
-#define TWL4030_BASEADD_PWM1		0x00FB
-#define TWL4030_BASEADD_PWMA		0x00EF
-#define TWL4030_BASEADD_PWMB		0x00F1
-#define TWL4030_BASEADD_KEYPAD		0x00D2
-
-#define TWL5031_BASEADD_ACCESSORY	0x0074 /* Replaces Main Charge */
-#define TWL5031_BASEADD_INTERRUPTS	0x00B9 /* Different than TWL4030's
-						  one */
-
-/* subchip/slave 3 - POWER ID */
-#define TWL4030_BASEADD_BACKUP		0x0014
-#define TWL4030_BASEADD_INT		0x002E
-#define TWL4030_BASEADD_PM_MASTER	0x0036
-#define TWL4030_BASEADD_PM_RECEIVER	0x005B
-#define TWL4030_BASEADD_RTC		0x001C
-#define TWL4030_BASEADD_SECURED_REG	0x0000
-
-/* Triton Core internal information (END) */
-
-
-/* Few power values */
-#define R_CFG_BOOT			0x05
-#define R_PROTECT_KEY			0x0E
-
-/* access control values for R_PROTECT_KEY */
-#define KEY_UNLOCK1			0xce
-#define KEY_UNLOCK2			0xec
-#define KEY_LOCK			0x00
-
-/* some fields in R_CFG_BOOT */
-#define HFCLK_FREQ_19p2_MHZ		(1 << 0)
-#define HFCLK_FREQ_26_MHZ		(2 << 0)
-#define HFCLK_FREQ_38p4_MHZ		(3 << 0)
-#define HIGH_PERF_SQ			(1 << 3)
-#define CK32K_LOWPWR_EN			(1 << 7)
-
-
-/* chip-specific feature flags, for i2c_device_id.driver_data */
-#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
-#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
-#define TWL5031			BIT(2)  /* twl5031 has different registers */
-
-/*----------------------------------------------------------------------*/
-
-/* is driver active, bound to a chip? */
-static bool inuse;
-
-/* Structure for each TWL4030 Slave */
-struct twl4030_client {
-	struct i2c_client *client;
-	u8 address;
-
-	/* max numb of i2c_msg required is for read =2 */
-	struct i2c_msg xfer_msg[2];
-
-	/* To lock access to xfer_msg */
-	struct mutex xfer_lock;
-};
-
-static struct twl4030_client twl4030_modules[TWL4030_NUM_SLAVES];
-
-
-/* mapping the module id to slave id and base address */
-struct twl4030mapping {
-	unsigned char sid;	/* Slave ID */
-	unsigned char base;	/* base address */
-};
-
-static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
-	/*
-	 * NOTE:  don't change this table without updating the
-	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
-	 * so they continue to match the order in this table.
-	 */
-
-	{ 0, TWL4030_BASEADD_USB },
-
-	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
-	{ 1, TWL4030_BASEADD_GPIO },
-	{ 1, TWL4030_BASEADD_INTBR },
-	{ 1, TWL4030_BASEADD_PIH },
-	{ 1, TWL4030_BASEADD_TEST },
-
-	{ 2, TWL4030_BASEADD_KEYPAD },
-	{ 2, TWL4030_BASEADD_MADC },
-	{ 2, TWL4030_BASEADD_INTERRUPTS },
-	{ 2, TWL4030_BASEADD_LED },
-	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
-	{ 2, TWL4030_BASEADD_PRECHARGE },
-	{ 2, TWL4030_BASEADD_PWM0 },
-	{ 2, TWL4030_BASEADD_PWM1 },
-	{ 2, TWL4030_BASEADD_PWMA },
-	{ 2, TWL4030_BASEADD_PWMB },
-	{ 2, TWL5031_BASEADD_ACCESSORY },
-	{ 2, TWL5031_BASEADD_INTERRUPTS },
-
-	{ 3, TWL4030_BASEADD_BACKUP },
-	{ 3, TWL4030_BASEADD_INT },
-	{ 3, TWL4030_BASEADD_PM_MASTER },
-	{ 3, TWL4030_BASEADD_PM_RECEIVER },
-	{ 3, TWL4030_BASEADD_RTC },
-	{ 3, TWL4030_BASEADD_SECURED_REG },
-};
-
-/*----------------------------------------------------------------------*/
-
-/* Exported Functions */
-
-/**
- * twl4030_i2c_write - Writes a n bit register in TWL4030
- * @mod_no: module number
- * @value: an array of num_bytes+1 containing data to write
- * @reg: register address (just offset will do)
- * @num_bytes: number of bytes to transfer
- *
- * IMPORTANT: for 'value' parameter: Allocate value num_bytes+1 and
- * valid data starts at Offset 1.
- *
- * Returns the result of operation - 0 is success
- */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
-{
-	int ret;
-	int sid;
-	struct twl4030_client *twl;
-	struct i2c_msg *msg;
-
-	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
-		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
-		return -EPERM;
-	}
-	sid = twl4030_map[mod_no].sid;
-	twl = &twl4030_modules[sid];
-
-	if (unlikely(!inuse)) {
-		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
-		return -EPERM;
-	}
-	mutex_lock(&twl->xfer_lock);
-	/*
-	 * [MSG1]: fill the register address data
-	 * fill the data Tx buffer
-	 */
-	msg = &twl->xfer_msg[0];
-	msg->addr = twl->address;
-	msg->len = num_bytes + 1;
-	msg->flags = 0;
-	msg->buf = value;
-	/* over write the first byte of buffer with the register address */
-	*value = twl4030_map[mod_no].base + reg;
-	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
-	mutex_unlock(&twl->xfer_lock);
-
-	/* i2c_transfer returns number of messages transferred */
-	if (ret != 1) {
-		pr_err("%s: i2c_write failed to transfer all messages\n",
-			DRIVER_NAME);
-		if (ret < 0)
-			return ret;
-		else
-			return -EIO;
-	} else {
-		return 0;
-	}
-}
-EXPORT_SYMBOL(twl4030_i2c_write);
-
-/**
- * twl4030_i2c_read - Reads a n bit register in TWL4030
- * @mod_no: module number
- * @value: an array of num_bytes containing data to be read
- * @reg: register address (just offset will do)
- * @num_bytes: number of bytes to transfer
- *
- * Returns result of operation - num_bytes is success else failure.
- */
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
-{
-	int ret;
-	u8 val;
-	int sid;
-	struct twl4030_client *twl;
-	struct i2c_msg *msg;
-
-	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
-		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
-		return -EPERM;
-	}
-	sid = twl4030_map[mod_no].sid;
-	twl = &twl4030_modules[sid];
-
-	if (unlikely(!inuse)) {
-		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
-		return -EPERM;
-	}
-	mutex_lock(&twl->xfer_lock);
-	/* [MSG1] fill the register address data */
-	msg = &twl->xfer_msg[0];
-	msg->addr = twl->address;
-	msg->len = 1;
-	msg->flags = 0;	/* Read the register value */
-	val = twl4030_map[mod_no].base + reg;
-	msg->buf = &val;
-	/* [MSG2] fill the data rx buffer */
-	msg = &twl->xfer_msg[1];
-	msg->addr = twl->address;
-	msg->flags = I2C_M_RD;	/* Read the register value */
-	msg->len = num_bytes;	/* only n bytes */
-	msg->buf = value;
-	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 2);
-	mutex_unlock(&twl->xfer_lock);
-
-	/* i2c_transfer returns number of messages transferred */
-	if (ret != 2) {
-		pr_err("%s: i2c_read failed to transfer all messages\n",
-			DRIVER_NAME);
-		if (ret < 0)
-			return ret;
-		else
-			return -EIO;
-	} else {
-		return 0;
-	}
-}
-EXPORT_SYMBOL(twl4030_i2c_read);
-
-/**
- * twl4030_i2c_write_u8 - Writes a 8 bit register in TWL4030
- * @mod_no: module number
- * @value: the value to be written 8 bit
- * @reg: register address (just offset will do)
- *
- * Returns result of operation - 0 is success
- */
-int twl4030_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
-{
-
-	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
-	u8 temp_buffer[2] = { 0 };
-	/* offset 1 contains the data */
-	temp_buffer[1] = value;
-	return twl4030_i2c_write(mod_no, temp_buffer, reg, 1);
-}
-EXPORT_SYMBOL(twl4030_i2c_write_u8);
-
-/**
- * twl4030_i2c_read_u8 - Reads a 8 bit register from TWL4030
- * @mod_no: module number
- * @value: the value read 8 bit
- * @reg: register address (just offset will do)
- *
- * Returns result of operation - 0 is success
- */
-int twl4030_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
-{
-	return twl4030_i2c_read(mod_no, value, reg, 1);
-}
-EXPORT_SYMBOL(twl4030_i2c_read_u8);
-
-/*----------------------------------------------------------------------*/
-
-static struct device *
-add_numbered_child(unsigned chip, const char *name, int num,
-		void *pdata, unsigned pdata_len,
-		bool can_wakeup, int irq0, int irq1)
-{
-	struct platform_device	*pdev;
-	struct twl4030_client	*twl = &twl4030_modules[chip];
-	int			status;
-
-	pdev = platform_device_alloc(name, num);
-	if (!pdev) {
-		dev_dbg(&twl->client->dev, "can't alloc dev\n");
-		status = -ENOMEM;
-		goto err;
-	}
-
-	device_init_wakeup(&pdev->dev, can_wakeup);
-	pdev->dev.parent = &twl->client->dev;
-
-	if (pdata) {
-		status = platform_device_add_data(pdev, pdata, pdata_len);
-		if (status < 0) {
-			dev_dbg(&pdev->dev, "can't add platform_data\n");
-			goto err;
-		}
-	}
-
-	if (irq0) {
-		struct resource r[2] = {
-			{ .start = irq0, .flags = IORESOURCE_IRQ, },
-			{ .start = irq1, .flags = IORESOURCE_IRQ, },
-		};
-
-		status = platform_device_add_resources(pdev, r, irq1 ? 2 : 1);
-		if (status < 0) {
-			dev_dbg(&pdev->dev, "can't add irqs\n");
-			goto err;
-		}
-	}
-
-	status = platform_device_add(pdev);
-
-err:
-	if (status < 0) {
-		platform_device_put(pdev);
-		dev_err(&twl->client->dev, "can't add %s dev\n", name);
-		return ERR_PTR(status);
-	}
-	return &pdev->dev;
-}
-
-static inline struct device *add_child(unsigned chip, const char *name,
-		void *pdata, unsigned pdata_len,
-		bool can_wakeup, int irq0, int irq1)
-{
-	return add_numbered_child(chip, name, -1, pdata, pdata_len,
-		can_wakeup, irq0, irq1);
-}
-
-static struct device *
-add_regulator_linked(int num, struct regulator_init_data *pdata,
-		struct regulator_consumer_supply *consumers,
-		unsigned num_consumers)
-{
-	/* regulator framework demands init_data ... */
-	if (!pdata)
-		return NULL;
-
-	if (consumers) {
-		pdata->consumer_supplies = consumers;
-		pdata->num_consumer_supplies = num_consumers;
-	}
-
-	/* NOTE:  we currently ignore regulator IRQs, e.g. for short circuits */
-	return add_numbered_child(3, "twl4030_reg", num,
-		pdata, sizeof(*pdata), false, 0, 0);
-}
-
-static struct device *
-add_regulator(int num, struct regulator_init_data *pdata)
-{
-	return add_regulator_linked(num, pdata, NULL, 0);
-}
-
-/*
- * NOTE:  We know the first 8 IRQs after pdata->base_irq are
- * for the PIH, and the next are for the PWR_INT SIH, since
- * that's how twl_init_irq() sets things up.
- */
-
-static int
-add_children(struct twl4030_platform_data *pdata, unsigned long features)
-{
-	struct device	*child;
-
-	if (twl_has_bci() && pdata->bci &&
-	    !(features & (TPS_SUBSET | TWL5031))) {
-		child = add_child(3, "twl4030_bci",
-				pdata->bci, sizeof(*pdata->bci),
-				false,
-				/* irq0 = CHG_PRES, irq1 = BCI */
-				pdata->irq_base + 8 + 1, pdata->irq_base + 2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_gpio() && pdata->gpio) {
-		child = add_child(1, "twl4030_gpio",
-				pdata->gpio, sizeof(*pdata->gpio),
-				false, pdata->irq_base + 0, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_keypad() && pdata->keypad) {
-		child = add_child(2, "twl4030_keypad",
-				pdata->keypad, sizeof(*pdata->keypad),
-				true, pdata->irq_base + 1, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_madc() && pdata->madc) {
-		child = add_child(2, "twl4030_madc",
-				pdata->madc, sizeof(*pdata->madc),
-				true, pdata->irq_base + 3, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_rtc()) {
-		/*
-		 * REVISIT platform_data here currently might expose the
-		 * "msecure" line ... but for now we just expect board
-		 * setup to tell the chip "it's always ok to SET_TIME".
-		 * Eventually, Linux might become more aware of such
-		 * HW security concerns, and "least privilege".
-		 */
-		child = add_child(3, "twl4030_rtc",
-				NULL, 0,
-				true, pdata->irq_base + 8 + 3, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_usb() && pdata->usb) {
-
-		static struct regulator_consumer_supply usb1v5 = {
-			.supply =	"usb1v5",
-		};
-		static struct regulator_consumer_supply usb1v8 = {
-			.supply =	"usb1v8",
-		};
-		static struct regulator_consumer_supply usb3v1 = {
-			.supply =	"usb3v1",
-		};
-
-	/* First add the regulators so that they can be used by transceiver */
-		if (twl_has_regulator()) {
-			/* this is a template that gets copied */
-			struct regulator_init_data usb_fixed = {
-				.constraints.valid_modes_mask =
-					REGULATOR_MODE_NORMAL
-					| REGULATOR_MODE_STANDBY,
-				.constraints.valid_ops_mask =
-					REGULATOR_CHANGE_MODE
-					| REGULATOR_CHANGE_STATUS,
-			};
-
-			child = add_regulator_linked(TWL4030_REG_VUSB1V5,
-						      &usb_fixed, &usb1v5, 1);
-			if (IS_ERR(child))
-				return PTR_ERR(child);
-
-			child = add_regulator_linked(TWL4030_REG_VUSB1V8,
-						      &usb_fixed, &usb1v8, 1);
-			if (IS_ERR(child))
-				return PTR_ERR(child);
-
-			child = add_regulator_linked(TWL4030_REG_VUSB3V1,
-						      &usb_fixed, &usb3v1, 1);
-			if (IS_ERR(child))
-				return PTR_ERR(child);
-
-		}
-
-		child = add_child(0, "twl4030_usb",
-				pdata->usb, sizeof(*pdata->usb),
-				true,
-				/* irq0 = USB_PRES, irq1 = USB */
-				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
-
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		/* we need to connect regulators to this transceiver */
-		if (twl_has_regulator() && child) {
-			usb1v5.dev = child;
-			usb1v8.dev = child;
-			usb3v1.dev = child;
-		}
-	}
-
-	if (twl_has_watchdog()) {
-		child = add_child(0, "twl4030_wdt", NULL, 0, false, 0, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_pwrbutton()) {
-		child = add_child(1, "twl4030_pwrbutton",
-				NULL, 0, true, pdata->irq_base + 8 + 0, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_codec() && pdata->codec) {
-		child = add_child(1, "twl4030_codec",
-				pdata->codec, sizeof(*pdata->codec),
-				false, 0, 0);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	if (twl_has_regulator()) {
-		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VIO, pdata->vio);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VDD1, pdata->vdd1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VDD2, pdata->vdd2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VDAC, pdata->vdac);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator((features & TWL4030_VAUX2)
-					? TWL4030_REG_VAUX2_4030
-					: TWL4030_REG_VAUX2,
-				pdata->vaux2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VINTANA1, pdata->vintana1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VINTANA2, pdata->vintana2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VINTDIG, pdata->vintdig);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	/* maybe add LDOs that are omitted on cost-reduced parts */
-	if (twl_has_regulator() && !(features & TPS_SUBSET)) {
-		child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VSIM, pdata->vsim);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
-		child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
-	return 0;
-}
-
-/*----------------------------------------------------------------------*/
-
-/*
- * These three functions initialize the on-chip clock framework,
- * letting it generate the right frequencies for USB, MADC, and
- * other purposes.
- */
-static inline int __init protect_pm_master(void)
-{
-	int e = 0;
-
-	e = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_LOCK,
-			R_PROTECT_KEY);
-	return e;
-}
-
-static inline int __init unprotect_pm_master(void)
-{
-	int e = 0;
-
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK1,
-			R_PROTECT_KEY);
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK2,
-			R_PROTECT_KEY);
-	return e;
-}
-
-static void clocks_init(struct device *dev,
-			struct twl4030_clock_init_data *clock)
-{
-	int e = 0;
-	struct clk *osc;
-	u32 rate;
-	u8 ctrl = HFCLK_FREQ_26_MHZ;
-
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
-	if (cpu_is_omap2430())
-		osc = clk_get(dev, "osc_ck");
-	else
-		osc = clk_get(dev, "osc_sys_ck");
-
-	if (IS_ERR(osc)) {
-		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
-				"using bootloader value (unknown osc rate)\n");
-		return;
-	}
-
-	rate = clk_get_rate(osc);
-	clk_put(osc);
-
-#else
-	/* REVISIT for non-OMAP systems, pass the clock rate from
-	 * board init code, using platform_data.
-	 */
-	osc = ERR_PTR(-EIO);
-
-	printk(KERN_WARNING "Skipping twl4030 internal clock init and "
-	       "using bootloader value (unknown osc rate)\n");
-
-	return;
-#endif
-
-	switch (rate) {
-	case 19200000:
-		ctrl = HFCLK_FREQ_19p2_MHZ;
-		break;
-	case 26000000:
-		ctrl = HFCLK_FREQ_26_MHZ;
-		break;
-	case 38400000:
-		ctrl = HFCLK_FREQ_38p4_MHZ;
-		break;
-	}
-
-	ctrl |= HIGH_PERF_SQ;
-	if (clock && clock->ck32k_lowpwr_enable)
-		ctrl |= CK32K_LOWPWR_EN;
-
-	e |= unprotect_pm_master();
-	/* effect->MADC+USB ck en */
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
-	e |= protect_pm_master();
-
-	if (e < 0)
-		pr_err("%s: clock init err [%d]\n", DRIVER_NAME, e);
-}
-
-/*----------------------------------------------------------------------*/
-
-int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
-int twl_exit_irq(void);
-int twl_init_chip_irq(const char *chip);
-
-static int twl4030_remove(struct i2c_client *client)
-{
-	unsigned i;
-	int status;
-
-	status = twl_exit_irq();
-	if (status < 0)
-		return status;
-
-	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
-		struct twl4030_client	*twl = &twl4030_modules[i];
-
-		if (twl->client && twl->client != client)
-			i2c_unregister_device(twl->client);
-		twl4030_modules[i].client = NULL;
-	}
-	inuse = false;
-	return 0;
-}
-
-/* NOTE:  this driver only handles a single twl4030/tps659x0 chip */
-static int __init
-twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
-{
-	int				status;
-	unsigned			i;
-	struct twl4030_platform_data	*pdata = client->dev.platform_data;
-
-	if (!pdata) {
-		dev_dbg(&client->dev, "no platform data?\n");
-		return -EINVAL;
-	}
-
-	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) {
-		dev_dbg(&client->dev, "can't talk I2C?\n");
-		return -EIO;
-	}
-
-	if (inuse) {
-		dev_dbg(&client->dev, "driver is already in use\n");
-		return -EBUSY;
-	}
-
-	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
-		struct twl4030_client	*twl = &twl4030_modules[i];
-
-		twl->address = client->addr + i;
-		if (i == 0)
-			twl->client = client;
-		else {
-			twl->client = i2c_new_dummy(client->adapter,
-					twl->address);
-			if (!twl->client) {
-				dev_err(&client->dev,
-					"can't attach client %d\n", i);
-				status = -ENOMEM;
-				goto fail;
-			}
-		}
-		mutex_init(&twl->xfer_lock);
-	}
-	inuse = true;
-
-	/* setup clock framework */
-	clocks_init(&client->dev, pdata->clock);
-
-	/* load power event scripts */
-	if (twl_has_power() && pdata->power)
-		twl4030_power_init(pdata->power);
-
-	/* Maybe init the T2 Interrupt subsystem */
-	if (client->irq
-			&& pdata->irq_base
-			&& pdata->irq_end > pdata->irq_base) {
-		twl_init_chip_irq(id->name);
-		status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
-		if (status < 0)
-			goto fail;
-	}
-
-	status = add_children(pdata, id->driver_data);
-fail:
-	if (status < 0)
-		twl4030_remove(client);
-	return status;
-}
-
-static const struct i2c_device_id twl4030_ids[] = {
-	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
-	{ "twl5030", 0 },		/* T2 updated */
-	{ "twl5031", TWL5031 },		/* TWL5030 updated */
-	{ "tps65950", 0 },		/* catalog version of twl5030 */
-	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
-	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
-	{ /* end of list */ },
-};
-MODULE_DEVICE_TABLE(i2c, twl4030_ids);
-
-/* One Client Driver , 4 Clients */
-static struct i2c_driver twl4030_driver = {
-	.driver.name	= DRIVER_NAME,
-	.id_table	= twl4030_ids,
-	.probe		= twl4030_probe,
-	.remove		= twl4030_remove,
-};
-
-static int __init twl4030_init(void)
-{
-	return i2c_add_driver(&twl4030_driver);
-}
-subsys_initcall(twl4030_init);
-
-static void __exit twl4030_exit(void)
-{
-	i2c_del_driver(&twl4030_driver);
-}
-module_exit(twl4030_exit);
-
-MODULE_AUTHOR("Texas Instruments, Inc.");
-MODULE_DESCRIPTION("I2C Core interface for TWL4030");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 3f7e93ca0514..c4528db549c6 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -32,7 +32,7 @@
 #include <linux/irq.h>
 #include <linux/kthread.h>
 
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 
 /*
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 3048f18e0419..424b255d6f92 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -26,7 +26,7 @@
 
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/platform_device.h>
 
 #include <asm/mach-types.h>
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 4257a8683778..9ae3cc44e668 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
 obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
-obj-$(CONFIG_REGULATOR_TWL4030) += twl4030-regulator.o
+obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
new file mode 100644
index 000000000000..c8a6e583d773
--- /dev/null
+++ b/drivers/regulator/twl-regulator.c
@@ -0,0 +1,500 @@
+/*
+ * twl4030-regulator.c -- support regulators in twl4030 family chips
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/i2c/twl.h>
+
+
+/*
+ * The TWL4030/TW5030/TPS659x0 family chips include power management, a
+ * USB OTG transceiver, an RTC, ADC, PWM, and lots more.  Some versions
+ * include an audio codec, battery charger, and more voltage regulators.
+ * These chips are often used in OMAP-based systems.
+ *
+ * This driver implements software-based resource control for various
+ * voltage regulators.  This is usually augmented with state machine
+ * based control.
+ */
+
+struct twlreg_info {
+	/* start of regulator's PM_RECEIVER control register bank */
+	u8			base;
+
+	/* twl4030 resource ID, for resource control state machine */
+	u8			id;
+
+	/* voltage in mV = table[VSEL]; table_len must be a power-of-two */
+	u8			table_len;
+	const u16		*table;
+
+	/* chip constraints on regulator behavior */
+	u16			min_mV;
+
+	/* used by regulator core */
+	struct regulator_desc	desc;
+};
+
+
+/* LDO control registers ... offset is from the base of its register bank.
+ * The first three registers of all power resource banks help hardware to
+ * manage the various resource groups.
+ */
+#define VREG_GRP		0
+#define VREG_TYPE		1
+#define VREG_REMAP		2
+#define VREG_DEDICATED		3	/* LDO control */
+
+
+static inline int
+twl4030reg_read(struct twlreg_info *info, unsigned offset)
+{
+	u8 value;
+	int status;
+
+	status = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER,
+			&value, info->base + offset);
+	return (status < 0) ? status : value;
+}
+
+static inline int
+twl4030reg_write(struct twlreg_info *info, unsigned offset, u8 value)
+{
+	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+			value, info->base + offset);
+}
+
+/*----------------------------------------------------------------------*/
+
+/* generic power resource operations, which work on all regulators */
+
+static int twl4030reg_grp(struct regulator_dev *rdev)
+{
+	return twl4030reg_read(rdev_get_drvdata(rdev), VREG_GRP);
+}
+
+/*
+ * Enable/disable regulators by joining/leaving the P1 (processor) group.
+ * We assume nobody else is updating the DEV_GRP registers.
+ */
+
+#define P3_GRP		BIT(7)		/* "peripherals" */
+#define P2_GRP		BIT(6)		/* secondary processor, modem, etc */
+#define P1_GRP		BIT(5)		/* CPU/Linux */
+
+static int twl4030reg_is_enabled(struct regulator_dev *rdev)
+{
+	int	state = twl4030reg_grp(rdev);
+
+	if (state < 0)
+		return state;
+
+	return (state & P1_GRP) != 0;
+}
+
+static int twl4030reg_enable(struct regulator_dev *rdev)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	int			grp;
+
+	grp = twl4030reg_read(info, VREG_GRP);
+	if (grp < 0)
+		return grp;
+
+	grp |= P1_GRP;
+	return twl4030reg_write(info, VREG_GRP, grp);
+}
+
+static int twl4030reg_disable(struct regulator_dev *rdev)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	int			grp;
+
+	grp = twl4030reg_read(info, VREG_GRP);
+	if (grp < 0)
+		return grp;
+
+	grp &= ~P1_GRP;
+	return twl4030reg_write(info, VREG_GRP, grp);
+}
+
+static int twl4030reg_get_status(struct regulator_dev *rdev)
+{
+	int	state = twl4030reg_grp(rdev);
+
+	if (state < 0)
+		return state;
+	state &= 0x0f;
+
+	/* assume state != WARM_RESET; we'd not be running...  */
+	if (!state)
+		return REGULATOR_STATUS_OFF;
+	return (state & BIT(3))
+		? REGULATOR_STATUS_NORMAL
+		: REGULATOR_STATUS_STANDBY;
+}
+
+static int twl4030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	unsigned		message;
+	int			status;
+
+	/* We can only set the mode through state machine commands... */
+	switch (mode) {
+	case REGULATOR_MODE_NORMAL:
+		message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_ACTIVE);
+		break;
+	case REGULATOR_MODE_STANDBY:
+		message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_SLEEP);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Ensure the resource is associated with some group */
+	status = twl4030reg_grp(rdev);
+	if (status < 0)
+		return status;
+	if (!(status & (P3_GRP | P2_GRP | P1_GRP)))
+		return -EACCES;
+
+	status = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+			message >> 8, 0x15 /* PB_WORD_MSB */ );
+	if (status >= 0)
+		return status;
+
+	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+			message, 0x16 /* PB_WORD_LSB */ );
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Support for adjustable-voltage LDOs uses a four bit (or less) voltage
+ * select field in its control register.   We use tables indexed by VSEL
+ * to record voltages in milliVolts.  (Accuracy is about three percent.)
+ *
+ * Note that VSEL values for VAUX2 changed in twl5030 and newer silicon;
+ * currently handled by listing two slightly different VAUX2 regulators,
+ * only one of which will be configured.
+ *
+ * VSEL values documented as "TI cannot support these values" are flagged
+ * in these tables as UNSUP() values; we normally won't assign them.
+ *
+ * VAUX3 at 3V is incorrectly listed in some TI manuals as unsupported.
+ * TI are revising the twl5030/tps659x0 specs to support that 3.0V setting.
+ */
+#ifdef CONFIG_TWL4030_ALLOW_UNSUPPORTED
+#define UNSUP_MASK	0x0000
+#else
+#define UNSUP_MASK	0x8000
+#endif
+
+#define UNSUP(x)	(UNSUP_MASK | (x))
+#define IS_UNSUP(x)	(UNSUP_MASK & (x))
+#define LDO_MV(x)	(~UNSUP_MASK & (x))
+
+
+static const u16 VAUX1_VSEL_table[] = {
+	UNSUP(1500), UNSUP(1800), 2500, 2800,
+	3000, 3000, 3000, 3000,
+};
+static const u16 VAUX2_4030_VSEL_table[] = {
+	UNSUP(1000), UNSUP(1000), UNSUP(1200), 1300,
+	1500, 1800, UNSUP(1850), 2500,
+	UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
+	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VAUX2_VSEL_table[] = {
+	1700, 1700, 1900, 1300,
+	1500, 1800, 2000, 2500,
+	2100, 2800, 2200, 2300,
+	2400, 2400, 2400, 2400,
+};
+static const u16 VAUX3_VSEL_table[] = {
+	1500, 1800, 2500, 2800,
+	3000, 3000, 3000, 3000,
+};
+static const u16 VAUX4_VSEL_table[] = {
+	700, 1000, 1200, UNSUP(1300),
+	1500, 1800, UNSUP(1850), 2500,
+	UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
+	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VMMC1_VSEL_table[] = {
+	1850, 2850, 3000, 3150,
+};
+static const u16 VMMC2_VSEL_table[] = {
+	UNSUP(1000), UNSUP(1000), UNSUP(1200), UNSUP(1300),
+	UNSUP(1500), UNSUP(1800), 1850, UNSUP(2500),
+	2600, 2800, 2850, 3000,
+	3150, 3150, 3150, 3150,
+};
+static const u16 VPLL1_VSEL_table[] = {
+	1000, 1200, 1300, 1800,
+	UNSUP(2800), UNSUP(3000), UNSUP(3000), UNSUP(3000),
+};
+static const u16 VPLL2_VSEL_table[] = {
+	700, 1000, 1200, 1300,
+	UNSUP(1500), 1800, UNSUP(1850), UNSUP(2500),
+	UNSUP(2600), UNSUP(2800), UNSUP(2850), UNSUP(3000),
+	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VSIM_VSEL_table[] = {
+	UNSUP(1000), UNSUP(1200), UNSUP(1300), 1800,
+	2800, 3000, 3000, 3000,
+};
+static const u16 VDAC_VSEL_table[] = {
+	1200, 1300, 1800, 1800,
+};
+
+
+static int twl4030ldo_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	int			mV = info->table[index];
+
+	return IS_UNSUP(mV) ? 0 : (LDO_MV(mV) * 1000);
+}
+
+static int
+twl4030ldo_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	int			vsel;
+
+	for (vsel = 0; vsel < info->table_len; vsel++) {
+		int mV = info->table[vsel];
+		int uV;
+
+		if (IS_UNSUP(mV))
+			continue;
+		uV = LDO_MV(mV) * 1000;
+
+		/* REVISIT for VAUX2, first match may not be best/lowest */
+
+		/* use the first in-range value */
+		if (min_uV <= uV && uV <= max_uV)
+			return twl4030reg_write(info, VREG_DEDICATED, vsel);
+	}
+
+	return -EDOM;
+}
+
+static int twl4030ldo_get_voltage(struct regulator_dev *rdev)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+	int			vsel = twl4030reg_read(info, VREG_DEDICATED);
+
+	if (vsel < 0)
+		return vsel;
+
+	vsel &= info->table_len - 1;
+	return LDO_MV(info->table[vsel]) * 1000;
+}
+
+static struct regulator_ops twl4030ldo_ops = {
+	.list_voltage	= twl4030ldo_list_voltage,
+
+	.set_voltage	= twl4030ldo_set_voltage,
+	.get_voltage	= twl4030ldo_get_voltage,
+
+	.enable		= twl4030reg_enable,
+	.disable	= twl4030reg_disable,
+	.is_enabled	= twl4030reg_is_enabled,
+
+	.set_mode	= twl4030reg_set_mode,
+
+	.get_status	= twl4030reg_get_status,
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Fixed voltage LDOs don't have a VSEL field to update.
+ */
+static int twl4030fixed_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+
+	return info->min_mV * 1000;
+}
+
+static int twl4030fixed_get_voltage(struct regulator_dev *rdev)
+{
+	struct twlreg_info	*info = rdev_get_drvdata(rdev);
+
+	return info->min_mV * 1000;
+}
+
+static struct regulator_ops twl4030fixed_ops = {
+	.list_voltage	= twl4030fixed_list_voltage,
+
+	.get_voltage	= twl4030fixed_get_voltage,
+
+	.enable		= twl4030reg_enable,
+	.disable	= twl4030reg_disable,
+	.is_enabled	= twl4030reg_is_enabled,
+
+	.set_mode	= twl4030reg_set_mode,
+
+	.get_status	= twl4030reg_get_status,
+};
+
+/*----------------------------------------------------------------------*/
+
+#define TWL_ADJUSTABLE_LDO(label, offset, num) { \
+	.base = offset, \
+	.id = num, \
+	.table_len = ARRAY_SIZE(label##_VSEL_table), \
+	.table = label##_VSEL_table, \
+	.desc = { \
+		.name = #label, \
+		.id = TWL4030_REG_##label, \
+		.n_voltages = ARRAY_SIZE(label##_VSEL_table), \
+		.ops = &twl4030ldo_ops, \
+		.type = REGULATOR_VOLTAGE, \
+		.owner = THIS_MODULE, \
+		}, \
+	}
+
+#define TWL_FIXED_LDO(label, offset, mVolts, num) { \
+	.base = offset, \
+	.id = num, \
+	.min_mV = mVolts, \
+	.desc = { \
+		.name = #label, \
+		.id = TWL4030_REG_##label, \
+		.n_voltages = 1, \
+		.ops = &twl4030fixed_ops, \
+		.type = REGULATOR_VOLTAGE, \
+		.owner = THIS_MODULE, \
+		}, \
+	}
+
+/*
+ * We list regulators here if systems need some level of
+ * software control over them after boot.
+ */
+static struct twlreg_info twl4030_regs[] = {
+	TWL_ADJUSTABLE_LDO(VAUX1, 0x17, 1),
+	TWL_ADJUSTABLE_LDO(VAUX2_4030, 0x1b, 2),
+	TWL_ADJUSTABLE_LDO(VAUX2, 0x1b, 2),
+	TWL_ADJUSTABLE_LDO(VAUX3, 0x1f, 3),
+	TWL_ADJUSTABLE_LDO(VAUX4, 0x23, 4),
+	TWL_ADJUSTABLE_LDO(VMMC1, 0x27, 5),
+	TWL_ADJUSTABLE_LDO(VMMC2, 0x2b, 6),
+	/*
+	TWL_ADJUSTABLE_LDO(VPLL1, 0x2f, 7),
+	*/
+	TWL_ADJUSTABLE_LDO(VPLL2, 0x33, 8),
+	TWL_ADJUSTABLE_LDO(VSIM, 0x37, 9),
+	TWL_ADJUSTABLE_LDO(VDAC, 0x3b, 10),
+	/*
+	TWL_ADJUSTABLE_LDO(VINTANA1, 0x3f, 11),
+	TWL_ADJUSTABLE_LDO(VINTANA2, 0x43, 12),
+	TWL_ADJUSTABLE_LDO(VINTDIG, 0x47, 13),
+	TWL_SMPS(VIO, 0x4b, 14),
+	TWL_SMPS(VDD1, 0x55, 15),
+	TWL_SMPS(VDD2, 0x63, 16),
+	 */
+	TWL_FIXED_LDO(VUSB1V5, 0x71, 1500, 17),
+	TWL_FIXED_LDO(VUSB1V8, 0x74, 1800, 18),
+	TWL_FIXED_LDO(VUSB3V1, 0x77, 3100, 19),
+	/* VUSBCP is managed *only* by the USB subchip */
+};
+
+static int twl4030reg_probe(struct platform_device *pdev)
+{
+	int				i;
+	struct twlreg_info		*info;
+	struct regulator_init_data	*initdata;
+	struct regulation_constraints	*c;
+	struct regulator_dev		*rdev;
+
+	for (i = 0, info = NULL; i < ARRAY_SIZE(twl4030_regs); i++) {
+		if (twl4030_regs[i].desc.id != pdev->id)
+			continue;
+		info = twl4030_regs + i;
+		break;
+	}
+	if (!info)
+		return -ENODEV;
+
+	initdata = pdev->dev.platform_data;
+	if (!initdata)
+		return -EINVAL;
+
+	/* Constrain board-specific capabilities according to what
+	 * this driver and the chip itself can actually do.
+	 */
+	c = &initdata->constraints;
+	c->valid_modes_mask &= REGULATOR_MODE_NORMAL | REGULATOR_MODE_STANDBY;
+	c->valid_ops_mask &= REGULATOR_CHANGE_VOLTAGE
+				| REGULATOR_CHANGE_MODE
+				| REGULATOR_CHANGE_STATUS;
+
+	rdev = regulator_register(&info->desc, &pdev->dev, initdata, info);
+	if (IS_ERR(rdev)) {
+		dev_err(&pdev->dev, "can't register %s, %ld\n",
+				info->desc.name, PTR_ERR(rdev));
+		return PTR_ERR(rdev);
+	}
+	platform_set_drvdata(pdev, rdev);
+
+	/* NOTE:  many regulators support short-circuit IRQs (presentable
+	 * as REGULATOR_OVER_CURRENT notifications?) configured via:
+	 *  - SC_CONFIG
+	 *  - SC_DETECT1 (vintana2, vmmc1/2, vaux1/2/3/4)
+	 *  - SC_DETECT2 (vusb, vdac, vio, vdd1/2, vpll2)
+	 *  - IT_CONFIG
+	 */
+
+	return 0;
+}
+
+static int __devexit twl4030reg_remove(struct platform_device *pdev)
+{
+	regulator_unregister(platform_get_drvdata(pdev));
+	return 0;
+}
+
+MODULE_ALIAS("platform:twl4030_reg");
+
+static struct platform_driver twl4030reg_driver = {
+	.probe		= twl4030reg_probe,
+	.remove		= __devexit_p(twl4030reg_remove),
+	/* NOTE: short name, to work around driver model truncation of
+	 * "twl4030_regulator.12" (and friends) to "twl4030_regulator.1".
+	 */
+	.driver.name	= "twl4030_reg",
+	.driver.owner	= THIS_MODULE,
+};
+
+static int __init twl4030reg_init(void)
+{
+	return platform_driver_register(&twl4030reg_driver);
+}
+subsys_initcall(twl4030reg_init);
+
+static void __exit twl4030reg_exit(void)
+{
+	platform_driver_unregister(&twl4030reg_driver);
+}
+module_exit(twl4030reg_exit)
+
+MODULE_DESCRIPTION("TWL4030 regulator driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/twl4030-regulator.c b/drivers/regulator/twl4030-regulator.c
deleted file mode 100644
index e2032fb60b55..000000000000
--- a/drivers/regulator/twl4030-regulator.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * twl4030-regulator.c -- support regulators in twl4030 family chips
- *
- * Copyright (C) 2008 David Brownell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/driver.h>
-#include <linux/regulator/machine.h>
-#include <linux/i2c/twl4030.h>
-
-
-/*
- * The TWL4030/TW5030/TPS659x0 family chips include power management, a
- * USB OTG transceiver, an RTC, ADC, PWM, and lots more.  Some versions
- * include an audio codec, battery charger, and more voltage regulators.
- * These chips are often used in OMAP-based systems.
- *
- * This driver implements software-based resource control for various
- * voltage regulators.  This is usually augmented with state machine
- * based control.
- */
-
-struct twlreg_info {
-	/* start of regulator's PM_RECEIVER control register bank */
-	u8			base;
-
-	/* twl4030 resource ID, for resource control state machine */
-	u8			id;
-
-	/* voltage in mV = table[VSEL]; table_len must be a power-of-two */
-	u8			table_len;
-	const u16		*table;
-
-	/* chip constraints on regulator behavior */
-	u16			min_mV;
-
-	/* used by regulator core */
-	struct regulator_desc	desc;
-};
-
-
-/* LDO control registers ... offset is from the base of its register bank.
- * The first three registers of all power resource banks help hardware to
- * manage the various resource groups.
- */
-#define VREG_GRP		0
-#define VREG_TYPE		1
-#define VREG_REMAP		2
-#define VREG_DEDICATED		3	/* LDO control */
-
-
-static inline int
-twl4030reg_read(struct twlreg_info *info, unsigned offset)
-{
-	u8 value;
-	int status;
-
-	status = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER,
-			&value, info->base + offset);
-	return (status < 0) ? status : value;
-}
-
-static inline int
-twl4030reg_write(struct twlreg_info *info, unsigned offset, u8 value)
-{
-	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
-			value, info->base + offset);
-}
-
-/*----------------------------------------------------------------------*/
-
-/* generic power resource operations, which work on all regulators */
-
-static int twl4030reg_grp(struct regulator_dev *rdev)
-{
-	return twl4030reg_read(rdev_get_drvdata(rdev), VREG_GRP);
-}
-
-/*
- * Enable/disable regulators by joining/leaving the P1 (processor) group.
- * We assume nobody else is updating the DEV_GRP registers.
- */
-
-#define P3_GRP		BIT(7)		/* "peripherals" */
-#define P2_GRP		BIT(6)		/* secondary processor, modem, etc */
-#define P1_GRP		BIT(5)		/* CPU/Linux */
-
-static int twl4030reg_is_enabled(struct regulator_dev *rdev)
-{
-	int	state = twl4030reg_grp(rdev);
-
-	if (state < 0)
-		return state;
-
-	return (state & P1_GRP) != 0;
-}
-
-static int twl4030reg_enable(struct regulator_dev *rdev)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			grp;
-
-	grp = twl4030reg_read(info, VREG_GRP);
-	if (grp < 0)
-		return grp;
-
-	grp |= P1_GRP;
-	return twl4030reg_write(info, VREG_GRP, grp);
-}
-
-static int twl4030reg_disable(struct regulator_dev *rdev)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			grp;
-
-	grp = twl4030reg_read(info, VREG_GRP);
-	if (grp < 0)
-		return grp;
-
-	grp &= ~P1_GRP;
-	return twl4030reg_write(info, VREG_GRP, grp);
-}
-
-static int twl4030reg_get_status(struct regulator_dev *rdev)
-{
-	int	state = twl4030reg_grp(rdev);
-
-	if (state < 0)
-		return state;
-	state &= 0x0f;
-
-	/* assume state != WARM_RESET; we'd not be running...  */
-	if (!state)
-		return REGULATOR_STATUS_OFF;
-	return (state & BIT(3))
-		? REGULATOR_STATUS_NORMAL
-		: REGULATOR_STATUS_STANDBY;
-}
-
-static int twl4030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	unsigned		message;
-	int			status;
-
-	/* We can only set the mode through state machine commands... */
-	switch (mode) {
-	case REGULATOR_MODE_NORMAL:
-		message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_ACTIVE);
-		break;
-	case REGULATOR_MODE_STANDBY:
-		message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_SLEEP);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* Ensure the resource is associated with some group */
-	status = twl4030reg_grp(rdev);
-	if (status < 0)
-		return status;
-	if (!(status & (P3_GRP | P2_GRP | P1_GRP)))
-		return -EACCES;
-
-	status = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			message >> 8, 0x15 /* PB_WORD_MSB */ );
-	if (status >= 0)
-		return status;
-
-	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
-			message, 0x16 /* PB_WORD_LSB */ );
-}
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Support for adjustable-voltage LDOs uses a four bit (or less) voltage
- * select field in its control register.   We use tables indexed by VSEL
- * to record voltages in milliVolts.  (Accuracy is about three percent.)
- *
- * Note that VSEL values for VAUX2 changed in twl5030 and newer silicon;
- * currently handled by listing two slightly different VAUX2 regulators,
- * only one of which will be configured.
- *
- * VSEL values documented as "TI cannot support these values" are flagged
- * in these tables as UNSUP() values; we normally won't assign them.
- *
- * VAUX3 at 3V is incorrectly listed in some TI manuals as unsupported.
- * TI are revising the twl5030/tps659x0 specs to support that 3.0V setting.
- */
-#ifdef CONFIG_TWL4030_ALLOW_UNSUPPORTED
-#define UNSUP_MASK	0x0000
-#else
-#define UNSUP_MASK	0x8000
-#endif
-
-#define UNSUP(x)	(UNSUP_MASK | (x))
-#define IS_UNSUP(x)	(UNSUP_MASK & (x))
-#define LDO_MV(x)	(~UNSUP_MASK & (x))
-
-
-static const u16 VAUX1_VSEL_table[] = {
-	UNSUP(1500), UNSUP(1800), 2500, 2800,
-	3000, 3000, 3000, 3000,
-};
-static const u16 VAUX2_4030_VSEL_table[] = {
-	UNSUP(1000), UNSUP(1000), UNSUP(1200), 1300,
-	1500, 1800, UNSUP(1850), 2500,
-	UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
-	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
-};
-static const u16 VAUX2_VSEL_table[] = {
-	1700, 1700, 1900, 1300,
-	1500, 1800, 2000, 2500,
-	2100, 2800, 2200, 2300,
-	2400, 2400, 2400, 2400,
-};
-static const u16 VAUX3_VSEL_table[] = {
-	1500, 1800, 2500, 2800,
-	3000, 3000, 3000, 3000,
-};
-static const u16 VAUX4_VSEL_table[] = {
-	700, 1000, 1200, UNSUP(1300),
-	1500, 1800, UNSUP(1850), 2500,
-	UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
-	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
-};
-static const u16 VMMC1_VSEL_table[] = {
-	1850, 2850, 3000, 3150,
-};
-static const u16 VMMC2_VSEL_table[] = {
-	UNSUP(1000), UNSUP(1000), UNSUP(1200), UNSUP(1300),
-	UNSUP(1500), UNSUP(1800), 1850, UNSUP(2500),
-	2600, 2800, 2850, 3000,
-	3150, 3150, 3150, 3150,
-};
-static const u16 VPLL1_VSEL_table[] = {
-	1000, 1200, 1300, 1800,
-	UNSUP(2800), UNSUP(3000), UNSUP(3000), UNSUP(3000),
-};
-static const u16 VPLL2_VSEL_table[] = {
-	700, 1000, 1200, 1300,
-	UNSUP(1500), 1800, UNSUP(1850), UNSUP(2500),
-	UNSUP(2600), UNSUP(2800), UNSUP(2850), UNSUP(3000),
-	UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
-};
-static const u16 VSIM_VSEL_table[] = {
-	UNSUP(1000), UNSUP(1200), UNSUP(1300), 1800,
-	2800, 3000, 3000, 3000,
-};
-static const u16 VDAC_VSEL_table[] = {
-	1200, 1300, 1800, 1800,
-};
-
-
-static int twl4030ldo_list_voltage(struct regulator_dev *rdev, unsigned index)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			mV = info->table[index];
-
-	return IS_UNSUP(mV) ? 0 : (LDO_MV(mV) * 1000);
-}
-
-static int
-twl4030ldo_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			vsel;
-
-	for (vsel = 0; vsel < info->table_len; vsel++) {
-		int mV = info->table[vsel];
-		int uV;
-
-		if (IS_UNSUP(mV))
-			continue;
-		uV = LDO_MV(mV) * 1000;
-
-		/* REVISIT for VAUX2, first match may not be best/lowest */
-
-		/* use the first in-range value */
-		if (min_uV <= uV && uV <= max_uV)
-			return twl4030reg_write(info, VREG_DEDICATED, vsel);
-	}
-
-	return -EDOM;
-}
-
-static int twl4030ldo_get_voltage(struct regulator_dev *rdev)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			vsel = twl4030reg_read(info, VREG_DEDICATED);
-
-	if (vsel < 0)
-		return vsel;
-
-	vsel &= info->table_len - 1;
-	return LDO_MV(info->table[vsel]) * 1000;
-}
-
-static struct regulator_ops twl4030ldo_ops = {
-	.list_voltage	= twl4030ldo_list_voltage,
-
-	.set_voltage	= twl4030ldo_set_voltage,
-	.get_voltage	= twl4030ldo_get_voltage,
-
-	.enable		= twl4030reg_enable,
-	.disable	= twl4030reg_disable,
-	.is_enabled	= twl4030reg_is_enabled,
-
-	.set_mode	= twl4030reg_set_mode,
-
-	.get_status	= twl4030reg_get_status,
-};
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Fixed voltage LDOs don't have a VSEL field to update.
- */
-static int twl4030fixed_list_voltage(struct regulator_dev *rdev, unsigned index)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-
-	return info->min_mV * 1000;
-}
-
-static int twl4030fixed_get_voltage(struct regulator_dev *rdev)
-{
-	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-
-	return info->min_mV * 1000;
-}
-
-static struct regulator_ops twl4030fixed_ops = {
-	.list_voltage	= twl4030fixed_list_voltage,
-
-	.get_voltage	= twl4030fixed_get_voltage,
-
-	.enable		= twl4030reg_enable,
-	.disable	= twl4030reg_disable,
-	.is_enabled	= twl4030reg_is_enabled,
-
-	.set_mode	= twl4030reg_set_mode,
-
-	.get_status	= twl4030reg_get_status,
-};
-
-/*----------------------------------------------------------------------*/
-
-#define TWL_ADJUSTABLE_LDO(label, offset, num) { \
-	.base = offset, \
-	.id = num, \
-	.table_len = ARRAY_SIZE(label##_VSEL_table), \
-	.table = label##_VSEL_table, \
-	.desc = { \
-		.name = #label, \
-		.id = TWL4030_REG_##label, \
-		.n_voltages = ARRAY_SIZE(label##_VSEL_table), \
-		.ops = &twl4030ldo_ops, \
-		.type = REGULATOR_VOLTAGE, \
-		.owner = THIS_MODULE, \
-		}, \
-	}
-
-#define TWL_FIXED_LDO(label, offset, mVolts, num) { \
-	.base = offset, \
-	.id = num, \
-	.min_mV = mVolts, \
-	.desc = { \
-		.name = #label, \
-		.id = TWL4030_REG_##label, \
-		.n_voltages = 1, \
-		.ops = &twl4030fixed_ops, \
-		.type = REGULATOR_VOLTAGE, \
-		.owner = THIS_MODULE, \
-		}, \
-	}
-
-/*
- * We list regulators here if systems need some level of
- * software control over them after boot.
- */
-static struct twlreg_info twl4030_regs[] = {
-	TWL_ADJUSTABLE_LDO(VAUX1, 0x17, 1),
-	TWL_ADJUSTABLE_LDO(VAUX2_4030, 0x1b, 2),
-	TWL_ADJUSTABLE_LDO(VAUX2, 0x1b, 2),
-	TWL_ADJUSTABLE_LDO(VAUX3, 0x1f, 3),
-	TWL_ADJUSTABLE_LDO(VAUX4, 0x23, 4),
-	TWL_ADJUSTABLE_LDO(VMMC1, 0x27, 5),
-	TWL_ADJUSTABLE_LDO(VMMC2, 0x2b, 6),
-	/*
-	TWL_ADJUSTABLE_LDO(VPLL1, 0x2f, 7),
-	*/
-	TWL_ADJUSTABLE_LDO(VPLL2, 0x33, 8),
-	TWL_ADJUSTABLE_LDO(VSIM, 0x37, 9),
-	TWL_ADJUSTABLE_LDO(VDAC, 0x3b, 10),
-	/*
-	TWL_ADJUSTABLE_LDO(VINTANA1, 0x3f, 11),
-	TWL_ADJUSTABLE_LDO(VINTANA2, 0x43, 12),
-	TWL_ADJUSTABLE_LDO(VINTDIG, 0x47, 13),
-	TWL_SMPS(VIO, 0x4b, 14),
-	TWL_SMPS(VDD1, 0x55, 15),
-	TWL_SMPS(VDD2, 0x63, 16),
-	 */
-	TWL_FIXED_LDO(VUSB1V5, 0x71, 1500, 17),
-	TWL_FIXED_LDO(VUSB1V8, 0x74, 1800, 18),
-	TWL_FIXED_LDO(VUSB3V1, 0x77, 3100, 19),
-	/* VUSBCP is managed *only* by the USB subchip */
-};
-
-static int twl4030reg_probe(struct platform_device *pdev)
-{
-	int				i;
-	struct twlreg_info		*info;
-	struct regulator_init_data	*initdata;
-	struct regulation_constraints	*c;
-	struct regulator_dev		*rdev;
-
-	for (i = 0, info = NULL; i < ARRAY_SIZE(twl4030_regs); i++) {
-		if (twl4030_regs[i].desc.id != pdev->id)
-			continue;
-		info = twl4030_regs + i;
-		break;
-	}
-	if (!info)
-		return -ENODEV;
-
-	initdata = pdev->dev.platform_data;
-	if (!initdata)
-		return -EINVAL;
-
-	/* Constrain board-specific capabilities according to what
-	 * this driver and the chip itself can actually do.
-	 */
-	c = &initdata->constraints;
-	c->valid_modes_mask &= REGULATOR_MODE_NORMAL | REGULATOR_MODE_STANDBY;
-	c->valid_ops_mask &= REGULATOR_CHANGE_VOLTAGE
-				| REGULATOR_CHANGE_MODE
-				| REGULATOR_CHANGE_STATUS;
-
-	rdev = regulator_register(&info->desc, &pdev->dev, initdata, info);
-	if (IS_ERR(rdev)) {
-		dev_err(&pdev->dev, "can't register %s, %ld\n",
-				info->desc.name, PTR_ERR(rdev));
-		return PTR_ERR(rdev);
-	}
-	platform_set_drvdata(pdev, rdev);
-
-	/* NOTE:  many regulators support short-circuit IRQs (presentable
-	 * as REGULATOR_OVER_CURRENT notifications?) configured via:
-	 *  - SC_CONFIG
-	 *  - SC_DETECT1 (vintana2, vmmc1/2, vaux1/2/3/4)
-	 *  - SC_DETECT2 (vusb, vdac, vio, vdd1/2, vpll2)
-	 *  - IT_CONFIG
-	 */
-
-	return 0;
-}
-
-static int __devexit twl4030reg_remove(struct platform_device *pdev)
-{
-	regulator_unregister(platform_get_drvdata(pdev));
-	return 0;
-}
-
-MODULE_ALIAS("platform:twl4030_reg");
-
-static struct platform_driver twl4030reg_driver = {
-	.probe		= twl4030reg_probe,
-	.remove		= __devexit_p(twl4030reg_remove),
-	/* NOTE: short name, to work around driver model truncation of
-	 * "twl4030_regulator.12" (and friends) to "twl4030_regulator.1".
-	 */
-	.driver.name	= "twl4030_reg",
-	.driver.owner	= THIS_MODULE,
-};
-
-static int __init twl4030reg_init(void)
-{
-	return platform_driver_register(&twl4030reg_driver);
-}
-subsys_initcall(twl4030reg_init);
-
-static void __exit twl4030reg_exit(void)
-{
-	platform_driver_unregister(&twl4030reg_driver);
-}
-module_exit(twl4030reg_exit)
-
-MODULE_DESCRIPTION("TWL4030 regulator driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index af1ba7ae2857..7da6efb3e953 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -80,7 +80,7 @@ obj-$(CONFIG_RTC_DRV_STK17TA8)	+= rtc-stk17ta8.o
 obj-$(CONFIG_RTC_DRV_STMP)	+= rtc-stmp3xxx.o
 obj-$(CONFIG_RTC_DRV_SUN4V)	+= rtc-sun4v.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
-obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl4030.o
+obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl.o
 obj-$(CONFIG_RTC_DRV_TX4939)	+= rtc-tx4939.o
 obj-$(CONFIG_RTC_DRV_V3020)	+= rtc-v3020.o
 obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
new file mode 100644
index 000000000000..93565be12fae
--- /dev/null
+++ b/drivers/rtc/rtc-twl.c
@@ -0,0 +1,540 @@
+/*
+ * rtc-twl4030.c -- TWL4030 Real Time Clock interface
+ *
+ * Copyright (C) 2007 MontaVista Software, Inc
+ * Author: Alexandre Rusev <source@mvista.com>
+ *
+ * Based on original TI driver twl4030-rtc.c
+ *   Copyright (C) 2006 Texas Instruments, Inc.
+ *
+ * Based on rtc-omap.c
+ *   Copyright (C) 2003 MontaVista Software, Inc.
+ *   Author: George G. Davis <gdavis@mvista.com> or <source@mvista.com>
+ *   Copyright (C) 2006 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+#include <linux/i2c/twl.h>
+
+
+/*
+ * RTC block register offsets (use TWL_MODULE_RTC)
+ */
+#define REG_SECONDS_REG                          0x00
+#define REG_MINUTES_REG                          0x01
+#define REG_HOURS_REG                            0x02
+#define REG_DAYS_REG                             0x03
+#define REG_MONTHS_REG                           0x04
+#define REG_YEARS_REG                            0x05
+#define REG_WEEKS_REG                            0x06
+
+#define REG_ALARM_SECONDS_REG                    0x07
+#define REG_ALARM_MINUTES_REG                    0x08
+#define REG_ALARM_HOURS_REG                      0x09
+#define REG_ALARM_DAYS_REG                       0x0A
+#define REG_ALARM_MONTHS_REG                     0x0B
+#define REG_ALARM_YEARS_REG                      0x0C
+
+#define REG_RTC_CTRL_REG                         0x0D
+#define REG_RTC_STATUS_REG                       0x0E
+#define REG_RTC_INTERRUPTS_REG                   0x0F
+
+#define REG_RTC_COMP_LSB_REG                     0x10
+#define REG_RTC_COMP_MSB_REG                     0x11
+
+/* RTC_CTRL_REG bitfields */
+#define BIT_RTC_CTRL_REG_STOP_RTC_M              0x01
+#define BIT_RTC_CTRL_REG_ROUND_30S_M             0x02
+#define BIT_RTC_CTRL_REG_AUTO_COMP_M             0x04
+#define BIT_RTC_CTRL_REG_MODE_12_24_M            0x08
+#define BIT_RTC_CTRL_REG_TEST_MODE_M             0x10
+#define BIT_RTC_CTRL_REG_SET_32_COUNTER_M        0x20
+#define BIT_RTC_CTRL_REG_GET_TIME_M              0x40
+
+/* RTC_STATUS_REG bitfields */
+#define BIT_RTC_STATUS_REG_RUN_M                 0x02
+#define BIT_RTC_STATUS_REG_1S_EVENT_M            0x04
+#define BIT_RTC_STATUS_REG_1M_EVENT_M            0x08
+#define BIT_RTC_STATUS_REG_1H_EVENT_M            0x10
+#define BIT_RTC_STATUS_REG_1D_EVENT_M            0x20
+#define BIT_RTC_STATUS_REG_ALARM_M               0x40
+#define BIT_RTC_STATUS_REG_POWER_UP_M            0x80
+
+/* RTC_INTERRUPTS_REG bitfields */
+#define BIT_RTC_INTERRUPTS_REG_EVERY_M           0x03
+#define BIT_RTC_INTERRUPTS_REG_IT_TIMER_M        0x04
+#define BIT_RTC_INTERRUPTS_REG_IT_ALARM_M        0x08
+
+
+/* REG_SECONDS_REG through REG_YEARS_REG is how many registers? */
+#define ALL_TIME_REGS		6
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Supports 1 byte read from TWL4030 RTC register.
+ */
+static int twl4030_rtc_read_u8(u8 *data, u8 reg)
+{
+	int ret;
+
+	ret = twl4030_i2c_read_u8(TWL4030_MODULE_RTC, data, reg);
+	if (ret < 0)
+		pr_err("twl4030_rtc: Could not read TWL4030"
+		       "register %X - error %d\n", reg, ret);
+	return ret;
+}
+
+/*
+ * Supports 1 byte write to TWL4030 RTC registers.
+ */
+static int twl4030_rtc_write_u8(u8 data, u8 reg)
+{
+	int ret;
+
+	ret = twl4030_i2c_write_u8(TWL4030_MODULE_RTC, data, reg);
+	if (ret < 0)
+		pr_err("twl4030_rtc: Could not write TWL4030"
+		       "register %X - error %d\n", reg, ret);
+	return ret;
+}
+
+/*
+ * Cache the value for timer/alarm interrupts register; this is
+ * only changed by callers holding rtc ops lock (or resume).
+ */
+static unsigned char rtc_irq_bits;
+
+/*
+ * Enable 1/second update and/or alarm interrupts.
+ */
+static int set_rtc_irq_bit(unsigned char bit)
+{
+	unsigned char val;
+	int ret;
+
+	val = rtc_irq_bits | bit;
+	val &= ~BIT_RTC_INTERRUPTS_REG_EVERY_M;
+	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
+	if (ret == 0)
+		rtc_irq_bits = val;
+
+	return ret;
+}
+
+/*
+ * Disable update and/or alarm interrupts.
+ */
+static int mask_rtc_irq_bit(unsigned char bit)
+{
+	unsigned char val;
+	int ret;
+
+	val = rtc_irq_bits & ~bit;
+	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
+	if (ret == 0)
+		rtc_irq_bits = val;
+
+	return ret;
+}
+
+static int twl4030_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
+{
+	int ret;
+
+	if (enabled)
+		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+	else
+		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+
+	return ret;
+}
+
+static int twl4030_rtc_update_irq_enable(struct device *dev, unsigned enabled)
+{
+	int ret;
+
+	if (enabled)
+		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+	else
+		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+
+	return ret;
+}
+
+/*
+ * Gets current TWL4030 RTC time and date parameters.
+ *
+ * The RTC's time/alarm representation is not what gmtime(3) requires
+ * Linux to use:
+ *
+ *  - Months are 1..12 vs Linux 0-11
+ *  - Years are 0..99 vs Linux 1900..N (we assume 21st century)
+ */
+static int twl4030_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+	u8 save_control;
+
+	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		return ret;
+
+	save_control |= BIT_RTC_CTRL_REG_GET_TIME_M;
+
+	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		return ret;
+
+	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+			       REG_SECONDS_REG, ALL_TIME_REGS);
+
+	if (ret < 0) {
+		dev_err(dev, "rtc_read_time error %d\n", ret);
+		return ret;
+	}
+
+	tm->tm_sec = bcd2bin(rtc_data[0]);
+	tm->tm_min = bcd2bin(rtc_data[1]);
+	tm->tm_hour = bcd2bin(rtc_data[2]);
+	tm->tm_mday = bcd2bin(rtc_data[3]);
+	tm->tm_mon = bcd2bin(rtc_data[4]) - 1;
+	tm->tm_year = bcd2bin(rtc_data[5]) + 100;
+
+	return ret;
+}
+
+static int twl4030_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char save_control;
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	rtc_data[1] = bin2bcd(tm->tm_sec);
+	rtc_data[2] = bin2bcd(tm->tm_min);
+	rtc_data[3] = bin2bcd(tm->tm_hour);
+	rtc_data[4] = bin2bcd(tm->tm_mday);
+	rtc_data[5] = bin2bcd(tm->tm_mon + 1);
+	rtc_data[6] = bin2bcd(tm->tm_year - 100);
+
+	/* Stop RTC while updating the TC registers */
+	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out;
+
+	save_control &= ~BIT_RTC_CTRL_REG_STOP_RTC_M;
+	twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out;
+
+	/* update all the time registers in one shot */
+	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, rtc_data,
+			REG_SECONDS_REG, ALL_TIME_REGS);
+	if (ret < 0) {
+		dev_err(dev, "rtc_set_time error %d\n", ret);
+		goto out;
+	}
+
+	/* Start back RTC */
+	save_control |= BIT_RTC_CTRL_REG_STOP_RTC_M;
+	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
+
+out:
+	return ret;
+}
+
+/*
+ * Gets current TWL4030 RTC alarm time.
+ */
+static int twl4030_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	unsigned char rtc_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+			       REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
+	if (ret < 0) {
+		dev_err(dev, "rtc_read_alarm error %d\n", ret);
+		return ret;
+	}
+
+	/* some of these fields may be wildcard/"match all" */
+	alm->time.tm_sec = bcd2bin(rtc_data[0]);
+	alm->time.tm_min = bcd2bin(rtc_data[1]);
+	alm->time.tm_hour = bcd2bin(rtc_data[2]);
+	alm->time.tm_mday = bcd2bin(rtc_data[3]);
+	alm->time.tm_mon = bcd2bin(rtc_data[4]) - 1;
+	alm->time.tm_year = bcd2bin(rtc_data[5]) + 100;
+
+	/* report cached alarm enable state */
+	if (rtc_irq_bits & BIT_RTC_INTERRUPTS_REG_IT_ALARM_M)
+		alm->enabled = 1;
+
+	return ret;
+}
+
+static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	unsigned char alarm_data[ALL_TIME_REGS + 1];
+	int ret;
+
+	ret = twl4030_rtc_alarm_irq_enable(dev, 0);
+	if (ret)
+		goto out;
+
+	alarm_data[1] = bin2bcd(alm->time.tm_sec);
+	alarm_data[2] = bin2bcd(alm->time.tm_min);
+	alarm_data[3] = bin2bcd(alm->time.tm_hour);
+	alarm_data[4] = bin2bcd(alm->time.tm_mday);
+	alarm_data[5] = bin2bcd(alm->time.tm_mon + 1);
+	alarm_data[6] = bin2bcd(alm->time.tm_year - 100);
+
+	/* update all the alarm registers in one shot */
+	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, alarm_data,
+			REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
+	if (ret) {
+		dev_err(dev, "rtc_set_alarm error %d\n", ret);
+		goto out;
+	}
+
+	if (alm->enabled)
+		ret = twl4030_rtc_alarm_irq_enable(dev, 1);
+out:
+	return ret;
+}
+
+static irqreturn_t twl4030_rtc_interrupt(int irq, void *rtc)
+{
+	unsigned long events = 0;
+	int ret = IRQ_NONE;
+	int res;
+	u8 rd_reg;
+
+#ifdef CONFIG_LOCKDEP
+	/* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which
+	 * we don't want and can't tolerate.  Although it might be
+	 * friendlier not to borrow this thread context...
+	 */
+	local_irq_enable();
+#endif
+
+	res = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
+	if (res)
+		goto out;
+	/*
+	 * Figure out source of interrupt: ALARM or TIMER in RTC_STATUS_REG.
+	 * only one (ALARM or RTC) interrupt source may be enabled
+	 * at time, we also could check our results
+	 * by reading RTS_INTERRUPTS_REGISTER[IT_TIMER,IT_ALARM]
+	 */
+	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
+		events |= RTC_IRQF | RTC_AF;
+	else
+		events |= RTC_IRQF | RTC_UF;
+
+	res = twl4030_rtc_write_u8(rd_reg | BIT_RTC_STATUS_REG_ALARM_M,
+				   REG_RTC_STATUS_REG);
+	if (res)
+		goto out;
+
+	/* Clear on Read enabled. RTC_IT bit of TWL4030_INT_PWR_ISR1
+	 * needs 2 reads to clear the interrupt. One read is done in
+	 * do_twl4030_pwrirq(). Doing the second read, to clear
+	 * the bit.
+	 *
+	 * FIXME the reason PWR_ISR1 needs an extra read is that
+	 * RTC_IF retriggered until we cleared REG_ALARM_M above.
+	 * But re-reading like this is a bad hack; by doing so we
+	 * risk wrongly clearing status for some other IRQ (losing
+	 * the interrupt).  Be smarter about handling RTC_UF ...
+	 */
+	res = twl4030_i2c_read_u8(TWL4030_MODULE_INT,
+			&rd_reg, TWL4030_INT_PWR_ISR1);
+	if (res)
+		goto out;
+
+	/* Notify RTC core on event */
+	rtc_update_irq(rtc, 1, events);
+
+	ret = IRQ_HANDLED;
+out:
+	return ret;
+}
+
+static struct rtc_class_ops twl4030_rtc_ops = {
+	.read_time	= twl4030_rtc_read_time,
+	.set_time	= twl4030_rtc_set_time,
+	.read_alarm	= twl4030_rtc_read_alarm,
+	.set_alarm	= twl4030_rtc_set_alarm,
+	.alarm_irq_enable = twl4030_rtc_alarm_irq_enable,
+	.update_irq_enable = twl4030_rtc_update_irq_enable,
+};
+
+/*----------------------------------------------------------------------*/
+
+static int __devinit twl4030_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtc;
+	int ret = 0;
+	int irq = platform_get_irq(pdev, 0);
+	u8 rd_reg;
+
+	if (irq <= 0)
+		return -EINVAL;
+
+	rtc = rtc_device_register(pdev->name,
+				  &pdev->dev, &twl4030_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		ret = PTR_ERR(rtc);
+		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
+			PTR_ERR(rtc));
+		goto out0;
+
+	}
+
+	platform_set_drvdata(pdev, rtc);
+
+	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
+	if (ret < 0)
+		goto out1;
+
+	if (rd_reg & BIT_RTC_STATUS_REG_POWER_UP_M)
+		dev_warn(&pdev->dev, "Power up reset detected.\n");
+
+	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
+		dev_warn(&pdev->dev, "Pending Alarm interrupt detected.\n");
+
+	/* Clear RTC Power up reset and pending alarm interrupts */
+	ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_STATUS_REG);
+	if (ret < 0)
+		goto out1;
+
+	ret = request_irq(irq, twl4030_rtc_interrupt,
+				IRQF_TRIGGER_RISING,
+				dev_name(&rtc->dev), rtc);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "IRQ is not free.\n");
+		goto out1;
+	}
+
+	/* Check RTC module status, Enable if it is off */
+	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG);
+	if (ret < 0)
+		goto out2;
+
+	if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) {
+		dev_info(&pdev->dev, "Enabling TWL4030-RTC.\n");
+		rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M;
+		ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG);
+		if (ret < 0)
+			goto out2;
+	}
+
+	/* init cached IRQ enable bits */
+	ret = twl4030_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
+	if (ret < 0)
+		goto out2;
+
+	return ret;
+
+out2:
+	free_irq(irq, rtc);
+out1:
+	rtc_device_unregister(rtc);
+out0:
+	return ret;
+}
+
+/*
+ * Disable all TWL4030 RTC module interrupts.
+ * Sets status flag to free.
+ */
+static int __devexit twl4030_rtc_remove(struct platform_device *pdev)
+{
+	/* leave rtc running, but disable irqs */
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+
+	free_irq(irq, rtc);
+
+	rtc_device_unregister(rtc);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static void twl4030_rtc_shutdown(struct platform_device *pdev)
+{
+	/* mask timer interrupts, but leave alarm interrupts on to enable
+	   power-on when alarm is triggered */
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+}
+
+#ifdef CONFIG_PM
+
+static unsigned char irqstat;
+
+static int twl4030_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	irqstat = rtc_irq_bits;
+
+	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
+	return 0;
+}
+
+static int twl4030_rtc_resume(struct platform_device *pdev)
+{
+	set_rtc_irq_bit(irqstat);
+	return 0;
+}
+
+#else
+#define twl4030_rtc_suspend NULL
+#define twl4030_rtc_resume  NULL
+#endif
+
+MODULE_ALIAS("platform:twl4030_rtc");
+
+static struct platform_driver twl4030rtc_driver = {
+	.probe		= twl4030_rtc_probe,
+	.remove		= __devexit_p(twl4030_rtc_remove),
+	.shutdown	= twl4030_rtc_shutdown,
+	.suspend	= twl4030_rtc_suspend,
+	.resume		= twl4030_rtc_resume,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "twl4030_rtc",
+	},
+};
+
+static int __init twl4030_rtc_init(void)
+{
+	return platform_driver_register(&twl4030rtc_driver);
+}
+module_init(twl4030_rtc_init);
+
+static void __exit twl4030_rtc_exit(void)
+{
+	platform_driver_unregister(&twl4030rtc_driver);
+}
+module_exit(twl4030_rtc_exit);
+
+MODULE_AUTHOR("Texas Instruments, MontaVista Software");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c
deleted file mode 100644
index 9c8c70c497dc..000000000000
--- a/drivers/rtc/rtc-twl4030.c
+++ /dev/null
@@ -1,540 +0,0 @@
-/*
- * rtc-twl4030.c -- TWL4030 Real Time Clock interface
- *
- * Copyright (C) 2007 MontaVista Software, Inc
- * Author: Alexandre Rusev <source@mvista.com>
- *
- * Based on original TI driver twl4030-rtc.c
- *   Copyright (C) 2006 Texas Instruments, Inc.
- *
- * Based on rtc-omap.c
- *   Copyright (C) 2003 MontaVista Software, Inc.
- *   Author: George G. Davis <gdavis@mvista.com> or <source@mvista.com>
- *   Copyright (C) 2006 David Brownell
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-
-#include <linux/i2c/twl4030.h>
-
-
-/*
- * RTC block register offsets (use TWL_MODULE_RTC)
- */
-#define REG_SECONDS_REG                          0x00
-#define REG_MINUTES_REG                          0x01
-#define REG_HOURS_REG                            0x02
-#define REG_DAYS_REG                             0x03
-#define REG_MONTHS_REG                           0x04
-#define REG_YEARS_REG                            0x05
-#define REG_WEEKS_REG                            0x06
-
-#define REG_ALARM_SECONDS_REG                    0x07
-#define REG_ALARM_MINUTES_REG                    0x08
-#define REG_ALARM_HOURS_REG                      0x09
-#define REG_ALARM_DAYS_REG                       0x0A
-#define REG_ALARM_MONTHS_REG                     0x0B
-#define REG_ALARM_YEARS_REG                      0x0C
-
-#define REG_RTC_CTRL_REG                         0x0D
-#define REG_RTC_STATUS_REG                       0x0E
-#define REG_RTC_INTERRUPTS_REG                   0x0F
-
-#define REG_RTC_COMP_LSB_REG                     0x10
-#define REG_RTC_COMP_MSB_REG                     0x11
-
-/* RTC_CTRL_REG bitfields */
-#define BIT_RTC_CTRL_REG_STOP_RTC_M              0x01
-#define BIT_RTC_CTRL_REG_ROUND_30S_M             0x02
-#define BIT_RTC_CTRL_REG_AUTO_COMP_M             0x04
-#define BIT_RTC_CTRL_REG_MODE_12_24_M            0x08
-#define BIT_RTC_CTRL_REG_TEST_MODE_M             0x10
-#define BIT_RTC_CTRL_REG_SET_32_COUNTER_M        0x20
-#define BIT_RTC_CTRL_REG_GET_TIME_M              0x40
-
-/* RTC_STATUS_REG bitfields */
-#define BIT_RTC_STATUS_REG_RUN_M                 0x02
-#define BIT_RTC_STATUS_REG_1S_EVENT_M            0x04
-#define BIT_RTC_STATUS_REG_1M_EVENT_M            0x08
-#define BIT_RTC_STATUS_REG_1H_EVENT_M            0x10
-#define BIT_RTC_STATUS_REG_1D_EVENT_M            0x20
-#define BIT_RTC_STATUS_REG_ALARM_M               0x40
-#define BIT_RTC_STATUS_REG_POWER_UP_M            0x80
-
-/* RTC_INTERRUPTS_REG bitfields */
-#define BIT_RTC_INTERRUPTS_REG_EVERY_M           0x03
-#define BIT_RTC_INTERRUPTS_REG_IT_TIMER_M        0x04
-#define BIT_RTC_INTERRUPTS_REG_IT_ALARM_M        0x08
-
-
-/* REG_SECONDS_REG through REG_YEARS_REG is how many registers? */
-#define ALL_TIME_REGS		6
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Supports 1 byte read from TWL4030 RTC register.
- */
-static int twl4030_rtc_read_u8(u8 *data, u8 reg)
-{
-	int ret;
-
-	ret = twl4030_i2c_read_u8(TWL4030_MODULE_RTC, data, reg);
-	if (ret < 0)
-		pr_err("twl4030_rtc: Could not read TWL4030"
-		       "register %X - error %d\n", reg, ret);
-	return ret;
-}
-
-/*
- * Supports 1 byte write to TWL4030 RTC registers.
- */
-static int twl4030_rtc_write_u8(u8 data, u8 reg)
-{
-	int ret;
-
-	ret = twl4030_i2c_write_u8(TWL4030_MODULE_RTC, data, reg);
-	if (ret < 0)
-		pr_err("twl4030_rtc: Could not write TWL4030"
-		       "register %X - error %d\n", reg, ret);
-	return ret;
-}
-
-/*
- * Cache the value for timer/alarm interrupts register; this is
- * only changed by callers holding rtc ops lock (or resume).
- */
-static unsigned char rtc_irq_bits;
-
-/*
- * Enable 1/second update and/or alarm interrupts.
- */
-static int set_rtc_irq_bit(unsigned char bit)
-{
-	unsigned char val;
-	int ret;
-
-	val = rtc_irq_bits | bit;
-	val &= ~BIT_RTC_INTERRUPTS_REG_EVERY_M;
-	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
-	if (ret == 0)
-		rtc_irq_bits = val;
-
-	return ret;
-}
-
-/*
- * Disable update and/or alarm interrupts.
- */
-static int mask_rtc_irq_bit(unsigned char bit)
-{
-	unsigned char val;
-	int ret;
-
-	val = rtc_irq_bits & ~bit;
-	ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG);
-	if (ret == 0)
-		rtc_irq_bits = val;
-
-	return ret;
-}
-
-static int twl4030_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
-{
-	int ret;
-
-	if (enabled)
-		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
-	else
-		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
-
-	return ret;
-}
-
-static int twl4030_rtc_update_irq_enable(struct device *dev, unsigned enabled)
-{
-	int ret;
-
-	if (enabled)
-		ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-	else
-		ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-
-	return ret;
-}
-
-/*
- * Gets current TWL4030 RTC time and date parameters.
- *
- * The RTC's time/alarm representation is not what gmtime(3) requires
- * Linux to use:
- *
- *  - Months are 1..12 vs Linux 0-11
- *  - Years are 0..99 vs Linux 1900..N (we assume 21st century)
- */
-static int twl4030_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
-	int ret;
-	u8 save_control;
-
-	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
-	if (ret < 0)
-		return ret;
-
-	save_control |= BIT_RTC_CTRL_REG_GET_TIME_M;
-
-	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
-	if (ret < 0)
-		return ret;
-
-	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
-			       REG_SECONDS_REG, ALL_TIME_REGS);
-
-	if (ret < 0) {
-		dev_err(dev, "rtc_read_time error %d\n", ret);
-		return ret;
-	}
-
-	tm->tm_sec = bcd2bin(rtc_data[0]);
-	tm->tm_min = bcd2bin(rtc_data[1]);
-	tm->tm_hour = bcd2bin(rtc_data[2]);
-	tm->tm_mday = bcd2bin(rtc_data[3]);
-	tm->tm_mon = bcd2bin(rtc_data[4]) - 1;
-	tm->tm_year = bcd2bin(rtc_data[5]) + 100;
-
-	return ret;
-}
-
-static int twl4030_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	unsigned char save_control;
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
-	int ret;
-
-	rtc_data[1] = bin2bcd(tm->tm_sec);
-	rtc_data[2] = bin2bcd(tm->tm_min);
-	rtc_data[3] = bin2bcd(tm->tm_hour);
-	rtc_data[4] = bin2bcd(tm->tm_mday);
-	rtc_data[5] = bin2bcd(tm->tm_mon + 1);
-	rtc_data[6] = bin2bcd(tm->tm_year - 100);
-
-	/* Stop RTC while updating the TC registers */
-	ret = twl4030_rtc_read_u8(&save_control, REG_RTC_CTRL_REG);
-	if (ret < 0)
-		goto out;
-
-	save_control &= ~BIT_RTC_CTRL_REG_STOP_RTC_M;
-	twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
-	if (ret < 0)
-		goto out;
-
-	/* update all the time registers in one shot */
-	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, rtc_data,
-			REG_SECONDS_REG, ALL_TIME_REGS);
-	if (ret < 0) {
-		dev_err(dev, "rtc_set_time error %d\n", ret);
-		goto out;
-	}
-
-	/* Start back RTC */
-	save_control |= BIT_RTC_CTRL_REG_STOP_RTC_M;
-	ret = twl4030_rtc_write_u8(save_control, REG_RTC_CTRL_REG);
-
-out:
-	return ret;
-}
-
-/*
- * Gets current TWL4030 RTC alarm time.
- */
-static int twl4030_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	unsigned char rtc_data[ALL_TIME_REGS + 1];
-	int ret;
-
-	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
-			       REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
-	if (ret < 0) {
-		dev_err(dev, "rtc_read_alarm error %d\n", ret);
-		return ret;
-	}
-
-	/* some of these fields may be wildcard/"match all" */
-	alm->time.tm_sec = bcd2bin(rtc_data[0]);
-	alm->time.tm_min = bcd2bin(rtc_data[1]);
-	alm->time.tm_hour = bcd2bin(rtc_data[2]);
-	alm->time.tm_mday = bcd2bin(rtc_data[3]);
-	alm->time.tm_mon = bcd2bin(rtc_data[4]) - 1;
-	alm->time.tm_year = bcd2bin(rtc_data[5]) + 100;
-
-	/* report cached alarm enable state */
-	if (rtc_irq_bits & BIT_RTC_INTERRUPTS_REG_IT_ALARM_M)
-		alm->enabled = 1;
-
-	return ret;
-}
-
-static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	unsigned char alarm_data[ALL_TIME_REGS + 1];
-	int ret;
-
-	ret = twl4030_rtc_alarm_irq_enable(dev, 0);
-	if (ret)
-		goto out;
-
-	alarm_data[1] = bin2bcd(alm->time.tm_sec);
-	alarm_data[2] = bin2bcd(alm->time.tm_min);
-	alarm_data[3] = bin2bcd(alm->time.tm_hour);
-	alarm_data[4] = bin2bcd(alm->time.tm_mday);
-	alarm_data[5] = bin2bcd(alm->time.tm_mon + 1);
-	alarm_data[6] = bin2bcd(alm->time.tm_year - 100);
-
-	/* update all the alarm registers in one shot */
-	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, alarm_data,
-			REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
-	if (ret) {
-		dev_err(dev, "rtc_set_alarm error %d\n", ret);
-		goto out;
-	}
-
-	if (alm->enabled)
-		ret = twl4030_rtc_alarm_irq_enable(dev, 1);
-out:
-	return ret;
-}
-
-static irqreturn_t twl4030_rtc_interrupt(int irq, void *rtc)
-{
-	unsigned long events = 0;
-	int ret = IRQ_NONE;
-	int res;
-	u8 rd_reg;
-
-#ifdef CONFIG_LOCKDEP
-	/* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which
-	 * we don't want and can't tolerate.  Although it might be
-	 * friendlier not to borrow this thread context...
-	 */
-	local_irq_enable();
-#endif
-
-	res = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
-	if (res)
-		goto out;
-	/*
-	 * Figure out source of interrupt: ALARM or TIMER in RTC_STATUS_REG.
-	 * only one (ALARM or RTC) interrupt source may be enabled
-	 * at time, we also could check our results
-	 * by reading RTS_INTERRUPTS_REGISTER[IT_TIMER,IT_ALARM]
-	 */
-	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
-		events |= RTC_IRQF | RTC_AF;
-	else
-		events |= RTC_IRQF | RTC_UF;
-
-	res = twl4030_rtc_write_u8(rd_reg | BIT_RTC_STATUS_REG_ALARM_M,
-				   REG_RTC_STATUS_REG);
-	if (res)
-		goto out;
-
-	/* Clear on Read enabled. RTC_IT bit of TWL4030_INT_PWR_ISR1
-	 * needs 2 reads to clear the interrupt. One read is done in
-	 * do_twl4030_pwrirq(). Doing the second read, to clear
-	 * the bit.
-	 *
-	 * FIXME the reason PWR_ISR1 needs an extra read is that
-	 * RTC_IF retriggered until we cleared REG_ALARM_M above.
-	 * But re-reading like this is a bad hack; by doing so we
-	 * risk wrongly clearing status for some other IRQ (losing
-	 * the interrupt).  Be smarter about handling RTC_UF ...
-	 */
-	res = twl4030_i2c_read_u8(TWL4030_MODULE_INT,
-			&rd_reg, TWL4030_INT_PWR_ISR1);
-	if (res)
-		goto out;
-
-	/* Notify RTC core on event */
-	rtc_update_irq(rtc, 1, events);
-
-	ret = IRQ_HANDLED;
-out:
-	return ret;
-}
-
-static struct rtc_class_ops twl4030_rtc_ops = {
-	.read_time	= twl4030_rtc_read_time,
-	.set_time	= twl4030_rtc_set_time,
-	.read_alarm	= twl4030_rtc_read_alarm,
-	.set_alarm	= twl4030_rtc_set_alarm,
-	.alarm_irq_enable = twl4030_rtc_alarm_irq_enable,
-	.update_irq_enable = twl4030_rtc_update_irq_enable,
-};
-
-/*----------------------------------------------------------------------*/
-
-static int __devinit twl4030_rtc_probe(struct platform_device *pdev)
-{
-	struct rtc_device *rtc;
-	int ret = 0;
-	int irq = platform_get_irq(pdev, 0);
-	u8 rd_reg;
-
-	if (irq <= 0)
-		return -EINVAL;
-
-	rtc = rtc_device_register(pdev->name,
-				  &pdev->dev, &twl4030_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
-		dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
-			PTR_ERR(rtc));
-		goto out0;
-
-	}
-
-	platform_set_drvdata(pdev, rtc);
-
-	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
-	if (ret < 0)
-		goto out1;
-
-	if (rd_reg & BIT_RTC_STATUS_REG_POWER_UP_M)
-		dev_warn(&pdev->dev, "Power up reset detected.\n");
-
-	if (rd_reg & BIT_RTC_STATUS_REG_ALARM_M)
-		dev_warn(&pdev->dev, "Pending Alarm interrupt detected.\n");
-
-	/* Clear RTC Power up reset and pending alarm interrupts */
-	ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_STATUS_REG);
-	if (ret < 0)
-		goto out1;
-
-	ret = request_irq(irq, twl4030_rtc_interrupt,
-				IRQF_TRIGGER_RISING,
-				dev_name(&rtc->dev), rtc);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "IRQ is not free.\n");
-		goto out1;
-	}
-
-	/* Check RTC module status, Enable if it is off */
-	ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG);
-	if (ret < 0)
-		goto out2;
-
-	if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) {
-		dev_info(&pdev->dev, "Enabling TWL4030-RTC.\n");
-		rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M;
-		ret = twl4030_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG);
-		if (ret < 0)
-			goto out2;
-	}
-
-	/* init cached IRQ enable bits */
-	ret = twl4030_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
-	if (ret < 0)
-		goto out2;
-
-	return ret;
-
-out2:
-	free_irq(irq, rtc);
-out1:
-	rtc_device_unregister(rtc);
-out0:
-	return ret;
-}
-
-/*
- * Disable all TWL4030 RTC module interrupts.
- * Sets status flag to free.
- */
-static int __devexit twl4030_rtc_remove(struct platform_device *pdev)
-{
-	/* leave rtc running, but disable irqs */
-	struct rtc_device *rtc = platform_get_drvdata(pdev);
-	int irq = platform_get_irq(pdev, 0);
-
-	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
-	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-
-	free_irq(irq, rtc);
-
-	rtc_device_unregister(rtc);
-	platform_set_drvdata(pdev, NULL);
-	return 0;
-}
-
-static void twl4030_rtc_shutdown(struct platform_device *pdev)
-{
-	/* mask timer interrupts, but leave alarm interrupts on to enable
-	   power-on when alarm is triggered */
-	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-}
-
-#ifdef CONFIG_PM
-
-static unsigned char irqstat;
-
-static int twl4030_rtc_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	irqstat = rtc_irq_bits;
-
-	mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-	return 0;
-}
-
-static int twl4030_rtc_resume(struct platform_device *pdev)
-{
-	set_rtc_irq_bit(irqstat);
-	return 0;
-}
-
-#else
-#define twl4030_rtc_suspend NULL
-#define twl4030_rtc_resume  NULL
-#endif
-
-MODULE_ALIAS("platform:twl4030_rtc");
-
-static struct platform_driver twl4030rtc_driver = {
-	.probe		= twl4030_rtc_probe,
-	.remove		= __devexit_p(twl4030_rtc_remove),
-	.shutdown	= twl4030_rtc_shutdown,
-	.suspend	= twl4030_rtc_suspend,
-	.resume		= twl4030_rtc_resume,
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= "twl4030_rtc",
-	},
-};
-
-static int __init twl4030_rtc_init(void)
-{
-	return platform_driver_register(&twl4030rtc_driver);
-}
-module_init(twl4030_rtc_init);
-
-static void __exit twl4030_rtc_exit(void)
-{
-	platform_driver_unregister(&twl4030rtc_driver);
-}
-module_exit(twl4030_rtc_exit);
-
-MODULE_AUTHOR("Texas Instruments, MontaVista Software");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/otg/twl4030-usb.c b/drivers/usb/otg/twl4030-usb.c
index bd9883f41e63..3acbdb82bcf9 100644
--- a/drivers/usb/otg/twl4030-usb.c
+++ b/drivers/usb/otg/twl4030-usb.c
@@ -33,7 +33,7 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/usb/otg.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <linux/regulator/consumer.h>
 #include <linux/err.h>
 
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
index 760645d9dbb6..3764a36d9142 100644
--- a/drivers/video/omap/lcd_2430sdp.c
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -25,7 +25,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 #include <plat/mux.h>
 #include <asm/mach-types.h>
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index cb46556f2973..20968b2aadef 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -26,7 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/miscdevice.h>
 #include <linux/uaccess.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 
 #define TWL4030_WATCHDOG_CFG_REG_OFFS	0x3
 
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
new file mode 100644
index 000000000000..a50bcf8a4048
--- /dev/null
+++ b/include/linux/i2c/twl.h
@@ -0,0 +1,550 @@
+/*
+ * twl4030.h - header for TWL4030 PM and audio CODEC device
+ *
+ * Copyright (C) 2005-2006 Texas Instruments, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#ifndef __TWL4030_H_
+#define __TWL4030_H_
+
+#include <linux/types.h>
+#include <linux/input/matrix_keypad.h>
+
+/*
+ * Using the twl4030 core we address registers using a pair
+ *	{ module id, relative register offset }
+ * which that core then maps to the relevant
+ *	{ i2c slave, absolute register address }
+ *
+ * The module IDs are meaningful only to the twl4030 core code,
+ * which uses them as array indices to look up the first register
+ * address each module uses within a given i2c slave.
+ */
+
+/* Slave 0 (i2c address 0x48) */
+#define TWL4030_MODULE_USB		0x00
+
+/* Slave 1 (i2c address 0x49) */
+#define TWL4030_MODULE_AUDIO_VOICE	0x01
+#define TWL4030_MODULE_GPIO		0x02
+#define TWL4030_MODULE_INTBR		0x03
+#define TWL4030_MODULE_PIH		0x04
+#define TWL4030_MODULE_TEST		0x05
+
+/* Slave 2 (i2c address 0x4a) */
+#define TWL4030_MODULE_KEYPAD		0x06
+#define TWL4030_MODULE_MADC		0x07
+#define TWL4030_MODULE_INTERRUPTS	0x08
+#define TWL4030_MODULE_LED		0x09
+#define TWL4030_MODULE_MAIN_CHARGE	0x0A
+#define TWL4030_MODULE_PRECHARGE	0x0B
+#define TWL4030_MODULE_PWM0		0x0C
+#define TWL4030_MODULE_PWM1		0x0D
+#define TWL4030_MODULE_PWMA		0x0E
+#define TWL4030_MODULE_PWMB		0x0F
+
+#define TWL5031_MODULE_ACCESSORY	0x10
+#define TWL5031_MODULE_INTERRUPTS	0x11
+
+/* Slave 3 (i2c address 0x4b) */
+#define TWL4030_MODULE_BACKUP		0x12
+#define TWL4030_MODULE_INT		0x13
+#define TWL4030_MODULE_PM_MASTER	0x14
+#define TWL4030_MODULE_PM_RECEIVER	0x15
+#define TWL4030_MODULE_RTC		0x16
+#define TWL4030_MODULE_SECURED_REG	0x17
+
+/*
+ * Read and write single 8-bit registers
+ */
+int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
+int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
+
+/*
+ * Read and write several 8-bit registers at once.
+ *
+ * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
+ * for the value, and populate your data starting at offset 1.
+ */
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * NOTE:  at up to 1024 registers, this is a big chip.
+ *
+ * Avoid putting register declarations in this file, instead of into
+ * a driver-private file, unless some of the registers in a block
+ * need to be shared with other drivers.  One example is blocks that
+ * have Secondary IRQ Handler (SIH) registers.
+ */
+
+#define TWL4030_SIH_CTRL_EXCLEN_MASK	BIT(0)
+#define TWL4030_SIH_CTRL_PENDDIS_MASK	BIT(1)
+#define TWL4030_SIH_CTRL_COR_MASK	BIT(2)
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * GPIO Block Register offsets (use TWL4030_MODULE_GPIO)
+ */
+
+#define REG_GPIODATAIN1			0x0
+#define REG_GPIODATAIN2			0x1
+#define REG_GPIODATAIN3			0x2
+#define REG_GPIODATADIR1		0x3
+#define REG_GPIODATADIR2		0x4
+#define REG_GPIODATADIR3		0x5
+#define REG_GPIODATAOUT1		0x6
+#define REG_GPIODATAOUT2		0x7
+#define REG_GPIODATAOUT3		0x8
+#define REG_CLEARGPIODATAOUT1		0x9
+#define REG_CLEARGPIODATAOUT2		0xA
+#define REG_CLEARGPIODATAOUT3		0xB
+#define REG_SETGPIODATAOUT1		0xC
+#define REG_SETGPIODATAOUT2		0xD
+#define REG_SETGPIODATAOUT3		0xE
+#define REG_GPIO_DEBEN1			0xF
+#define REG_GPIO_DEBEN2			0x10
+#define REG_GPIO_DEBEN3			0x11
+#define REG_GPIO_CTRL			0x12
+#define REG_GPIOPUPDCTR1		0x13
+#define REG_GPIOPUPDCTR2		0x14
+#define REG_GPIOPUPDCTR3		0x15
+#define REG_GPIOPUPDCTR4		0x16
+#define REG_GPIOPUPDCTR5		0x17
+#define REG_GPIO_ISR1A			0x19
+#define REG_GPIO_ISR2A			0x1A
+#define REG_GPIO_ISR3A			0x1B
+#define REG_GPIO_IMR1A			0x1C
+#define REG_GPIO_IMR2A			0x1D
+#define REG_GPIO_IMR3A			0x1E
+#define REG_GPIO_ISR1B			0x1F
+#define REG_GPIO_ISR2B			0x20
+#define REG_GPIO_ISR3B			0x21
+#define REG_GPIO_IMR1B			0x22
+#define REG_GPIO_IMR2B			0x23
+#define REG_GPIO_IMR3B			0x24
+#define REG_GPIO_EDR1			0x28
+#define REG_GPIO_EDR2			0x29
+#define REG_GPIO_EDR3			0x2A
+#define REG_GPIO_EDR4			0x2B
+#define REG_GPIO_EDR5			0x2C
+#define REG_GPIO_SIH_CTRL		0x2D
+
+/* Up to 18 signals are available as GPIOs, when their
+ * pins are not assigned to another use (such as ULPI/USB).
+ */
+#define TWL4030_GPIO_MAX		18
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Keypad register offsets (use TWL4030_MODULE_KEYPAD)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_KEYPAD_KEYP_ISR1	0x11
+#define TWL4030_KEYPAD_KEYP_IMR1	0x12
+#define TWL4030_KEYPAD_KEYP_ISR2	0x13
+#define TWL4030_KEYPAD_KEYP_IMR2	0x14
+#define TWL4030_KEYPAD_KEYP_SIR		0x15	/* test register */
+#define TWL4030_KEYPAD_KEYP_EDR		0x16
+#define TWL4030_KEYPAD_KEYP_SIH_CTRL	0x17
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Multichannel ADC register offsets (use TWL4030_MODULE_MADC)
+ * ... SIH/interrupt only
+ */
+
+#define TWL4030_MADC_ISR1		0x61
+#define TWL4030_MADC_IMR1		0x62
+#define TWL4030_MADC_ISR2		0x63
+#define TWL4030_MADC_IMR2		0x64
+#define TWL4030_MADC_SIR		0x65	/* test register */
+#define TWL4030_MADC_EDR		0x66
+#define TWL4030_MADC_SIH_CTRL		0x67
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS)
+ */
+
+#define TWL4030_INTERRUPTS_BCIISR1A	0x0
+#define TWL4030_INTERRUPTS_BCIISR2A	0x1
+#define TWL4030_INTERRUPTS_BCIIMR1A	0x2
+#define TWL4030_INTERRUPTS_BCIIMR2A	0x3
+#define TWL4030_INTERRUPTS_BCIISR1B	0x4
+#define TWL4030_INTERRUPTS_BCIISR2B	0x5
+#define TWL4030_INTERRUPTS_BCIIMR1B	0x6
+#define TWL4030_INTERRUPTS_BCIIMR2B	0x7
+#define TWL4030_INTERRUPTS_BCISIR1	0x8	/* test register */
+#define TWL4030_INTERRUPTS_BCISIR2	0x9	/* test register */
+#define TWL4030_INTERRUPTS_BCIEDR1	0xa
+#define TWL4030_INTERRUPTS_BCIEDR2	0xb
+#define TWL4030_INTERRUPTS_BCIEDR3	0xc
+#define TWL4030_INTERRUPTS_BCISIHCTRL	0xd
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Power Interrupt block register offsets (use TWL4030_MODULE_INT)
+ */
+
+#define TWL4030_INT_PWR_ISR1		0x0
+#define TWL4030_INT_PWR_IMR1		0x1
+#define TWL4030_INT_PWR_ISR2		0x2
+#define TWL4030_INT_PWR_IMR2		0x3
+#define TWL4030_INT_PWR_SIR		0x4	/* test register */
+#define TWL4030_INT_PWR_EDR1		0x5
+#define TWL4030_INT_PWR_EDR2		0x6
+#define TWL4030_INT_PWR_SIH_CTRL	0x7
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Accessory Interrupts
+ */
+#define TWL5031_ACIIMR_LSB		0x05
+#define TWL5031_ACIIMR_MSB		0x06
+#define TWL5031_ACIIDR_LSB		0x07
+#define TWL5031_ACIIDR_MSB		0x08
+#define TWL5031_ACCISR1			0x0F
+#define TWL5031_ACCIMR1			0x10
+#define TWL5031_ACCISR2			0x11
+#define TWL5031_ACCIMR2			0x12
+#define TWL5031_ACCSIR			0x13
+#define TWL5031_ACCEDR1			0x14
+#define TWL5031_ACCSIHCTRL		0x15
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Battery Charger Controller
+ */
+
+#define TWL5031_INTERRUPTS_BCIISR1	0x0
+#define TWL5031_INTERRUPTS_BCIIMR1	0x1
+#define TWL5031_INTERRUPTS_BCIISR2	0x2
+#define TWL5031_INTERRUPTS_BCIIMR2	0x3
+#define TWL5031_INTERRUPTS_BCISIR	0x4
+#define TWL5031_INTERRUPTS_BCIEDR1	0x5
+#define TWL5031_INTERRUPTS_BCIEDR2	0x6
+#define TWL5031_INTERRUPTS_BCISIHCTRL	0x7
+
+/*----------------------------------------------------------------------*/
+
+/* Power bus message definitions */
+
+/* The TWL4030/5030 splits its power-management resources (the various
+ * regulators, clock and reset lines) into 3 processor groups - P1, P2 and
+ * P3. These groups can then be configured to transition between sleep, wait-on
+ * and active states by sending messages to the power bus.  See Section 5.4.2
+ * Power Resources of TWL4030 TRM
+ */
+
+/* Processor groups */
+#define DEV_GRP_NULL		0x0
+#define DEV_GRP_P1		0x1	/* P1: all OMAP devices */
+#define DEV_GRP_P2		0x2	/* P2: all Modem devices */
+#define DEV_GRP_P3		0x4	/* P3: all peripheral devices */
+
+/* Resource groups */
+#define RES_GRP_RES		0x0	/* Reserved */
+#define RES_GRP_PP		0x1	/* Power providers */
+#define RES_GRP_RC		0x2	/* Reset and control */
+#define RES_GRP_PP_RC		0x3
+#define RES_GRP_PR		0x4	/* Power references */
+#define RES_GRP_PP_PR		0x5
+#define RES_GRP_RC_PR		0x6
+#define RES_GRP_ALL		0x7	/* All resource groups */
+
+#define RES_TYPE2_R0		0x0
+
+#define RES_TYPE_ALL		0x7
+
+/* Resource states */
+#define RES_STATE_WRST		0xF
+#define RES_STATE_ACTIVE	0xE
+#define RES_STATE_SLEEP		0x8
+#define RES_STATE_OFF		0x0
+
+/* Power resources */
+
+/* Power providers */
+#define RES_VAUX1               1
+#define RES_VAUX2               2
+#define RES_VAUX3               3
+#define RES_VAUX4               4
+#define RES_VMMC1               5
+#define RES_VMMC2               6
+#define RES_VPLL1               7
+#define RES_VPLL2               8
+#define RES_VSIM                9
+#define RES_VDAC                10
+#define RES_VINTANA1            11
+#define RES_VINTANA2            12
+#define RES_VINTDIG             13
+#define RES_VIO                 14
+#define RES_VDD1                15
+#define RES_VDD2                16
+#define RES_VUSB_1V5            17
+#define RES_VUSB_1V8            18
+#define RES_VUSB_3V1            19
+#define RES_VUSBCP              20
+#define RES_REGEN               21
+/* Reset and control */
+#define RES_NRES_PWRON          22
+#define RES_CLKEN               23
+#define RES_SYSEN               24
+#define RES_HFCLKOUT            25
+#define RES_32KCLKOUT           26
+#define RES_RESET               27
+/* Power Reference */
+#define RES_Main_Ref            28
+
+#define TOTAL_RESOURCES		28
+/*
+ * Power Bus Message Format ... these can be sent individually by Linux,
+ * but are usually part of downloaded scripts that are run when various
+ * power events are triggered.
+ *
+ *  Broadcast Message (16 Bits):
+ *    DEV_GRP[15:13] MT[12]  RES_GRP[11:9]  RES_TYPE2[8:7] RES_TYPE[6:4]
+ *    RES_STATE[3:0]
+ *
+ *  Singular Message (16 Bits):
+ *    DEV_GRP[15:13] MT[12]  RES_ID[11:4]  RES_STATE[3:0]
+ */
+
+#define MSG_BROADCAST(devgrp, grp, type, type2, state) \
+	( (devgrp) << 13 | 1 << 12 | (grp) << 9 | (type2) << 7 \
+	| (type) << 4 | (state))
+
+#define MSG_SINGULAR(devgrp, id, state) \
+	((devgrp) << 13 | 0 << 12 | (id) << 4 | (state))
+
+/*----------------------------------------------------------------------*/
+
+struct twl4030_clock_init_data {
+	bool ck32k_lowpwr_enable;
+};
+
+struct twl4030_bci_platform_data {
+	int *battery_tmp_tbl;
+	unsigned int tblsize;
+};
+
+/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */
+struct twl4030_gpio_platform_data {
+	int		gpio_base;
+	unsigned	irq_base, irq_end;
+
+	/* package the two LED signals as output-only GPIOs? */
+	bool		use_leds;
+
+	/* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
+	u8		mmc_cd;
+
+	/* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */
+	u32		debounce;
+
+	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
+	 * should be enabled.  Else, if that bit is set in "pulldowns",
+	 * that pulldown is enabled.  Don't waste power by letting any
+	 * digital inputs float...
+	 */
+	u32		pullups;
+	u32		pulldowns;
+
+	int		(*setup)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+	int		(*teardown)(struct device *dev,
+				unsigned gpio, unsigned ngpio);
+};
+
+struct twl4030_madc_platform_data {
+	int		irq_line;
+};
+
+/* Boards have uniqe mappings of {row, col} --> keycode.
+ * Column and row are 8 bits each, but range only from 0..7.
+ * a PERSISTENT_KEY is "always on" and never reported.
+ */
+#define PERSISTENT_KEY(r, c)	KEY((r), (c), KEY_RESERVED)
+
+struct twl4030_keypad_data {
+	const struct matrix_keymap_data *keymap_data;
+	unsigned rows;
+	unsigned cols;
+	bool rep;
+};
+
+enum twl4030_usb_mode {
+	T2_USB_MODE_ULPI = 1,
+	T2_USB_MODE_CEA2011_3PIN = 2,
+};
+
+struct twl4030_usb_data {
+	enum twl4030_usb_mode	usb_mode;
+};
+
+struct twl4030_ins {
+	u16 pmb_message;
+	u8 delay;
+};
+
+struct twl4030_script {
+	struct twl4030_ins *script;
+	unsigned size;
+	u8 flags;
+#define TWL4030_WRST_SCRIPT	(1<<0)
+#define TWL4030_WAKEUP12_SCRIPT	(1<<1)
+#define TWL4030_WAKEUP3_SCRIPT	(1<<2)
+#define TWL4030_SLEEP_SCRIPT	(1<<3)
+};
+
+struct twl4030_resconfig {
+	u8 resource;
+	u8 devgroup;	/* Processor group that Power resource belongs to */
+	u8 type;	/* Power resource addressed, 6 / broadcast message */
+	u8 type2;	/* Power resource addressed, 3 / broadcast message */
+	u8 remap_off;	/* off state remapping */
+	u8 remap_sleep;	/* sleep state remapping */
+};
+
+struct twl4030_power_data {
+	struct twl4030_script **scripts;
+	unsigned num;
+	struct twl4030_resconfig *resource_config;
+#define TWL4030_RESCONFIG_UNDEF	((u8)-1)
+};
+
+extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
+
+struct twl4030_codec_audio_data {
+	unsigned int	audio_mclk;
+	unsigned int ramp_delay_value;
+	unsigned int hs_extmute:1;
+	void (*set_hs_extmute)(int mute);
+};
+
+struct twl4030_codec_vibra_data {
+	unsigned int	audio_mclk;
+	unsigned int	coexist;
+};
+
+struct twl4030_codec_data {
+	unsigned int	audio_mclk;
+	struct twl4030_codec_audio_data		*audio;
+	struct twl4030_codec_vibra_data		*vibra;
+};
+
+struct twl4030_platform_data {
+	unsigned				irq_base, irq_end;
+	struct twl4030_clock_init_data		*clock;
+	struct twl4030_bci_platform_data	*bci;
+	struct twl4030_gpio_platform_data	*gpio;
+	struct twl4030_madc_platform_data	*madc;
+	struct twl4030_keypad_data		*keypad;
+	struct twl4030_usb_data			*usb;
+	struct twl4030_power_data		*power;
+	struct twl4030_codec_data		*codec;
+
+	/* LDO regulators */
+	struct regulator_init_data		*vdac;
+	struct regulator_init_data		*vpll1;
+	struct regulator_init_data		*vpll2;
+	struct regulator_init_data		*vmmc1;
+	struct regulator_init_data		*vmmc2;
+	struct regulator_init_data		*vsim;
+	struct regulator_init_data		*vaux1;
+	struct regulator_init_data		*vaux2;
+	struct regulator_init_data		*vaux3;
+	struct regulator_init_data		*vaux4;
+	struct regulator_init_data		*vio;
+	struct regulator_init_data		*vdd1;
+	struct regulator_init_data		*vdd2;
+	struct regulator_init_data		*vintana1;
+	struct regulator_init_data		*vintana2;
+	struct regulator_init_data		*vintdig;
+};
+
+/*----------------------------------------------------------------------*/
+
+int twl4030_sih_setup(int module);
+
+/* Offsets to Power Registers */
+#define TWL4030_VDAC_DEV_GRP		0x3B
+#define TWL4030_VDAC_DEDICATED		0x3E
+#define TWL4030_VAUX1_DEV_GRP		0x17
+#define TWL4030_VAUX1_DEDICATED		0x1A
+#define TWL4030_VAUX2_DEV_GRP		0x1B
+#define TWL4030_VAUX2_DEDICATED		0x1E
+#define TWL4030_VAUX3_DEV_GRP		0x1F
+#define TWL4030_VAUX3_DEDICATED		0x22
+
+#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
+	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
+	extern int twl4030charger_usb_en(int enable);
+#else
+	static inline int twl4030charger_usb_en(int enable) { return 0; }
+#endif
+
+/*----------------------------------------------------------------------*/
+
+/* Linux-specific regulator identifiers ... for now, we only support
+ * the LDOs, and leave the three buck converters alone.  VDD1 and VDD2
+ * need to tie into hardware based voltage scaling (cpufreq etc), while
+ * VIO is generally fixed.
+ */
+
+/* EXTERNAL dc-to-dc buck converters */
+#define TWL4030_REG_VDD1	0
+#define TWL4030_REG_VDD2	1
+#define TWL4030_REG_VIO		2
+
+/* EXTERNAL LDOs */
+#define TWL4030_REG_VDAC	3
+#define TWL4030_REG_VPLL1	4
+#define TWL4030_REG_VPLL2	5	/* not on all chips */
+#define TWL4030_REG_VMMC1	6
+#define TWL4030_REG_VMMC2	7	/* not on all chips */
+#define TWL4030_REG_VSIM	8	/* not on all chips */
+#define TWL4030_REG_VAUX1	9	/* not on all chips */
+#define TWL4030_REG_VAUX2_4030	10	/* (twl4030-specific) */
+#define TWL4030_REG_VAUX2	11	/* (twl5030 and newer) */
+#define TWL4030_REG_VAUX3	12	/* not on all chips */
+#define TWL4030_REG_VAUX4	13	/* not on all chips */
+
+/* INTERNAL LDOs */
+#define TWL4030_REG_VINTANA1	14
+#define TWL4030_REG_VINTANA2	15
+#define TWL4030_REG_VINTDIG	16
+#define TWL4030_REG_VUSB1V5	17
+#define TWL4030_REG_VUSB1V8	18
+#define TWL4030_REG_VUSB3V1	19
+
+#endif /* End of __TWL4030_H */
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
deleted file mode 100644
index a50bcf8a4048..000000000000
--- a/include/linux/i2c/twl4030.h
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * twl4030.h - header for TWL4030 PM and audio CODEC device
- *
- * Copyright (C) 2005-2006 Texas Instruments, Inc.
- *
- * Based on tlv320aic23.c:
- * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- */
-
-#ifndef __TWL4030_H_
-#define __TWL4030_H_
-
-#include <linux/types.h>
-#include <linux/input/matrix_keypad.h>
-
-/*
- * Using the twl4030 core we address registers using a pair
- *	{ module id, relative register offset }
- * which that core then maps to the relevant
- *	{ i2c slave, absolute register address }
- *
- * The module IDs are meaningful only to the twl4030 core code,
- * which uses them as array indices to look up the first register
- * address each module uses within a given i2c slave.
- */
-
-/* Slave 0 (i2c address 0x48) */
-#define TWL4030_MODULE_USB		0x00
-
-/* Slave 1 (i2c address 0x49) */
-#define TWL4030_MODULE_AUDIO_VOICE	0x01
-#define TWL4030_MODULE_GPIO		0x02
-#define TWL4030_MODULE_INTBR		0x03
-#define TWL4030_MODULE_PIH		0x04
-#define TWL4030_MODULE_TEST		0x05
-
-/* Slave 2 (i2c address 0x4a) */
-#define TWL4030_MODULE_KEYPAD		0x06
-#define TWL4030_MODULE_MADC		0x07
-#define TWL4030_MODULE_INTERRUPTS	0x08
-#define TWL4030_MODULE_LED		0x09
-#define TWL4030_MODULE_MAIN_CHARGE	0x0A
-#define TWL4030_MODULE_PRECHARGE	0x0B
-#define TWL4030_MODULE_PWM0		0x0C
-#define TWL4030_MODULE_PWM1		0x0D
-#define TWL4030_MODULE_PWMA		0x0E
-#define TWL4030_MODULE_PWMB		0x0F
-
-#define TWL5031_MODULE_ACCESSORY	0x10
-#define TWL5031_MODULE_INTERRUPTS	0x11
-
-/* Slave 3 (i2c address 0x4b) */
-#define TWL4030_MODULE_BACKUP		0x12
-#define TWL4030_MODULE_INT		0x13
-#define TWL4030_MODULE_PM_MASTER	0x14
-#define TWL4030_MODULE_PM_RECEIVER	0x15
-#define TWL4030_MODULE_RTC		0x16
-#define TWL4030_MODULE_SECURED_REG	0x17
-
-/*
- * Read and write single 8-bit registers
- */
-int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
-int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
-
-/*
- * Read and write several 8-bit registers at once.
- *
- * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
- * for the value, and populate your data starting at offset 1.
- */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
-
-/*----------------------------------------------------------------------*/
-
-/*
- * NOTE:  at up to 1024 registers, this is a big chip.
- *
- * Avoid putting register declarations in this file, instead of into
- * a driver-private file, unless some of the registers in a block
- * need to be shared with other drivers.  One example is blocks that
- * have Secondary IRQ Handler (SIH) registers.
- */
-
-#define TWL4030_SIH_CTRL_EXCLEN_MASK	BIT(0)
-#define TWL4030_SIH_CTRL_PENDDIS_MASK	BIT(1)
-#define TWL4030_SIH_CTRL_COR_MASK	BIT(2)
-
-/*----------------------------------------------------------------------*/
-
-/*
- * GPIO Block Register offsets (use TWL4030_MODULE_GPIO)
- */
-
-#define REG_GPIODATAIN1			0x0
-#define REG_GPIODATAIN2			0x1
-#define REG_GPIODATAIN3			0x2
-#define REG_GPIODATADIR1		0x3
-#define REG_GPIODATADIR2		0x4
-#define REG_GPIODATADIR3		0x5
-#define REG_GPIODATAOUT1		0x6
-#define REG_GPIODATAOUT2		0x7
-#define REG_GPIODATAOUT3		0x8
-#define REG_CLEARGPIODATAOUT1		0x9
-#define REG_CLEARGPIODATAOUT2		0xA
-#define REG_CLEARGPIODATAOUT3		0xB
-#define REG_SETGPIODATAOUT1		0xC
-#define REG_SETGPIODATAOUT2		0xD
-#define REG_SETGPIODATAOUT3		0xE
-#define REG_GPIO_DEBEN1			0xF
-#define REG_GPIO_DEBEN2			0x10
-#define REG_GPIO_DEBEN3			0x11
-#define REG_GPIO_CTRL			0x12
-#define REG_GPIOPUPDCTR1		0x13
-#define REG_GPIOPUPDCTR2		0x14
-#define REG_GPIOPUPDCTR3		0x15
-#define REG_GPIOPUPDCTR4		0x16
-#define REG_GPIOPUPDCTR5		0x17
-#define REG_GPIO_ISR1A			0x19
-#define REG_GPIO_ISR2A			0x1A
-#define REG_GPIO_ISR3A			0x1B
-#define REG_GPIO_IMR1A			0x1C
-#define REG_GPIO_IMR2A			0x1D
-#define REG_GPIO_IMR3A			0x1E
-#define REG_GPIO_ISR1B			0x1F
-#define REG_GPIO_ISR2B			0x20
-#define REG_GPIO_ISR3B			0x21
-#define REG_GPIO_IMR1B			0x22
-#define REG_GPIO_IMR2B			0x23
-#define REG_GPIO_IMR3B			0x24
-#define REG_GPIO_EDR1			0x28
-#define REG_GPIO_EDR2			0x29
-#define REG_GPIO_EDR3			0x2A
-#define REG_GPIO_EDR4			0x2B
-#define REG_GPIO_EDR5			0x2C
-#define REG_GPIO_SIH_CTRL		0x2D
-
-/* Up to 18 signals are available as GPIOs, when their
- * pins are not assigned to another use (such as ULPI/USB).
- */
-#define TWL4030_GPIO_MAX		18
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Keypad register offsets (use TWL4030_MODULE_KEYPAD)
- * ... SIH/interrupt only
- */
-
-#define TWL4030_KEYPAD_KEYP_ISR1	0x11
-#define TWL4030_KEYPAD_KEYP_IMR1	0x12
-#define TWL4030_KEYPAD_KEYP_ISR2	0x13
-#define TWL4030_KEYPAD_KEYP_IMR2	0x14
-#define TWL4030_KEYPAD_KEYP_SIR		0x15	/* test register */
-#define TWL4030_KEYPAD_KEYP_EDR		0x16
-#define TWL4030_KEYPAD_KEYP_SIH_CTRL	0x17
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Multichannel ADC register offsets (use TWL4030_MODULE_MADC)
- * ... SIH/interrupt only
- */
-
-#define TWL4030_MADC_ISR1		0x61
-#define TWL4030_MADC_IMR1		0x62
-#define TWL4030_MADC_ISR2		0x63
-#define TWL4030_MADC_IMR2		0x64
-#define TWL4030_MADC_SIR		0x65	/* test register */
-#define TWL4030_MADC_EDR		0x66
-#define TWL4030_MADC_SIH_CTRL		0x67
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS)
- */
-
-#define TWL4030_INTERRUPTS_BCIISR1A	0x0
-#define TWL4030_INTERRUPTS_BCIISR2A	0x1
-#define TWL4030_INTERRUPTS_BCIIMR1A	0x2
-#define TWL4030_INTERRUPTS_BCIIMR2A	0x3
-#define TWL4030_INTERRUPTS_BCIISR1B	0x4
-#define TWL4030_INTERRUPTS_BCIISR2B	0x5
-#define TWL4030_INTERRUPTS_BCIIMR1B	0x6
-#define TWL4030_INTERRUPTS_BCIIMR2B	0x7
-#define TWL4030_INTERRUPTS_BCISIR1	0x8	/* test register */
-#define TWL4030_INTERRUPTS_BCISIR2	0x9	/* test register */
-#define TWL4030_INTERRUPTS_BCIEDR1	0xa
-#define TWL4030_INTERRUPTS_BCIEDR2	0xb
-#define TWL4030_INTERRUPTS_BCIEDR3	0xc
-#define TWL4030_INTERRUPTS_BCISIHCTRL	0xd
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Power Interrupt block register offsets (use TWL4030_MODULE_INT)
- */
-
-#define TWL4030_INT_PWR_ISR1		0x0
-#define TWL4030_INT_PWR_IMR1		0x1
-#define TWL4030_INT_PWR_ISR2		0x2
-#define TWL4030_INT_PWR_IMR2		0x3
-#define TWL4030_INT_PWR_SIR		0x4	/* test register */
-#define TWL4030_INT_PWR_EDR1		0x5
-#define TWL4030_INT_PWR_EDR2		0x6
-#define TWL4030_INT_PWR_SIH_CTRL	0x7
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Accessory Interrupts
- */
-#define TWL5031_ACIIMR_LSB		0x05
-#define TWL5031_ACIIMR_MSB		0x06
-#define TWL5031_ACIIDR_LSB		0x07
-#define TWL5031_ACIIDR_MSB		0x08
-#define TWL5031_ACCISR1			0x0F
-#define TWL5031_ACCIMR1			0x10
-#define TWL5031_ACCISR2			0x11
-#define TWL5031_ACCIMR2			0x12
-#define TWL5031_ACCSIR			0x13
-#define TWL5031_ACCEDR1			0x14
-#define TWL5031_ACCSIHCTRL		0x15
-
-/*----------------------------------------------------------------------*/
-
-/*
- * Battery Charger Controller
- */
-
-#define TWL5031_INTERRUPTS_BCIISR1	0x0
-#define TWL5031_INTERRUPTS_BCIIMR1	0x1
-#define TWL5031_INTERRUPTS_BCIISR2	0x2
-#define TWL5031_INTERRUPTS_BCIIMR2	0x3
-#define TWL5031_INTERRUPTS_BCISIR	0x4
-#define TWL5031_INTERRUPTS_BCIEDR1	0x5
-#define TWL5031_INTERRUPTS_BCIEDR2	0x6
-#define TWL5031_INTERRUPTS_BCISIHCTRL	0x7
-
-/*----------------------------------------------------------------------*/
-
-/* Power bus message definitions */
-
-/* The TWL4030/5030 splits its power-management resources (the various
- * regulators, clock and reset lines) into 3 processor groups - P1, P2 and
- * P3. These groups can then be configured to transition between sleep, wait-on
- * and active states by sending messages to the power bus.  See Section 5.4.2
- * Power Resources of TWL4030 TRM
- */
-
-/* Processor groups */
-#define DEV_GRP_NULL		0x0
-#define DEV_GRP_P1		0x1	/* P1: all OMAP devices */
-#define DEV_GRP_P2		0x2	/* P2: all Modem devices */
-#define DEV_GRP_P3		0x4	/* P3: all peripheral devices */
-
-/* Resource groups */
-#define RES_GRP_RES		0x0	/* Reserved */
-#define RES_GRP_PP		0x1	/* Power providers */
-#define RES_GRP_RC		0x2	/* Reset and control */
-#define RES_GRP_PP_RC		0x3
-#define RES_GRP_PR		0x4	/* Power references */
-#define RES_GRP_PP_PR		0x5
-#define RES_GRP_RC_PR		0x6
-#define RES_GRP_ALL		0x7	/* All resource groups */
-
-#define RES_TYPE2_R0		0x0
-
-#define RES_TYPE_ALL		0x7
-
-/* Resource states */
-#define RES_STATE_WRST		0xF
-#define RES_STATE_ACTIVE	0xE
-#define RES_STATE_SLEEP		0x8
-#define RES_STATE_OFF		0x0
-
-/* Power resources */
-
-/* Power providers */
-#define RES_VAUX1               1
-#define RES_VAUX2               2
-#define RES_VAUX3               3
-#define RES_VAUX4               4
-#define RES_VMMC1               5
-#define RES_VMMC2               6
-#define RES_VPLL1               7
-#define RES_VPLL2               8
-#define RES_VSIM                9
-#define RES_VDAC                10
-#define RES_VINTANA1            11
-#define RES_VINTANA2            12
-#define RES_VINTDIG             13
-#define RES_VIO                 14
-#define RES_VDD1                15
-#define RES_VDD2                16
-#define RES_VUSB_1V5            17
-#define RES_VUSB_1V8            18
-#define RES_VUSB_3V1            19
-#define RES_VUSBCP              20
-#define RES_REGEN               21
-/* Reset and control */
-#define RES_NRES_PWRON          22
-#define RES_CLKEN               23
-#define RES_SYSEN               24
-#define RES_HFCLKOUT            25
-#define RES_32KCLKOUT           26
-#define RES_RESET               27
-/* Power Reference */
-#define RES_Main_Ref            28
-
-#define TOTAL_RESOURCES		28
-/*
- * Power Bus Message Format ... these can be sent individually by Linux,
- * but are usually part of downloaded scripts that are run when various
- * power events are triggered.
- *
- *  Broadcast Message (16 Bits):
- *    DEV_GRP[15:13] MT[12]  RES_GRP[11:9]  RES_TYPE2[8:7] RES_TYPE[6:4]
- *    RES_STATE[3:0]
- *
- *  Singular Message (16 Bits):
- *    DEV_GRP[15:13] MT[12]  RES_ID[11:4]  RES_STATE[3:0]
- */
-
-#define MSG_BROADCAST(devgrp, grp, type, type2, state) \
-	( (devgrp) << 13 | 1 << 12 | (grp) << 9 | (type2) << 7 \
-	| (type) << 4 | (state))
-
-#define MSG_SINGULAR(devgrp, id, state) \
-	((devgrp) << 13 | 0 << 12 | (id) << 4 | (state))
-
-/*----------------------------------------------------------------------*/
-
-struct twl4030_clock_init_data {
-	bool ck32k_lowpwr_enable;
-};
-
-struct twl4030_bci_platform_data {
-	int *battery_tmp_tbl;
-	unsigned int tblsize;
-};
-
-/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */
-struct twl4030_gpio_platform_data {
-	int		gpio_base;
-	unsigned	irq_base, irq_end;
-
-	/* package the two LED signals as output-only GPIOs? */
-	bool		use_leds;
-
-	/* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
-	u8		mmc_cd;
-
-	/* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */
-	u32		debounce;
-
-	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
-	 * should be enabled.  Else, if that bit is set in "pulldowns",
-	 * that pulldown is enabled.  Don't waste power by letting any
-	 * digital inputs float...
-	 */
-	u32		pullups;
-	u32		pulldowns;
-
-	int		(*setup)(struct device *dev,
-				unsigned gpio, unsigned ngpio);
-	int		(*teardown)(struct device *dev,
-				unsigned gpio, unsigned ngpio);
-};
-
-struct twl4030_madc_platform_data {
-	int		irq_line;
-};
-
-/* Boards have uniqe mappings of {row, col} --> keycode.
- * Column and row are 8 bits each, but range only from 0..7.
- * a PERSISTENT_KEY is "always on" and never reported.
- */
-#define PERSISTENT_KEY(r, c)	KEY((r), (c), KEY_RESERVED)
-
-struct twl4030_keypad_data {
-	const struct matrix_keymap_data *keymap_data;
-	unsigned rows;
-	unsigned cols;
-	bool rep;
-};
-
-enum twl4030_usb_mode {
-	T2_USB_MODE_ULPI = 1,
-	T2_USB_MODE_CEA2011_3PIN = 2,
-};
-
-struct twl4030_usb_data {
-	enum twl4030_usb_mode	usb_mode;
-};
-
-struct twl4030_ins {
-	u16 pmb_message;
-	u8 delay;
-};
-
-struct twl4030_script {
-	struct twl4030_ins *script;
-	unsigned size;
-	u8 flags;
-#define TWL4030_WRST_SCRIPT	(1<<0)
-#define TWL4030_WAKEUP12_SCRIPT	(1<<1)
-#define TWL4030_WAKEUP3_SCRIPT	(1<<2)
-#define TWL4030_SLEEP_SCRIPT	(1<<3)
-};
-
-struct twl4030_resconfig {
-	u8 resource;
-	u8 devgroup;	/* Processor group that Power resource belongs to */
-	u8 type;	/* Power resource addressed, 6 / broadcast message */
-	u8 type2;	/* Power resource addressed, 3 / broadcast message */
-	u8 remap_off;	/* off state remapping */
-	u8 remap_sleep;	/* sleep state remapping */
-};
-
-struct twl4030_power_data {
-	struct twl4030_script **scripts;
-	unsigned num;
-	struct twl4030_resconfig *resource_config;
-#define TWL4030_RESCONFIG_UNDEF	((u8)-1)
-};
-
-extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
-
-struct twl4030_codec_audio_data {
-	unsigned int	audio_mclk;
-	unsigned int ramp_delay_value;
-	unsigned int hs_extmute:1;
-	void (*set_hs_extmute)(int mute);
-};
-
-struct twl4030_codec_vibra_data {
-	unsigned int	audio_mclk;
-	unsigned int	coexist;
-};
-
-struct twl4030_codec_data {
-	unsigned int	audio_mclk;
-	struct twl4030_codec_audio_data		*audio;
-	struct twl4030_codec_vibra_data		*vibra;
-};
-
-struct twl4030_platform_data {
-	unsigned				irq_base, irq_end;
-	struct twl4030_clock_init_data		*clock;
-	struct twl4030_bci_platform_data	*bci;
-	struct twl4030_gpio_platform_data	*gpio;
-	struct twl4030_madc_platform_data	*madc;
-	struct twl4030_keypad_data		*keypad;
-	struct twl4030_usb_data			*usb;
-	struct twl4030_power_data		*power;
-	struct twl4030_codec_data		*codec;
-
-	/* LDO regulators */
-	struct regulator_init_data		*vdac;
-	struct regulator_init_data		*vpll1;
-	struct regulator_init_data		*vpll2;
-	struct regulator_init_data		*vmmc1;
-	struct regulator_init_data		*vmmc2;
-	struct regulator_init_data		*vsim;
-	struct regulator_init_data		*vaux1;
-	struct regulator_init_data		*vaux2;
-	struct regulator_init_data		*vaux3;
-	struct regulator_init_data		*vaux4;
-	struct regulator_init_data		*vio;
-	struct regulator_init_data		*vdd1;
-	struct regulator_init_data		*vdd2;
-	struct regulator_init_data		*vintana1;
-	struct regulator_init_data		*vintana2;
-	struct regulator_init_data		*vintdig;
-};
-
-/*----------------------------------------------------------------------*/
-
-int twl4030_sih_setup(int module);
-
-/* Offsets to Power Registers */
-#define TWL4030_VDAC_DEV_GRP		0x3B
-#define TWL4030_VDAC_DEDICATED		0x3E
-#define TWL4030_VAUX1_DEV_GRP		0x17
-#define TWL4030_VAUX1_DEDICATED		0x1A
-#define TWL4030_VAUX2_DEV_GRP		0x1B
-#define TWL4030_VAUX2_DEDICATED		0x1E
-#define TWL4030_VAUX3_DEV_GRP		0x1F
-#define TWL4030_VAUX3_DEDICATED		0x22
-
-#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
-	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
-	extern int twl4030charger_usb_en(int enable);
-#else
-	static inline int twl4030charger_usb_en(int enable) { return 0; }
-#endif
-
-/*----------------------------------------------------------------------*/
-
-/* Linux-specific regulator identifiers ... for now, we only support
- * the LDOs, and leave the three buck converters alone.  VDD1 and VDD2
- * need to tie into hardware based voltage scaling (cpufreq etc), while
- * VIO is generally fixed.
- */
-
-/* EXTERNAL dc-to-dc buck converters */
-#define TWL4030_REG_VDD1	0
-#define TWL4030_REG_VDD2	1
-#define TWL4030_REG_VIO		2
-
-/* EXTERNAL LDOs */
-#define TWL4030_REG_VDAC	3
-#define TWL4030_REG_VPLL1	4
-#define TWL4030_REG_VPLL2	5	/* not on all chips */
-#define TWL4030_REG_VMMC1	6
-#define TWL4030_REG_VMMC2	7	/* not on all chips */
-#define TWL4030_REG_VSIM	8	/* not on all chips */
-#define TWL4030_REG_VAUX1	9	/* not on all chips */
-#define TWL4030_REG_VAUX2_4030	10	/* (twl4030-specific) */
-#define TWL4030_REG_VAUX2	11	/* (twl5030 and newer) */
-#define TWL4030_REG_VAUX3	12	/* not on all chips */
-#define TWL4030_REG_VAUX4	13	/* not on all chips */
-
-/* INTERNAL LDOs */
-#define TWL4030_REG_VINTANA1	14
-#define TWL4030_REG_VINTANA2	15
-#define TWL4030_REG_VINTDIG	16
-#define TWL4030_REG_VUSB1V5	17
-#define TWL4030_REG_VUSB1V8	18
-#define TWL4030_REG_VUSB3V1	19
-
-#endif /* End of __TWL4030_H */
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 5f1681f6ca76..c3a6ceb542cb 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -26,7 +26,7 @@
 #include <linux/pm.h>
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
-#include <linux/i2c/twl4030.h>
+#include <linux/i2c/twl.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-- 
cgit v1.2.3


From fc7b92fca4e546184557f1c53f84ad57c66b7695 Mon Sep 17 00:00:00 2001
From: Balaji T K <balajitk@ti.com>
Date: Sun, 13 Dec 2009 21:23:33 +0100
Subject: mfd: Rename all twl4030_i2c*

This patch renames function names like twl4030_i2c_write_u8,
twl4030_i2c_read_u8 to twl_i2c_write_u8, twl_i2c_read_u8
and also common variable in twl-core.c

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/twl4030-gpio.c             |  18 ++---
 drivers/input/keyboard/twl4030_keypad.c |   4 +-
 drivers/input/misc/twl4030-pwrbutton.c  |   2 +-
 drivers/mfd/twl-core.c                  | 136 +++++++++++++++++---------------
 drivers/mfd/twl4030-irq.c               |  18 ++---
 drivers/mfd/twl4030-power.c             |  68 ++++++++--------
 drivers/regulator/twl-regulator.c       |   8 +-
 drivers/rtc/rtc-twl.c                   |  14 ++--
 drivers/usb/otg/twl4030-usb.c           |  36 ++++-----
 drivers/video/omap/lcd_2430sdp.c        |   2 +-
 drivers/watchdog/twl4030_wdt.c          |   2 +-
 include/linux/i2c/twl.h                 |  31 ++++++--
 sound/soc/codecs/twl4030.c              |   8 +-
 13 files changed, 187 insertions(+), 160 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
index a320d7bfe67c..7fe881e2bdfb 100644
--- a/drivers/gpio/twl4030-gpio.c
+++ b/drivers/gpio/twl4030-gpio.c
@@ -80,7 +80,7 @@ static unsigned int gpio_usage_count;
  */
 static inline int gpio_twl4030_write(u8 address, u8 data)
 {
-	return twl4030_i2c_write_u8(TWL4030_MODULE_GPIO, data, address);
+	return twl_i2c_write_u8(TWL4030_MODULE_GPIO, data, address);
 }
 
 /*----------------------------------------------------------------------*/
@@ -117,7 +117,7 @@ static inline int gpio_twl4030_read(u8 address)
 	u8 data;
 	int ret = 0;
 
-	ret = twl4030_i2c_read_u8(TWL4030_MODULE_GPIO, &data, address);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_GPIO, &data, address);
 	return (ret < 0) ? ret : data;
 }
 
@@ -142,7 +142,7 @@ static void twl4030_led_set_value(int led, int value)
 		cached_leden &= ~mask;
 	else
 		cached_leden |= mask;
-	status = twl4030_i2c_write_u8(TWL4030_MODULE_LED, cached_leden,
+	status = twl_i2c_write_u8(TWL4030_MODULE_LED, cached_leden,
 			TWL4030_LED_LEDEN);
 	mutex_unlock(&gpio_lock);
 }
@@ -223,23 +223,23 @@ static int twl_request(struct gpio_chip *chip, unsigned offset)
 		}
 
 		/* initialize PWM to always-drive */
-		status = twl4030_i2c_write_u8(module, 0x7f,
+		status = twl_i2c_write_u8(module, 0x7f,
 				TWL4030_PWMx_PWMxOFF);
 		if (status < 0)
 			goto done;
-		status = twl4030_i2c_write_u8(module, 0x7f,
+		status = twl_i2c_write_u8(module, 0x7f,
 				TWL4030_PWMx_PWMxON);
 		if (status < 0)
 			goto done;
 
 		/* init LED to not-driven (high) */
 		module = TWL4030_MODULE_LED;
-		status = twl4030_i2c_read_u8(module, &cached_leden,
+		status = twl_i2c_read_u8(module, &cached_leden,
 				TWL4030_LED_LEDEN);
 		if (status < 0)
 			goto done;
 		cached_leden &= ~ledclr_mask;
-		status = twl4030_i2c_write_u8(module, cached_leden,
+		status = twl_i2c_write_u8(module, cached_leden,
 				TWL4030_LED_LEDEN);
 		if (status < 0)
 			goto done;
@@ -370,7 +370,7 @@ static int __devinit gpio_twl4030_pulls(u32 ups, u32 downs)
 		message[i] = bit_mask;
 	}
 
-	return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
+	return twl_i2c_write(TWL4030_MODULE_GPIO, message,
 				REG_GPIOPUPDCTR1, 5);
 }
 
@@ -387,7 +387,7 @@ static int __devinit gpio_twl4030_debounce(u32 debounce, u8 mmc_cd)
 	debounce >>= 8;
 	message[3] = (debounce & 0x03);
 
-	return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
+	return twl_i2c_write(TWL4030_MODULE_GPIO, message,
 				REG_GPIO_DEBEN1, 3);
 }
 
diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c
index 1d1536a2fe46..eeaa7acb9cfc 100644
--- a/drivers/input/keyboard/twl4030_keypad.c
+++ b/drivers/input/keyboard/twl4030_keypad.c
@@ -133,7 +133,7 @@ struct twl4030_keypad {
 static int twl4030_kpread(struct twl4030_keypad *kp,
 		u8 *data, u32 reg, u8 num_bytes)
 {
-	int ret = twl4030_i2c_read(TWL4030_MODULE_KEYPAD, data, reg, num_bytes);
+	int ret = twl_i2c_read(TWL4030_MODULE_KEYPAD, data, reg, num_bytes);
 
 	if (ret < 0)
 		dev_warn(kp->dbg_dev,
@@ -145,7 +145,7 @@ static int twl4030_kpread(struct twl4030_keypad *kp,
 
 static int twl4030_kpwrite_u8(struct twl4030_keypad *kp, u8 data, u32 reg)
 {
-	int ret = twl4030_i2c_write_u8(TWL4030_MODULE_KEYPAD, data, reg);
+	int ret = twl_i2c_write_u8(TWL4030_MODULE_KEYPAD, data, reg);
 
 	if (ret < 0)
 		dev_warn(kp->dbg_dev,
diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c
index a73b889fff79..bdde5c889035 100644
--- a/drivers/input/misc/twl4030-pwrbutton.c
+++ b/drivers/input/misc/twl4030-pwrbutton.c
@@ -49,7 +49,7 @@ static irqreturn_t powerbutton_irq(int irq, void *_pwr)
 	local_irq_enable();
 #endif
 
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &value,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &value,
 				  STS_HW_CONDITIONS);
 	if (!err)  {
 		input_report_key(pwr, KEY_POWER, value & PWR_PWRON_IRQ);
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 44714f5cf495..9021f44de2a4 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -1,5 +1,6 @@
 /*
- * twl4030_core.c - driver for TWL4030/TPS659x0 PM and audio CODEC devices
+ * twl_core.c - driver for TWL4030/TWL5030/TWL60X0/TPS659x0 PM
+ * and audio CODEC devices
  *
  * Copyright (C) 2005-2006 Texas Instruments, Inc.
  *
@@ -55,7 +56,7 @@
  * (and associated registers).
  */
 
-#define DRIVER_NAME			"twl4030"
+#define DRIVER_NAME			"twl"
 
 #if defined(CONFIG_TWL4030_BCI_BATTERY) || \
 	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
@@ -125,7 +126,7 @@
 /* Last - for index max*/
 #define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
 
-#define TWL4030_NUM_SLAVES		4
+#define TWL_NUM_SLAVES		4
 
 #if defined(CONFIG_INPUT_TWL4030_PWRBUTTON) \
 	|| defined(CONFIG_INPUT_TWL4030_PWBUTTON_MODULE)
@@ -134,6 +135,13 @@
 #define twl_has_pwrbutton()	false
 #endif
 
+#define SUB_CHIP_ID0 0
+#define SUB_CHIP_ID1 1
+#define SUB_CHIP_ID2 2
+#define SUB_CHIP_ID3 3
+
+#define TWL_MODULE_LAST TWL4030_MODULE_LAST
+
 /* Base Address defns for twl4030_map[] */
 
 /* subchip/slave 0 - USB ID */
@@ -201,7 +209,7 @@
 static bool inuse;
 
 /* Structure for each TWL4030 Slave */
-struct twl4030_client {
+struct twl_client {
 	struct i2c_client *client;
 	u8 address;
 
@@ -212,16 +220,16 @@ struct twl4030_client {
 	struct mutex xfer_lock;
 };
 
-static struct twl4030_client twl4030_modules[TWL4030_NUM_SLAVES];
+static struct twl_client twl_modules[TWL_NUM_SLAVES];
 
 
 /* mapping the module id to slave id and base address */
-struct twl4030mapping {
+struct twl_mapping {
 	unsigned char sid;	/* Slave ID */
 	unsigned char base;	/* base address */
 };
 
-static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
 	/*
 	 * NOTE:  don't change this table without updating the
 	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
@@ -262,7 +270,7 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
 /* Exported Functions */
 
 /**
- * twl4030_i2c_write - Writes a n bit register in TWL4030
+ * twl_i2c_write - Writes a n bit register in TWL4030/TWL5030/TWL60X0
  * @mod_no: module number
  * @value: an array of num_bytes+1 containing data to write
  * @reg: register address (just offset will do)
@@ -273,19 +281,19 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
  *
  * Returns the result of operation - 0 is success
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
+int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	int sid;
-	struct twl4030_client *twl;
+	struct twl_client *twl;
 	struct i2c_msg *msg;
 
-	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+	if (unlikely(mod_no > TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
 	sid = twl4030_map[mod_no].sid;
-	twl = &twl4030_modules[sid];
+	twl = &twl_modules[sid];
 
 	if (unlikely(!inuse)) {
 		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
@@ -318,10 +326,10 @@ int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(twl4030_i2c_write);
+EXPORT_SYMBOL(twl_i2c_write);
 
 /**
- * twl4030_i2c_read - Reads a n bit register in TWL4030
+ * twl_i2c_read - Reads a n bit register in TWL4030/TWL5030/TWL60X0
  * @mod_no: module number
  * @value: an array of num_bytes containing data to be read
  * @reg: register address (just offset will do)
@@ -329,20 +337,20 @@ EXPORT_SYMBOL(twl4030_i2c_write);
  *
  * Returns result of operation - num_bytes is success else failure.
  */
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
+int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	u8 val;
 	int sid;
-	struct twl4030_client *twl;
+	struct twl_client *twl;
 	struct i2c_msg *msg;
 
-	if (unlikely(mod_no > TWL4030_MODULE_LAST)) {
+	if (unlikely(mod_no > TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
 	sid = twl4030_map[mod_no].sid;
-	twl = &twl4030_modules[sid];
+	twl = &twl_modules[sid];
 
 	if (unlikely(!inuse)) {
 		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
@@ -377,40 +385,40 @@ int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(twl4030_i2c_read);
+EXPORT_SYMBOL(twl_i2c_read);
 
 /**
- * twl4030_i2c_write_u8 - Writes a 8 bit register in TWL4030
+ * twl_i2c_write_u8 - Writes a 8 bit register in TWL4030/TWL5030/TWL60X0
  * @mod_no: module number
  * @value: the value to be written 8 bit
  * @reg: register address (just offset will do)
  *
  * Returns result of operation - 0 is success
  */
-int twl4030_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
+int twl_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
 {
 
 	/* 2 bytes offset 1 contains the data offset 0 is used by i2c_write */
 	u8 temp_buffer[2] = { 0 };
 	/* offset 1 contains the data */
 	temp_buffer[1] = value;
-	return twl4030_i2c_write(mod_no, temp_buffer, reg, 1);
+	return twl_i2c_write(mod_no, temp_buffer, reg, 1);
 }
-EXPORT_SYMBOL(twl4030_i2c_write_u8);
+EXPORT_SYMBOL(twl_i2c_write_u8);
 
 /**
- * twl4030_i2c_read_u8 - Reads a 8 bit register from TWL4030
+ * twl_i2c_read_u8 - Reads a 8 bit register from TWL4030/TWL5030/TWL60X0
  * @mod_no: module number
  * @value: the value read 8 bit
  * @reg: register address (just offset will do)
  *
  * Returns result of operation - 0 is success
  */
-int twl4030_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
+int twl_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
 {
-	return twl4030_i2c_read(mod_no, value, reg, 1);
+	return twl_i2c_read(mod_no, value, reg, 1);
 }
-EXPORT_SYMBOL(twl4030_i2c_read_u8);
+EXPORT_SYMBOL(twl_i2c_read_u8);
 
 /*----------------------------------------------------------------------*/
 
@@ -420,7 +428,7 @@ add_numbered_child(unsigned chip, const char *name, int num,
 		bool can_wakeup, int irq0, int irq1)
 {
 	struct platform_device	*pdev;
-	struct twl4030_client	*twl = &twl4030_modules[chip];
+	struct twl_client	*twl = &twl_modules[chip];
 	int			status;
 
 	pdev = platform_device_alloc(name, num);
@@ -515,23 +523,24 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 				pdata->bci, sizeof(*pdata->bci),
 				false,
 				/* irq0 = CHG_PRES, irq1 = BCI */
-				pdata->irq_base + 8 + 1, pdata->irq_base + 2);
+				pdata->irq_base + BCI_PRES_INTR_OFFSET,
+				pdata->irq_base + BCI_INTR_OFFSET);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
 
 	if (twl_has_gpio() && pdata->gpio) {
-		child = add_child(1, "twl4030_gpio",
+		child = add_child(SUB_CHIP_ID1, "twl4030_gpio",
 				pdata->gpio, sizeof(*pdata->gpio),
-				false, pdata->irq_base + 0, 0);
+				false, pdata->irq_base + GPIO_INTR_OFFSET, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
 
 	if (twl_has_keypad() && pdata->keypad) {
-		child = add_child(2, "twl4030_keypad",
+		child = add_child(SUB_CHIP_ID2, "twl4030_keypad",
 				pdata->keypad, sizeof(*pdata->keypad),
-				true, pdata->irq_base + 1, 0);
+				true, pdata->irq_base + KEYPAD_INTR_OFFSET, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
@@ -539,7 +548,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 	if (twl_has_madc() && pdata->madc) {
 		child = add_child(2, "twl4030_madc",
 				pdata->madc, sizeof(*pdata->madc),
-				true, pdata->irq_base + 3, 0);
+				true, pdata->irq_base + MADC_INTR_OFFSET, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
@@ -554,7 +563,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		 */
 		child = add_child(3, "twl4030_rtc",
 				NULL, 0,
-				true, pdata->irq_base + 8 + 3, 0);
+				true, pdata->irq_base + RTC_INTR_OFFSET, 0);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 	}
@@ -604,7 +613,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 				pdata->usb, sizeof(*pdata->usb),
 				true,
 				/* irq0 = USB_PRES, irq1 = USB */
-				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
+				pdata->irq_base + USB_PRES_INTR_OFFSET,
+				pdata->irq_base + USB_INTR_OFFSET);
 
 		if (IS_ERR(child))
 			return PTR_ERR(child);
@@ -724,7 +734,7 @@ static inline int __init protect_pm_master(void)
 {
 	int e = 0;
 
-	e = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_LOCK,
+	e = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, KEY_LOCK,
 			R_PROTECT_KEY);
 	return e;
 }
@@ -733,9 +743,9 @@ static inline int __init unprotect_pm_master(void)
 {
 	int e = 0;
 
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK1,
+	e |= twl_i2c_write_u8(TWL_MODULE_PM_MASTER, KEY_UNLOCK1,
 			R_PROTECT_KEY);
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, KEY_UNLOCK2,
+	e |= twl_i2c_write_u8(TWL_MODULE_PM_MASTER, KEY_UNLOCK2,
 			R_PROTECT_KEY);
 	return e;
 }
@@ -755,7 +765,7 @@ static void clocks_init(struct device *dev,
 		osc = clk_get(dev, "osc_sys_ck");
 
 	if (IS_ERR(osc)) {
-		printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+		printk(KERN_WARNING "Skipping twl internal clock init and "
 				"using bootloader value (unknown osc rate)\n");
 		return;
 	}
@@ -769,7 +779,7 @@ static void clocks_init(struct device *dev,
 	 */
 	osc = ERR_PTR(-EIO);
 
-	printk(KERN_WARNING "Skipping twl4030 internal clock init and "
+	printk(KERN_WARNING "Skipping twl internal clock init and "
 	       "using bootloader value (unknown osc rate)\n");
 
 	return;
@@ -793,7 +803,7 @@ static void clocks_init(struct device *dev,
 
 	e |= unprotect_pm_master();
 	/* effect->MADC+USB ck en */
-	e |= twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
+	e |= twl_i2c_write_u8(TWL_MODULE_PM_MASTER, ctrl, R_CFG_BOOT);
 	e |= protect_pm_master();
 
 	if (e < 0)
@@ -806,7 +816,7 @@ int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
 int twl_exit_irq(void);
 int twl_init_chip_irq(const char *chip);
 
-static int twl4030_remove(struct i2c_client *client)
+static int twl_remove(struct i2c_client *client)
 {
 	unsigned i;
 	int status;
@@ -815,12 +825,12 @@ static int twl4030_remove(struct i2c_client *client)
 	if (status < 0)
 		return status;
 
-	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
-		struct twl4030_client	*twl = &twl4030_modules[i];
+	for (i = 0; i < TWL_NUM_SLAVES; i++) {
+		struct twl_client	*twl = &twl_modules[i];
 
 		if (twl->client && twl->client != client)
 			i2c_unregister_device(twl->client);
-		twl4030_modules[i].client = NULL;
+		twl_modules[i].client = NULL;
 	}
 	inuse = false;
 	return 0;
@@ -828,7 +838,7 @@ static int twl4030_remove(struct i2c_client *client)
 
 /* NOTE:  this driver only handles a single twl4030/tps659x0 chip */
 static int __init
-twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
+twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	int				status;
 	unsigned			i;
@@ -849,8 +859,8 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return -EBUSY;
 	}
 
-	for (i = 0; i < TWL4030_NUM_SLAVES; i++) {
-		struct twl4030_client	*twl = &twl4030_modules[i];
+	for (i = 0; i < TWL_NUM_SLAVES; i++) {
+		struct twl_client	*twl = &twl_modules[i];
 
 		twl->address = client->addr + i;
 		if (i == 0)
@@ -889,11 +899,11 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	status = add_children(pdata, id->driver_data);
 fail:
 	if (status < 0)
-		twl4030_remove(client);
+		twl_remove(client);
 	return status;
 }
 
-static const struct i2c_device_id twl4030_ids[] = {
+static const struct i2c_device_id twl_ids[] = {
 	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
 	{ "twl5030", 0 },		/* T2 updated */
 	{ "twl5031", TWL5031 },		/* TWL5030 updated */
@@ -902,28 +912,28 @@ static const struct i2c_device_id twl4030_ids[] = {
 	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
 	{ /* end of list */ },
 };
-MODULE_DEVICE_TABLE(i2c, twl4030_ids);
+MODULE_DEVICE_TABLE(i2c, twl_ids);
 
 /* One Client Driver , 4 Clients */
-static struct i2c_driver twl4030_driver = {
+static struct i2c_driver twl_driver = {
 	.driver.name	= DRIVER_NAME,
-	.id_table	= twl4030_ids,
-	.probe		= twl4030_probe,
-	.remove		= twl4030_remove,
+	.id_table	= twl_ids,
+	.probe		= twl_probe,
+	.remove		= twl_remove,
 };
 
-static int __init twl4030_init(void)
+static int __init twl_init(void)
 {
-	return i2c_add_driver(&twl4030_driver);
+	return i2c_add_driver(&twl_driver);
 }
-subsys_initcall(twl4030_init);
+subsys_initcall(twl_init);
 
-static void __exit twl4030_exit(void)
+static void __exit twl_exit(void)
 {
-	i2c_del_driver(&twl4030_driver);
+	i2c_del_driver(&twl_driver);
 }
-module_exit(twl4030_exit);
+module_exit(twl_exit);
 
 MODULE_AUTHOR("Texas Instruments, Inc.");
-MODULE_DESCRIPTION("I2C Core interface for TWL4030");
+MODULE_DESCRIPTION("I2C Core interface for TWL");
 MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index c4528db549c6..5a62cf916987 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -296,7 +296,7 @@ static int twl4030_irq_thread(void *data)
 		/* Wait for IRQ, then read PIH irq status (also blocking) */
 		wait_for_completion_interruptible(&irq_event);
 
-		ret = twl4030_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
+		ret = twl_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
 					  REG_PIH_ISR_P1);
 		if (ret) {
 			pr_warning("twl4030: I2C error %d reading PIH ISR\n",
@@ -396,7 +396,7 @@ static int twl4030_init_sih_modules(unsigned line)
 		if (sih->irq_lines <= line)
 			continue;
 
-		status = twl4030_i2c_write(sih->module, buf,
+		status = twl_i2c_write(sih->module, buf,
 				sih->mask[line].imr_offset, sih->bytes_ixr);
 		if (status < 0)
 			pr_err("twl4030: err %d initializing %s %s\n",
@@ -410,7 +410,7 @@ static int twl4030_init_sih_modules(unsigned line)
 		 * And for PWR_INT it's not documented...
 		 */
 		if (sih->set_cor) {
-			status = twl4030_i2c_write_u8(sih->module,
+			status = twl_i2c_write_u8(sih->module,
 					TWL4030_SIH_CTRL_COR_MASK,
 					sih->control_offset);
 			if (status < 0)
@@ -438,14 +438,14 @@ static int twl4030_init_sih_modules(unsigned line)
 		 * uncommon with PWR_INT.PWRON.
 		 */
 		for (j = 0; j < 2; j++) {
-			status = twl4030_i2c_read(sih->module, rxbuf,
+			status = twl_i2c_read(sih->module, rxbuf,
 				sih->mask[line].isr_offset, sih->bytes_ixr);
 			if (status < 0)
 				pr_err("twl4030: err %d initializing %s %s\n",
 					status, sih->name, "ISR");
 
 			if (!sih->set_cor)
-				status = twl4030_i2c_write(sih->module, buf,
+				status = twl_i2c_write(sih->module, buf,
 					sih->mask[line].isr_offset,
 					sih->bytes_ixr);
 			/* else COR=1 means read sufficed.
@@ -514,7 +514,7 @@ static void twl4030_sih_do_mask(struct work_struct *work)
 		return;
 
 	/* write the whole mask ... simpler than subsetting it */
-	status = twl4030_i2c_write(sih->module, imr.bytes,
+	status = twl_i2c_write(sih->module, imr.bytes,
 			sih->mask[irq_line].imr_offset, sih->bytes_ixr);
 	if (status)
 		pr_err("twl4030: %s, %s --> %d\n", __func__,
@@ -545,7 +545,7 @@ static void twl4030_sih_do_edge(struct work_struct *work)
 	 * any processor on the other IRQ line, EDR registers are
 	 * shared.
 	 */
-	status = twl4030_i2c_read(sih->module, bytes + 1,
+	status = twl_i2c_read(sih->module, bytes + 1,
 			sih->edr_offset, sih->bytes_edr);
 	if (status) {
 		pr_err("twl4030: %s, %s --> %d\n", __func__,
@@ -579,7 +579,7 @@ static void twl4030_sih_do_edge(struct work_struct *work)
 	}
 
 	/* Write */
-	status = twl4030_i2c_write(sih->module, bytes,
+	status = twl_i2c_write(sih->module, bytes,
 			sih->edr_offset, sih->bytes_edr);
 	if (status)
 		pr_err("twl4030: %s, %s --> %d\n", __func__,
@@ -664,7 +664,7 @@ static inline int sih_read_isr(const struct sih *sih)
 	/* FIXME need retry-on-error ... */
 
 	isr.word = 0;
-	status = twl4030_i2c_read(sih->module, isr.bytes,
+	status = twl_i2c_read(sih->module, isr.bytes,
 			sih->mask[irq_line].isr_offset, sih->bytes_ixr);
 
 	return (status < 0) ? status : le32_to_cpu(isr.word);
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 424b255d6f92..0815292fdafc 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -131,11 +131,11 @@ static int __init twl4030_write_script_byte(u8 address, u8 byte)
 {
 	int err;
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
 				R_MEMORY_ADDRESS);
 	if (err)
 		goto out;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, byte,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, byte,
 				R_MEMORY_DATA);
 out:
 	return err;
@@ -192,18 +192,18 @@ static int __init twl4030_config_wakeup3_sequence(u8 address)
 	u8 data;
 
 	/* Set SLEEP to ACTIVE SEQ address for P3 */
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
 				R_SEQ_ADD_S2A3);
 	if (err)
 		goto out;
 
 	/* P3 LVL_WAKEUP should be on LEVEL */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
 				R_P3_SW_EVENTS);
 	if (err)
 		goto out;
 	data |= LVL_WAKEUP;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
 				R_P3_SW_EVENTS);
 out:
 	if (err)
@@ -217,42 +217,42 @@ static int __init twl4030_config_wakeup12_sequence(u8 address)
 	u8 data;
 
 	/* Set SLEEP to ACTIVE SEQ address for P1 and P2 */
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
 				R_SEQ_ADD_S2A12);
 	if (err)
 		goto out;
 
 	/* P1/P2 LVL_WAKEUP should be on LEVEL */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
 				R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
 	data |= LVL_WAKEUP;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
 				R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
 				R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	data |= LVL_WAKEUP;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data,
 				R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	if (machine_is_omap_3430sdp() || machine_is_omap_ldp()) {
 		/* Disabling AC charger effect on sleep-active transitions */
-		err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
+		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &data,
 					R_CFG_P1_TRANSITION);
 		if (err)
 			goto out;
 		data &= ~(1<<1);
-		err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data ,
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, data ,
 					R_CFG_P1_TRANSITION);
 		if (err)
 			goto out;
@@ -270,7 +270,7 @@ static int __init twl4030_config_sleep_sequence(u8 address)
 	int err;
 
 	/* Set ACTIVE to SLEEP SEQ address in T2 memory*/
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
 				R_SEQ_ADD_A2S);
 
 	if (err)
@@ -285,41 +285,41 @@ static int __init twl4030_config_warmreset_sequence(u8 address)
 	u8 rd_data;
 
 	/* Set WARM RESET SEQ address for P1 */
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, address,
 				R_SEQ_ADD_WARM);
 	if (err)
 		goto out;
 
 	/* P1/P2/P3 enable WARMRESET */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
 				R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
 				R_P1_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
 				R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
 				R_P2_SW_EVENTS);
 	if (err)
 		goto out;
 
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &rd_data,
 				R_P3_SW_EVENTS);
 	if (err)
 		goto out;
 
 	rd_data |= ENABLE_WARMRESET;
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, rd_data,
 				R_P3_SW_EVENTS);
 out:
 	if (err)
@@ -344,8 +344,8 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 	rconfig_addr = res_config_addrs[rconfig->resource];
 
 	/* Set resource group */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &grp,
-				rconfig_addr + DEV_GRP_OFFSET);
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &grp,
+			      rconfig_addr + DEV_GRP_OFFSET);
 	if (err) {
 		pr_err("TWL4030 Resource %d group could not be read\n",
 			rconfig->resource);
@@ -355,8 +355,8 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 	if (rconfig->devgroup != TWL4030_RESCONFIG_UNDEF) {
 		grp &= ~DEV_GRP_MASK;
 		grp |= rconfig->devgroup << DEV_GRP_SHIFT;
-		err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
-					grp, rconfig_addr + DEV_GRP_OFFSET);
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+				       grp, rconfig_addr + DEV_GRP_OFFSET);
 		if (err < 0) {
 			pr_err("TWL4030 failed to program devgroup\n");
 			return err;
@@ -364,7 +364,7 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 	}
 
 	/* Set resource types */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &type,
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &type,
 				rconfig_addr + TYPE_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 Resource %d type could not be read\n",
@@ -382,7 +382,7 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 		type |= rconfig->type2 << TYPE2_SHIFT;
 	}
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
 				type, rconfig_addr + TYPE_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 failed to program resource type\n");
@@ -390,8 +390,8 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 	}
 
 	/* Set remap states */
-	err = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &remap,
-				rconfig_addr + REMAP_OFFSET);
+	err = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER, &remap,
+			      rconfig_addr + REMAP_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 Resource %d remap could not be read\n",
 			rconfig->resource);
@@ -408,9 +408,9 @@ static int __init twl4030_configure_resource(struct twl4030_resconfig *rconfig)
 		remap |= rconfig->remap_off << SLEEP_STATE_SHIFT;
 	}
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
-				remap,
-				rconfig_addr + REMAP_OFFSET);
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+			       remap,
+			       rconfig_addr + REMAP_OFFSET);
 	if (err < 0) {
 		pr_err("TWL4030 failed to program remap\n");
 		return err;
@@ -468,12 +468,12 @@ void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 	struct twl4030_resconfig *resconfig;
 	u8 address = twl4030_start_script_address;
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_1,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_1,
 				R_PROTECT_KEY);
 	if (err)
 		goto unlock;
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_2,
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_2,
 				R_PROTECT_KEY);
 	if (err)
 		goto unlock;
@@ -496,7 +496,7 @@ void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 		}
 	}
 
-	err = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, R_PROTECT_KEY);
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, R_PROTECT_KEY);
 	if (err)
 		pr_err("TWL4030 Unable to relock registers\n");
 	return;
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index c8a6e583d773..8cc46e99ccca 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -64,7 +64,7 @@ twl4030reg_read(struct twlreg_info *info, unsigned offset)
 	u8 value;
 	int status;
 
-	status = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER,
+	status = twl_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER,
 			&value, info->base + offset);
 	return (status < 0) ? status : value;
 }
@@ -72,7 +72,7 @@ twl4030reg_read(struct twlreg_info *info, unsigned offset)
 static inline int
 twl4030reg_write(struct twlreg_info *info, unsigned offset, u8 value)
 {
-	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+	return twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
 			value, info->base + offset);
 }
 
@@ -171,12 +171,12 @@ static int twl4030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
 	if (!(status & (P3_GRP | P2_GRP | P1_GRP)))
 		return -EACCES;
 
-	status = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+	status = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
 			message >> 8, 0x15 /* PB_WORD_MSB */ );
 	if (status >= 0)
 		return status;
 
-	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+	return twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
 			message, 0x16 /* PB_WORD_LSB */ );
 }
 
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 93565be12fae..6119712cc8de 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -92,7 +92,7 @@ static int twl4030_rtc_read_u8(u8 *data, u8 reg)
 {
 	int ret;
 
-	ret = twl4030_i2c_read_u8(TWL4030_MODULE_RTC, data, reg);
+	ret = twl_i2c_read_u8(TWL4030_MODULE_RTC, data, reg);
 	if (ret < 0)
 		pr_err("twl4030_rtc: Could not read TWL4030"
 		       "register %X - error %d\n", reg, ret);
@@ -106,7 +106,7 @@ static int twl4030_rtc_write_u8(u8 data, u8 reg)
 {
 	int ret;
 
-	ret = twl4030_i2c_write_u8(TWL4030_MODULE_RTC, data, reg);
+	ret = twl_i2c_write_u8(TWL4030_MODULE_RTC, data, reg);
 	if (ret < 0)
 		pr_err("twl4030_rtc: Could not write TWL4030"
 		       "register %X - error %d\n", reg, ret);
@@ -201,7 +201,7 @@ static int twl4030_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (ret < 0)
 		return ret;
 
-	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+	ret = twl_i2c_read(TWL4030_MODULE_RTC, rtc_data,
 			       REG_SECONDS_REG, ALL_TIME_REGS);
 
 	if (ret < 0) {
@@ -243,7 +243,7 @@ static int twl4030_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		goto out;
 
 	/* update all the time registers in one shot */
-	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, rtc_data,
+	ret = twl_i2c_write(TWL4030_MODULE_RTC, rtc_data,
 			REG_SECONDS_REG, ALL_TIME_REGS);
 	if (ret < 0) {
 		dev_err(dev, "rtc_set_time error %d\n", ret);
@@ -266,7 +266,7 @@ static int twl4030_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	unsigned char rtc_data[ALL_TIME_REGS + 1];
 	int ret;
 
-	ret = twl4030_i2c_read(TWL4030_MODULE_RTC, rtc_data,
+	ret = twl_i2c_read(TWL4030_MODULE_RTC, rtc_data,
 			       REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
 	if (ret < 0) {
 		dev_err(dev, "rtc_read_alarm error %d\n", ret);
@@ -305,7 +305,7 @@ static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	alarm_data[6] = bin2bcd(alm->time.tm_year - 100);
 
 	/* update all the alarm registers in one shot */
-	ret = twl4030_i2c_write(TWL4030_MODULE_RTC, alarm_data,
+	ret = twl_i2c_write(TWL4030_MODULE_RTC, alarm_data,
 			REG_ALARM_SECONDS_REG, ALL_TIME_REGS);
 	if (ret) {
 		dev_err(dev, "rtc_set_alarm error %d\n", ret);
@@ -363,7 +363,7 @@ static irqreturn_t twl4030_rtc_interrupt(int irq, void *rtc)
 	 * risk wrongly clearing status for some other IRQ (losing
 	 * the interrupt).  Be smarter about handling RTC_UF ...
 	 */
-	res = twl4030_i2c_read_u8(TWL4030_MODULE_INT,
+	res = twl_i2c_read_u8(TWL4030_MODULE_INT,
 			&rd_reg, TWL4030_INT_PWR_ISR1);
 	if (res)
 		goto out;
diff --git a/drivers/usb/otg/twl4030-usb.c b/drivers/usb/otg/twl4030-usb.c
index 3acbdb82bcf9..2be9f2fa41f9 100644
--- a/drivers/usb/otg/twl4030-usb.c
+++ b/drivers/usb/otg/twl4030-usb.c
@@ -276,16 +276,16 @@ static int twl4030_i2c_write_u8_verify(struct twl4030_usb *twl,
 {
 	u8 check;
 
-	if ((twl4030_i2c_write_u8(module, data, address) >= 0) &&
-	    (twl4030_i2c_read_u8(module, &check, address) >= 0) &&
+	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
+	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
 						(check == data))
 		return 0;
 	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
 			1, module, address, check, data);
 
 	/* Failed once: Try again */
-	if ((twl4030_i2c_write_u8(module, data, address) >= 0) &&
-	    (twl4030_i2c_read_u8(module, &check, address) >= 0) &&
+	if ((twl_i2c_write_u8(module, data, address) >= 0) &&
+	    (twl_i2c_read_u8(module, &check, address) >= 0) &&
 						(check == data))
 		return 0;
 	dev_dbg(twl->dev, "Write%d[%d,0x%x] wrote %02x but read %02x\n",
@@ -303,7 +303,7 @@ static inline int twl4030_usb_write(struct twl4030_usb *twl,
 {
 	int ret = 0;
 
-	ret = twl4030_i2c_write_u8(TWL4030_MODULE_USB, data, address);
+	ret = twl_i2c_write_u8(TWL4030_MODULE_USB, data, address);
 	if (ret < 0)
 		dev_dbg(twl->dev,
 			"TWL4030:USB:Write[0x%x] Error %d\n", address, ret);
@@ -315,7 +315,7 @@ static inline int twl4030_readb(struct twl4030_usb *twl, u8 module, u8 address)
 	u8 data;
 	int ret = 0;
 
-	ret = twl4030_i2c_read_u8(module, &data, address);
+	ret = twl_i2c_read_u8(module, &data, address);
 	if (ret >= 0)
 		ret = data;
 	else
@@ -462,7 +462,7 @@ static void twl4030_phy_power(struct twl4030_usb *twl, int on)
 		 * SLEEP. We work around this by clearing the bit after usv3v1
 		 * is re-activated. This ensures that VUSB3V1 is really active.
 		 */
-		twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0,
+		twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0,
 							VUSB_DEDICATED2);
 		regulator_enable(twl->usb1v5);
 		pwr &= ~PHY_PWR_PHYPWD;
@@ -505,44 +505,44 @@ static void twl4030_phy_resume(struct twl4030_usb *twl)
 static int twl4030_usb_ldo_init(struct twl4030_usb *twl)
 {
 	/* Enable writing to power configuration registers */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0xC0, PROTECT_KEY);
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0x0C, PROTECT_KEY);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0xC0, PROTECT_KEY);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0x0C, PROTECT_KEY);
 
 	/* put VUSB3V1 LDO in active state */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB_DEDICATED2);
 
 	/* input to VUSB3V1 LDO is from VBAT, not VBUS */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0x14, VUSB_DEDICATED1);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0x14, VUSB_DEDICATED1);
 
 	/* Initialize 3.1V regulator */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB3V1_DEV_GRP);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB3V1_DEV_GRP);
 
 	twl->usb3v1 = regulator_get(twl->dev, "usb3v1");
 	if (IS_ERR(twl->usb3v1))
 		return -ENODEV;
 
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB3V1_TYPE);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB3V1_TYPE);
 
 	/* Initialize 1.5V regulator */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V5_DEV_GRP);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V5_DEV_GRP);
 
 	twl->usb1v5 = regulator_get(twl->dev, "usb1v5");
 	if (IS_ERR(twl->usb1v5))
 		goto fail1;
 
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V5_TYPE);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V5_TYPE);
 
 	/* Initialize 1.8V regulator */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V8_DEV_GRP);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V8_DEV_GRP);
 
 	twl->usb1v8 = regulator_get(twl->dev, "usb1v8");
 	if (IS_ERR(twl->usb1v8))
 		goto fail2;
 
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V8_TYPE);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, 0, VUSB1V8_TYPE);
 
 	/* disable access to power configuration registers */
-	twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, PROTECT_KEY);
+	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, PROTECT_KEY);
 
 	return 0;
 
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
index 3764a36d9142..e3eccc9af78e 100644
--- a/drivers/video/omap/lcd_2430sdp.c
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -52,7 +52,7 @@ static unsigned enable_gpio;
 #define TWL4030_VPLL2_DEV_GRP           0x33
 #define TWL4030_VPLL2_DEDICATED         0x36
 
-#define t2_out(c, r, v) twl4030_i2c_write_u8(c, r, v)
+#define t2_out(c, r, v) twl_i2c_write_u8(c, r, v)
 
 
 static int sdp2430_panel_init(struct lcd_panel *panel,
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 20968b2aadef..8162a40d1522 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -48,7 +48,7 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
 
 static int twl4030_wdt_write(unsigned char val)
 {
-	return twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, val,
+	return twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, val,
 					TWL4030_WATCHDOG_CFG_REG_OFFS);
 }
 
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index a50bcf8a4048..0f812f5aa723 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __TWL4030_H_
-#define __TWL4030_H_
+#ifndef __TWL_H_
+#define __TWL_H_
 
 #include <linux/types.h>
 #include <linux/input/matrix_keypad.h>
@@ -72,20 +72,37 @@
 #define TWL4030_MODULE_RTC		0x16
 #define TWL4030_MODULE_SECURED_REG	0x17
 
+#define TWL_MODULE_USB		TWL4030_MODULE_USB
+#define TWL_MODULE_AUDIO_VOICE	TWL4030_MODULE_AUDIO_VOICE
+#define TWL_MODULE_PIH		TWL4030_MODULE_PIH
+#define TWL_MODULE_MADC		TWL4030_MODULE_MADC
+#define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
+#define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
+#define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
+#define TWL_MODULE_RTC		TWL4030_MODULE_RTC
+
+#define GPIO_INTR_OFFSET	0
+#define KEYPAD_INTR_OFFSET	1
+#define BCI_INTR_OFFSET		2
+#define MADC_INTR_OFFSET	3
+#define USB_INTR_OFFSET		4
+#define BCI_PRES_INTR_OFFSET	9
+#define USB_PRES_INTR_OFFSET	10
+#define RTC_INTR_OFFSET		11
 /*
  * Read and write single 8-bit registers
  */
-int twl4030_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
-int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
+int twl_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
+int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
 
 /*
  * Read and write several 8-bit registers at once.
  *
- * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
+ * IMPORTANT:  For twl_i2c_write(), allocate num_bytes + 1
  * for the value, and populate your data starting at offset 1.
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 /*----------------------------------------------------------------------*/
 
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c3a6ceb542cb..2a27f7b56726 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -175,7 +175,7 @@ static int twl4030_write(struct snd_soc_codec *codec,
 {
 	twl4030_write_reg_cache(codec, reg, value);
 	if (likely(reg < TWL4030_REG_SW_SHADOW))
-		return twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value,
+		return twl_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value,
 					    reg);
 	else
 		return 0;
@@ -261,7 +261,7 @@ static void twl4030_power_up(struct snd_soc_codec *codec)
 	do {
 		/* this takes a little while, so don't slam i2c */
 		udelay(2000);
-		twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &byte,
+		twl_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &byte,
 				    TWL4030_REG_ANAMICL);
 	} while ((i++ < 100) &&
 		 ((byte & TWL4030_CNCL_OFFSET_START) ==
@@ -542,7 +542,7 @@ static int pin_name##pga_event(struct snd_soc_dapm_widget *w,		\
 		break;							\
 	case SND_SOC_DAPM_POST_PMD:					\
 		reg_val = twl4030_read_reg_cache(w->codec, reg);	\
-		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,	\
+		twl_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,	\
 					reg_val & (~mask),		\
 					reg);				\
 		break;							\
@@ -679,7 +679,7 @@ static void headset_ramp(struct snd_soc_codec *codec, int ramp)
 		mdelay((ramp_base[(hs_pop & TWL4030_RAMP_DELAY) >> 2] /
 			twl4030->sysclk) + 1);
 		/* Bypass the reg_cache to mute the headset */
-		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+		twl_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
 					hs_gain & (~0x0f),
 					TWL4030_REG_HS_GAIN_SET);
 
-- 
cgit v1.2.3


From e8deb28ca8e221de0239eafb3c3d431d8854278e Mon Sep 17 00:00:00 2001
From: Balaji T K <balajitk@ti.com>
Date: Mon, 14 Dec 2009 00:25:31 +0100
Subject: mfd: Add support for twl6030 irq framework

This patch adds support for phoenix interrupt framework. New iInterrupt
status register A, B, C are introduced in Phoenix and are cleared on write.
Due to the differences in interrupt handling with respect to TWL4030,
twl6030-irq.c is created for TWL6030 PMIC

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/plat-omap/include/plat/irqs.h |  16 +-
 drivers/mfd/Kconfig                    |   4 +-
 drivers/mfd/Makefile                   |   2 +-
 drivers/mfd/twl-core.c                 | 120 +++++++++++--
 drivers/mfd/twl4030-irq.c              |   6 +-
 drivers/mfd/twl6030-irq.c              | 299 +++++++++++++++++++++++++++++++++
 include/linux/i2c/twl.h                |  64 +++++++
 7 files changed, 490 insertions(+), 21 deletions(-)
 create mode 100644 drivers/mfd/twl6030-irq.c

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/include/plat/irqs.h b/arch/arm/plat-omap/include/plat/irqs.h
index ce5dd2d1dc21..97d6c50c3dcb 100644
--- a/arch/arm/plat-omap/include/plat/irqs.h
+++ b/arch/arm/plat-omap/include/plat/irqs.h
@@ -472,8 +472,22 @@
 #endif
 #define TWL4030_GPIO_IRQ_END	(TWL4030_GPIO_IRQ_BASE + TWL4030_GPIO_NR_IRQS)
 
+#define	TWL6030_IRQ_BASE	(OMAP_FPGA_IRQ_END)
+#ifdef CONFIG_TWL4030_CORE
+#define	TWL6030_BASE_NR_IRQS	20
+#else
+#define	TWL6030_BASE_NR_IRQS	0
+#endif
+#define TWL6030_IRQ_END		(TWL6030_IRQ_BASE + TWL6030_BASE_NR_IRQS)
+
 /* Total number of interrupts depends on the enabled blocks above */
-#define NR_IRQS			TWL4030_GPIO_IRQ_END
+#if (TWL4030_GPIO_IRQ_END > TWL6030_IRQ_END)
+#define TWL_IRQ_END 		TWL4030_GPIO_IRQ_END
+#else
+#define TWL_IRQ_END		TWL6030_IRQ_END
+#endif
+
+#define NR_IRQS			TWL_IRQ_END
 
 #define OMAP_IRQ_BIT(irq)	(1 << ((irq) % 32))
 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b23343cdc196..87829789243e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -103,10 +103,10 @@ config MENELAUS
 	  cell phones and PDAs.
 
 config TWL4030_CORE
-	bool "Texas Instruments TWL4030/TPS659x0 Support"
+	bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support"
 	depends on I2C=y && GENERIC_HARDIRQS
 	help
-	  Say yes here if you have TWL4030 family chip on your board.
+	  Say yes here if you have TWL4030 / TWL6030 family chip on your board.
 	  This core driver provides register access and IRQ handling
 	  facilities, and registers devices for the various functions
 	  so that function-specific drivers can bind to them.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f4d14b7589bf..ca2f2c4ff05e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
-obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o
+obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
 obj-$(CONFIG_TWL4030_CODEC)	+= twl4030-codec.o
 
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 79946fe800af..c48a6138c575 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -181,6 +181,30 @@
 /* Triton Core internal information (END) */
 
 
+/* subchip/slave 0 0x48 - POWER */
+#define TWL6030_BASEADD_RTC		0x0000
+#define TWL6030_BASEADD_MEM		0x0017
+#define TWL6030_BASEADD_PM_MASTER	0x001F
+#define TWL6030_BASEADD_PM_SLAVE_MISC	0x0030 /* PM_RECEIVER */
+#define TWL6030_BASEADD_PM_MISC		0x00E2
+#define TWL6030_BASEADD_PM_PUPD		0x00F0
+
+/* subchip/slave 1 0x49 - FEATURE */
+#define TWL6030_BASEADD_USB		0x0000
+#define TWL6030_BASEADD_GPADC_CTRL	0x002E
+#define TWL6030_BASEADD_AUX		0x0090
+#define TWL6030_BASEADD_PWM		0x00BA
+#define TWL6030_BASEADD_GASGAUGE	0x00C0
+#define TWL6030_BASEADD_PIH		0x00D0
+#define TWL6030_BASEADD_CHARGER		0x00E0
+
+/* subchip/slave 2 0x4A - DFT */
+#define TWL6030_BASEADD_DIEID		0x00C0
+
+/* subchip/slave 3 0x4B - AUDIO */
+#define TWL6030_BASEADD_AUDIO		0x0000
+#define TWL6030_BASEADD_RSV		0x0000
+
 /* Few power values */
 #define R_CFG_BOOT			0x05
 #define R_PROTECT_KEY			0x0E
@@ -202,13 +226,21 @@
 #define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
 #define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
 #define TWL5031			BIT(2)  /* twl5031 has different registers */
+#define TWL6030_CLASS		BIT(3)	/* TWL6030 class */
 
 /*----------------------------------------------------------------------*/
 
 /* is driver active, bound to a chip? */
 static bool inuse;
 
-/* Structure for each TWL4030 Slave */
+static unsigned int twl_id;
+unsigned int twl_rev(void)
+{
+	return twl_id;
+}
+EXPORT_SYMBOL(twl_rev);
+
+/* Structure for each TWL4030/TWL6030 Slave */
 struct twl_client {
 	struct i2c_client *client;
 	u8 address;
@@ -228,11 +260,12 @@ struct twl_mapping {
 	unsigned char sid;	/* Slave ID */
 	unsigned char base;	/* base address */
 };
+struct twl_mapping *twl_map;
 
 static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
 	/*
 	 * NOTE:  don't change this table without updating the
-	 * <linux/i2c/twl4030.h> defines for TWL4030_MODULE_*
+	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
 	 * so they continue to match the order in this table.
 	 */
 
@@ -265,6 +298,40 @@ static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
 	{ 3, TWL4030_BASEADD_SECURED_REG },
 };
 
+static struct twl_mapping twl6030_map[] = {
+	/*
+	 * NOTE:  don't change this table without updating the
+	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
+	 * so they continue to match the order in this table.
+	 */
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_USB },
+	{ SUB_CHIP_ID3, TWL6030_BASEADD_AUDIO },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_DIEID },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_PIH },
+
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_GPADC_CTRL },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_CHARGER },
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_GASGAUGE },
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_PWM },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_MASTER },
+	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_SLAVE_MISC },
+
+	{ SUB_CHIP_ID0, TWL6030_BASEADD_RTC },
+	{ SUB_CHIP_ID0, TWL6030_BASEADD_MEM },
+};
+
 /*----------------------------------------------------------------------*/
 
 /* Exported Functions */
@@ -292,7 +359,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
-	sid = twl4030_map[mod_no].sid;
+	sid = twl_map[mod_no].sid;
 	twl = &twl_modules[sid];
 
 	if (unlikely(!inuse)) {
@@ -310,7 +377,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	msg->flags = 0;
 	msg->buf = value;
 	/* over write the first byte of buffer with the register address */
-	*value = twl4030_map[mod_no].base + reg;
+	*value = twl_map[mod_no].base + reg;
 	ret = i2c_transfer(twl->client->adapter, twl->xfer_msg, 1);
 	mutex_unlock(&twl->xfer_lock);
 
@@ -349,7 +416,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
-	sid = twl4030_map[mod_no].sid;
+	sid = twl_map[mod_no].sid;
 	twl = &twl_modules[sid];
 
 	if (unlikely(!inuse)) {
@@ -362,7 +429,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	msg->addr = twl->address;
 	msg->len = 1;
 	msg->flags = 0;	/* Read the register value */
-	val = twl4030_map[mod_no].base + reg;
+	val = twl_map[mod_no].base + reg;
 	msg->buf = &val;
 	/* [MSG2] fill the data rx buffer */
 	msg = &twl->xfer_msg[1];
@@ -486,6 +553,7 @@ add_regulator_linked(int num, struct regulator_init_data *pdata,
 		struct regulator_consumer_supply *consumers,
 		unsigned num_consumers)
 {
+	unsigned sub_chip_id;
 	/* regulator framework demands init_data ... */
 	if (!pdata)
 		return NULL;
@@ -496,7 +564,8 @@ add_regulator_linked(int num, struct regulator_init_data *pdata,
 	}
 
 	/* NOTE:  we currently ignore regulator IRQs, e.g. for short circuits */
-	return add_numbered_child(3, "twl_reg", num,
+	sub_chip_id = twl_map[TWL_MODULE_PM_MASTER].sid;
+	return add_numbered_child(sub_chip_id, "twl_reg", num,
 		pdata, sizeof(*pdata), false, 0, 0);
 }
 
@@ -516,6 +585,7 @@ static int
 add_children(struct twl4030_platform_data *pdata, unsigned long features)
 {
 	struct device	*child;
+	unsigned sub_chip_id;
 
 	if (twl_has_bci() && pdata->bci &&
 	    !(features & (TPS_SUBSET | TWL5031))) {
@@ -561,7 +631,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		 * Eventually, Linux might become more aware of such
 		 * HW security concerns, and "least privilege".
 		 */
-		child = add_child(3, "twl_rtc",
+		sub_chip_id = twl_map[TWL_MODULE_RTC].sid;
+		child = add_child(sub_chip_id, "twl_rtc",
 				NULL, 0,
 				true, pdata->irq_base + RTC_INTR_OFFSET, 0);
 		if (IS_ERR(child))
@@ -812,16 +883,22 @@ static void clocks_init(struct device *dev,
 
 /*----------------------------------------------------------------------*/
 
-int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
-int twl_exit_irq(void);
-int twl_init_chip_irq(const char *chip);
+int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
+int twl4030_exit_irq(void);
+int twl4030_init_chip_irq(const char *chip);
+int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end);
+int twl6030_exit_irq(void);
 
 static int twl_remove(struct i2c_client *client)
 {
 	unsigned i;
 	int status;
 
-	status = twl_exit_irq();
+	if (twl_class_is_4030())
+		status = twl4030_exit_irq();
+	else
+		status = twl6030_exit_irq();
+
 	if (status < 0)
 		return status;
 
@@ -878,6 +955,13 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		mutex_init(&twl->xfer_lock);
 	}
 	inuse = true;
+	if ((id->driver_data) & TWL6030_CLASS) {
+		twl_id = TWL6030_CLASS_ID;
+		twl_map = &twl6030_map[0];
+	} else {
+		twl_id = TWL4030_CLASS_ID;
+		twl_map = &twl4030_map[0];
+	}
 
 	/* setup clock framework */
 	clocks_init(&client->dev, pdata->clock);
@@ -890,8 +974,15 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (client->irq
 			&& pdata->irq_base
 			&& pdata->irq_end > pdata->irq_base) {
-		twl_init_chip_irq(id->name);
-		status = twl_init_irq(client->irq, pdata->irq_base, pdata->irq_end);
+		if (twl_class_is_4030()) {
+			twl4030_init_chip_irq(id->name);
+			status = twl4030_init_irq(client->irq, pdata->irq_base,
+			pdata->irq_end);
+		} else {
+			status = twl6030_init_irq(client->irq, pdata->irq_base,
+			pdata->irq_end);
+		}
+
 		if (status < 0)
 			goto fail;
 	}
@@ -910,6 +1001,7 @@ static const struct i2c_device_id twl_ids[] = {
 	{ "tps65950", 0 },		/* catalog version of twl5030 */
 	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
 	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
+	{ "twl6030", TWL6030_CLASS },	/* "Phoenix power chip" */
 	{ /* end of list */ },
 };
 MODULE_DEVICE_TABLE(i2c, twl_ids);
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 5a62cf916987..20d29bafc9f5 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -778,7 +778,7 @@ int twl4030_sih_setup(int module)
 /* FIXME pass in which interrupt line we'll use ... */
 #define twl_irq_line	0
 
-int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
+int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 {
 	static struct irq_chip	twl4030_irq_chip;
 
@@ -858,7 +858,7 @@ fail:
 	return status;
 }
 
-int twl_exit_irq(void)
+int twl4030_exit_irq(void)
 {
 	/* FIXME undo twl_init_irq() */
 	if (twl4030_irq_base) {
@@ -868,7 +868,7 @@ int twl_exit_irq(void)
 	return 0;
 }
 
-int twl_init_chip_irq(const char *chip)
+int twl4030_init_chip_irq(const char *chip)
 {
 	if (!strcmp(chip, "twl5031")) {
 		sih_modules = sih_modules_twl5031;
diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
new file mode 100644
index 000000000000..10bf228ad626
--- /dev/null
+++ b/drivers/mfd/twl6030-irq.c
@@ -0,0 +1,299 @@
+/*
+ * twl6030-irq.c - TWL6030 irq support
+ *
+ * Copyright (C) 2005-2009 Texas Instruments, Inc.
+ *
+ * Modifications to defer interrupt handling to a kernel thread:
+ * Copyright (C) 2006 MontaVista Software, Inc.
+ *
+ * Based on tlv320aic23.c:
+ * Copyright (c) by Kai Svahn <kai.svahn@nokia.com>
+ *
+ * Code cleanup and modifications to IRQ handler.
+ * by syed khasim <x0khasim@ti.com>
+ *
+ * TWL6030 specific code and IRQ handling changes by
+ * Jagadeesh Bhaskar Pakaravoor <j-pakaravoor@ti.com>
+ * Balaji T K <balajitk@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kthread.h>
+#include <linux/i2c/twl.h>
+
+/*
+ * TWL6030 (unlike its predecessors, which had two level interrupt handling)
+ * three interrupt registers INT_STS_A, INT_STS_B and INT_STS_C.
+ * It exposes status bits saying who has raised an interrupt. There are
+ * three mask registers that corresponds to these status registers, that
+ * enables/disables these interrupts.
+ *
+ * We set up IRQs starting at a platform-specified base. An interrupt map table,
+ * specifies mapping between interrupt number and the associated module.
+ *
+ */
+
+static int twl6030_interrupt_mapping[24] = {
+	PWR_INTR_OFFSET,	/* Bit 0	PWRON			*/
+	PWR_INTR_OFFSET,	/* Bit 1	RPWRON			*/
+	PWR_INTR_OFFSET,	/* Bit 2	BAT_VLOW		*/
+	RTC_INTR_OFFSET,	/* Bit 3	RTC_ALARM		*/
+	RTC_INTR_OFFSET,	/* Bit 4	RTC_PERIOD		*/
+	HOTDIE_INTR_OFFSET,	/* Bit 5	HOT_DIE			*/
+	SMPSLDO_INTR_OFFSET,	/* Bit 6	VXXX_SHORT		*/
+	SMPSLDO_INTR_OFFSET,	/* Bit 7	VMMC_SHORT		*/
+
+	SMPSLDO_INTR_OFFSET,	/* Bit 8	VUSIM_SHORT		*/
+	BATDETECT_INTR_OFFSET,	/* Bit 9	BAT			*/
+	SIMDETECT_INTR_OFFSET,	/* Bit 10	SIM			*/
+	MMCDETECT_INTR_OFFSET,	/* Bit 11	MMC			*/
+	RSV_INTR_OFFSET,  	/* Bit 12	Reserved		*/
+	MADC_INTR_OFFSET,	/* Bit 13	GPADC_RT_EOC		*/
+	MADC_INTR_OFFSET,	/* Bit 14	GPADC_SW_EOC		*/
+	GASGAUGE_INTR_OFFSET,	/* Bit 15	CC_AUTOCAL		*/
+
+	USBOTG_INTR_OFFSET,	/* Bit 16	ID_WKUP			*/
+	USBOTG_INTR_OFFSET,	/* Bit 17	VBUS_WKUP		*/
+	USBOTG_INTR_OFFSET,	/* Bit 18	ID			*/
+	USBOTG_INTR_OFFSET,	/* Bit 19	VBUS			*/
+	CHARGER_INTR_OFFSET,	/* Bit 20	CHRG_CTRL		*/
+	CHARGER_INTR_OFFSET,	/* Bit 21	EXT_CHRG		*/
+	CHARGER_INTR_OFFSET,	/* Bit 22	INT_CHRG		*/
+	RSV_INTR_OFFSET,	/* Bit 23	Reserved		*/
+};
+/*----------------------------------------------------------------------*/
+
+static unsigned twl6030_irq_base;
+
+static struct completion irq_event;
+
+/*
+ * This thread processes interrupts reported by the Primary Interrupt Handler.
+ */
+static int twl6030_irq_thread(void *data)
+{
+	long irq = (long)data;
+	static unsigned i2c_errors;
+	static const unsigned max_i2c_errors = 100;
+	int ret;
+
+	current->flags |= PF_NOFREEZE;
+
+	while (!kthread_should_stop()) {
+		int i;
+		union {
+		u8 bytes[4];
+		u32 int_sts;
+		} sts;
+
+		/* Wait for IRQ, then read PIH irq status (also blocking) */
+		wait_for_completion_interruptible(&irq_event);
+
+		/* read INT_STS_A, B and C in one shot using a burst read */
+		ret = twl_i2c_read(TWL_MODULE_PIH, sts.bytes,
+				REG_INT_STS_A, 3);
+		if (ret) {
+			pr_warning("twl6030: I2C error %d reading PIH ISR\n",
+					ret);
+			if (++i2c_errors >= max_i2c_errors) {
+				printk(KERN_ERR "Maximum I2C error count"
+						" exceeded.  Terminating %s.\n",
+						__func__);
+				break;
+			}
+			complete(&irq_event);
+			continue;
+		}
+
+
+
+		sts.bytes[3] = 0; /* Only 24 bits are valid*/
+
+		for (i = 0; sts.int_sts; sts.int_sts >>= 1, i++) {
+			local_irq_disable();
+			if (sts.int_sts & 0x1) {
+				int module_irq = twl6030_irq_base +
+					twl6030_interrupt_mapping[i];
+				struct irq_desc *d = irq_to_desc(module_irq);
+
+				if (!d) {
+					pr_err("twl6030: Invalid SIH IRQ: %d\n",
+					       module_irq);
+					return -EINVAL;
+				}
+
+				/* These can't be masked ... always warn
+				 * if we get any surprises.
+				 */
+				if (d->status & IRQ_DISABLED)
+					note_interrupt(module_irq, d,
+							IRQ_NONE);
+				else
+					d->handle_irq(module_irq, d);
+
+			}
+		local_irq_enable();
+		}
+		ret = twl_i2c_write(TWL_MODULE_PIH, sts.bytes,
+				REG_INT_STS_A, 3); /* clear INT_STS_A */
+		if (ret)
+			pr_warning("twl6030: I2C error in clearing PIH ISR\n");
+
+		enable_irq(irq);
+	}
+
+	return 0;
+}
+
+/*
+ * handle_twl6030_int() is the desc->handle method for the twl6030 interrupt.
+ * This is a chained interrupt, so there is no desc->action method for it.
+ * Now we need to query the interrupt controller in the twl6030 to determine
+ * which module is generating the interrupt request.  However, we can't do i2c
+ * transactions in interrupt context, so we must defer that work to a kernel
+ * thread.  All we do here is acknowledge and mask the interrupt and wakeup
+ * the kernel thread.
+ */
+static irqreturn_t handle_twl6030_pih(int irq, void *devid)
+{
+	disable_irq_nosync(irq);
+	complete(devid);
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static inline void activate_irq(int irq)
+{
+#ifdef CONFIG_ARM
+	/* ARM requires an extra step to clear IRQ_NOREQUEST, which it
+	 * sets on behalf of every irq_chip.  Also sets IRQ_NOPROBE.
+	 */
+	set_irq_flags(irq, IRQF_VALID);
+#else
+	/* same effect on other architectures */
+	set_irq_noprobe(irq);
+#endif
+}
+
+/*----------------------------------------------------------------------*/
+
+static unsigned twl6030_irq_next;
+
+/*----------------------------------------------------------------------*/
+int twl6030_interrupt_unmask(u8 bit_mask, u8 offset)
+{
+	int ret;
+	u8 unmask_value;
+	ret = twl_i2c_read_u8(TWL_MODULE_PIH, &unmask_value,
+			REG_INT_STS_A + offset);
+	unmask_value &= (~(bit_mask));
+	ret |= twl_i2c_write_u8(TWL_MODULE_PIH, unmask_value,
+			REG_INT_STS_A + offset); /* unmask INT_MSK_A/B/C */
+	return ret;
+}
+EXPORT_SYMBOL(twl6030_interrupt_unmask);
+
+int twl6030_interrupt_mask(u8 bit_mask, u8 offset)
+{
+	int ret;
+	u8 mask_value;
+	ret = twl_i2c_read_u8(TWL_MODULE_PIH, &mask_value,
+			REG_INT_STS_A + offset);
+	mask_value |= (bit_mask);
+	ret |= twl_i2c_write_u8(TWL_MODULE_PIH, mask_value,
+			REG_INT_STS_A + offset); /* mask INT_MSK_A/B/C */
+	return ret;
+}
+EXPORT_SYMBOL(twl6030_interrupt_mask);
+
+int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
+{
+
+	int	status = 0;
+	int	i;
+	struct task_struct	*task;
+	int ret;
+	u8 mask[4];
+
+	static struct irq_chip	twl6030_irq_chip;
+	mask[1] = 0xFF;
+	mask[2] = 0xFF;
+	mask[3] = 0xFF;
+	ret = twl_i2c_write(TWL_MODULE_PIH, &mask[0],
+			REG_INT_MSK_LINE_A, 3); /* MASK ALL INT LINES */
+	ret = twl_i2c_write(TWL_MODULE_PIH, &mask[0],
+			REG_INT_MSK_STS_A, 3); /* MASK ALL INT STS */
+	ret = twl_i2c_write(TWL_MODULE_PIH, &mask[0],
+			REG_INT_STS_A, 3); /* clear INT_STS_A,B,C */
+
+	twl6030_irq_base = irq_base;
+
+	/* install an irq handler for each of the modules;
+	 * clone dummy irq_chip since PIH can't *do* anything
+	 */
+	twl6030_irq_chip = dummy_irq_chip;
+	twl6030_irq_chip.name = "twl6030";
+	twl6030_irq_chip.set_type = NULL;
+
+	for (i = irq_base; i < irq_end; i++) {
+		set_irq_chip_and_handler(i, &twl6030_irq_chip,
+				handle_simple_irq);
+		activate_irq(i);
+	}
+
+	twl6030_irq_next = i;
+	pr_info("twl6030: %s (irq %d) chaining IRQs %d..%d\n", "PIH",
+			irq_num, irq_base, twl6030_irq_next - 1);
+
+	/* install an irq handler to demultiplex the TWL6030 interrupt */
+	init_completion(&irq_event);
+	task = kthread_run(twl6030_irq_thread, (void *)irq_num, "twl6030-irq");
+	if (IS_ERR(task)) {
+		pr_err("twl6030: could not create irq %d thread!\n", irq_num);
+		status = PTR_ERR(task);
+		goto fail_kthread;
+	}
+
+	status = request_irq(irq_num, handle_twl6030_pih, IRQF_DISABLED,
+				"TWL6030-PIH", &irq_event);
+	if (status < 0) {
+		pr_err("twl6030: could not claim irq%d: %d\n", irq_num, status);
+		goto fail_irq;
+	}
+	return status;
+fail_irq:
+	free_irq(irq_num, &irq_event);
+
+fail_kthread:
+	for (i = irq_base; i < irq_end; i++)
+		set_irq_chip_and_handler(i, NULL, NULL);
+	return status;
+}
+
+int twl6030_exit_irq(void)
+{
+
+	if (twl6030_irq_base) {
+		pr_err("twl6030: can't yet clean up IRQs?\n");
+		return -ENOSYS;
+	}
+	return 0;
+}
+
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 0f812f5aa723..8e7405d9c624 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -89,6 +89,67 @@
 #define BCI_PRES_INTR_OFFSET	9
 #define USB_PRES_INTR_OFFSET	10
 #define RTC_INTR_OFFSET		11
+
+/*
+ * Offset from TWL6030_IRQ_BASE / pdata->irq_base
+ */
+#define PWR_INTR_OFFSET		0
+#define HOTDIE_INTR_OFFSET	12
+#define SMPSLDO_INTR_OFFSET	13
+#define BATDETECT_INTR_OFFSET	14
+#define SIMDETECT_INTR_OFFSET	15
+#define MMCDETECT_INTR_OFFSET	16
+#define GASGAUGE_INTR_OFFSET	17
+#define USBOTG_INTR_OFFSET	4
+#define CHARGER_INTR_OFFSET	2
+#define RSV_INTR_OFFSET		0
+
+/* INT register offsets */
+#define REG_INT_STS_A			0x00
+#define REG_INT_STS_B			0x01
+#define REG_INT_STS_C			0x02
+
+#define REG_INT_MSK_LINE_A		0x03
+#define REG_INT_MSK_LINE_B		0x04
+#define REG_INT_MSK_LINE_C		0x05
+
+#define REG_INT_MSK_STS_A		0x06
+#define REG_INT_MSK_STS_B		0x07
+#define REG_INT_MSK_STS_C		0x08
+
+/* MASK INT REG GROUP A */
+#define TWL6030_PWR_INT_MASK 		0x07
+#define TWL6030_RTC_INT_MASK 		0x18
+#define TWL6030_HOTDIE_INT_MASK 	0x20
+#define TWL6030_SMPSLDOA_INT_MASK	0xC0
+
+/* MASK INT REG GROUP B */
+#define TWL6030_SMPSLDOB_INT_MASK 	0x01
+#define TWL6030_BATDETECT_INT_MASK 	0x02
+#define TWL6030_SIMDETECT_INT_MASK 	0x04
+#define TWL6030_MMCDETECT_INT_MASK 	0x08
+#define TWL6030_GPADC_INT_MASK 		0x60
+#define TWL6030_GASGAUGE_INT_MASK 	0x80
+
+/* MASK INT REG GROUP C */
+#define TWL6030_USBOTG_INT_MASK  	0x0F
+#define TWL6030_CHARGER_CTRL_INT_MASK 	0x10
+#define TWL6030_CHARGER_FAULT_INT_MASK 	0x60
+
+
+#define TWL4030_CLASS_ID 		0x4030
+#define TWL6030_CLASS_ID 		0x6030
+unsigned int twl_rev(void);
+#define GET_TWL_REV (twl_rev())
+#define TWL_CLASS_IS(class, id)			\
+static inline int twl_class_is_ ##class(void)	\
+{						\
+	return ((id) == (GET_TWL_REV)) ? 1 : 0;	\
+}
+
+TWL_CLASS_IS(4030, TWL4030_CLASS_ID)
+TWL_CLASS_IS(6030, TWL6030_CLASS_ID)
+
 /*
  * Read and write single 8-bit registers
  */
@@ -104,6 +165,9 @@ int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
 int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
+int twl6030_interrupt_unmask(u8 bit_mask, u8 offset);
+int twl6030_interrupt_mask(u8 bit_mask, u8 offset);
+
 /*----------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3


From 441a450554dada1c59fc06fdf068cb0eeba53c6d Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Sun, 13 Dec 2009 22:19:23 +0100
Subject: regulator: Add support for twl6030 regulators

This patch updates the regulator driver to add support
for TWL6030 PMIC specific LDO regulators.
SMPS resources are not yet supported for TWL6030 and
also .set_mode and .get_status for LDO's are yet to
be implemented for TWL6030.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/regulator/Kconfig         |   2 +-
 drivers/regulator/twl-regulator.c | 116 +++++++++++++++++++++++++++++++-------
 include/linux/i2c/twl.h           |  34 +++++++++++
 3 files changed, 130 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index bcbb161bde0b..7cfdd65bebb4 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -70,7 +70,7 @@ config REGULATOR_MAX1586
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
 config REGULATOR_TWL4030
-	bool "TI TWL4030/TWL5030/TPS695x0 PMIC"
+	bool "TI TWL4030/TWL5030/TWL6030/TPS695x0 PMIC"
 	depends on TWL4030_CORE
 	help
 	  This driver supports the voltage regulators provided by
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 8e1b68a20ef0..7ea1c3a31081 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -52,27 +52,38 @@ struct twlreg_info {
  * The first three registers of all power resource banks help hardware to
  * manage the various resource groups.
  */
+/* Common offset in TWL4030/6030 */
 #define VREG_GRP		0
+/* TWL4030 register offsets */
 #define VREG_TYPE		1
 #define VREG_REMAP		2
 #define VREG_DEDICATED		3	/* LDO control */
-
+/* TWL6030 register offsets */
+#define VREG_TRANS		1
+#define VREG_STATE		2
+#define VREG_VOLTAGE		3
+/* TWL6030 Misc register offsets */
+#define VREG_BC_ALL		1
+#define VREG_BC_REF		2
+#define VREG_BC_PROC		3
+#define VREG_BC_CLK_RST		4
 
 static inline int
-twlreg_read(struct twlreg_info *info, unsigned offset)
+twlreg_read(struct twlreg_info *info, unsigned slave_subgp, unsigned offset)
 {
 	u8 value;
 	int status;
 
-	status = twl_i2c_read_u8(TWL_MODULE_PM_RECEIVER,
+	status = twl_i2c_read_u8(slave_subgp,
 			&value, info->base + offset);
 	return (status < 0) ? status : value;
 }
 
 static inline int
-twlreg_write(struct twlreg_info *info, unsigned offset, u8 value)
+twlreg_write(struct twlreg_info *info, unsigned slave_subgp, unsigned offset,
+						 u8 value)
 {
-	return twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER,
+	return twl_i2c_write_u8(slave_subgp,
 			value, info->base + offset);
 }
 
@@ -82,17 +93,22 @@ twlreg_write(struct twlreg_info *info, unsigned offset, u8 value)
 
 static int twlreg_grp(struct regulator_dev *rdev)
 {
-	return twlreg_read(rdev_get_drvdata(rdev), VREG_GRP);
+	return twlreg_read(rdev_get_drvdata(rdev), TWL_MODULE_PM_RECEIVER,
+								 VREG_GRP);
 }
 
 /*
  * Enable/disable regulators by joining/leaving the P1 (processor) group.
  * We assume nobody else is updating the DEV_GRP registers.
  */
-
-#define P3_GRP		BIT(7)		/* "peripherals" */
-#define P2_GRP		BIT(6)		/* secondary processor, modem, etc */
-#define P1_GRP		BIT(5)		/* CPU/Linux */
+/* definition for 4030 family */
+#define P3_GRP_4030	BIT(7)		/* "peripherals" */
+#define P2_GRP_4030	BIT(6)		/* secondary processor, modem, etc */
+#define P1_GRP_4030	BIT(5)		/* CPU/Linux */
+/* definition for 6030 family */
+#define P3_GRP_6030	BIT(2)		/* secondary processor, modem, etc */
+#define P2_GRP_6030	BIT(1)		/* "peripherals" */
+#define P1_GRP_6030	BIT(0)		/* CPU/Linux */
 
 static int twlreg_is_enabled(struct regulator_dev *rdev)
 {
@@ -101,7 +117,11 @@ static int twlreg_is_enabled(struct regulator_dev *rdev)
 	if (state < 0)
 		return state;
 
-	return (state & P1_GRP) != 0;
+	if (twl_class_is_4030())
+		state &= P1_GRP_4030;
+	else
+		state &= P1_GRP_6030;
+	return state;
 }
 
 static int twlreg_enable(struct regulator_dev *rdev)
@@ -109,12 +129,16 @@ static int twlreg_enable(struct regulator_dev *rdev)
 	struct twlreg_info	*info = rdev_get_drvdata(rdev);
 	int			grp;
 
-	grp = twlreg_read(info, VREG_GRP);
+	grp = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_GRP);
 	if (grp < 0)
 		return grp;
 
-	grp |= P1_GRP;
-	return twlreg_write(info, VREG_GRP, grp);
+	if (twl_class_is_4030())
+		grp |= P1_GRP_4030;
+	else
+		grp |= P1_GRP_6030;
+
+	return twlreg_write(info, TWL_MODULE_PM_RECEIVER, VREG_GRP, grp);
 }
 
 static int twlreg_disable(struct regulator_dev *rdev)
@@ -122,18 +146,25 @@ static int twlreg_disable(struct regulator_dev *rdev)
 	struct twlreg_info	*info = rdev_get_drvdata(rdev);
 	int			grp;
 
-	grp = twlreg_read(info, VREG_GRP);
+	grp = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_GRP);
 	if (grp < 0)
 		return grp;
 
-	grp &= ~P1_GRP;
-	return twlreg_write(info, VREG_GRP, grp);
+	if (twl_class_is_4030())
+		grp &= ~P1_GRP_4030;
+	else
+		grp &= ~P1_GRP_6030;
+
+	return twlreg_write(info, TWL_MODULE_PM_RECEIVER, VREG_GRP, grp);
 }
 
 static int twlreg_get_status(struct regulator_dev *rdev)
 {
 	int	state = twlreg_grp(rdev);
 
+	if (twl_class_is_6030())
+		return 0; /* FIXME return for 6030 regulator */
+
 	if (state < 0)
 		return state;
 	state &= 0x0f;
@@ -152,6 +183,9 @@ static int twlreg_set_mode(struct regulator_dev *rdev, unsigned mode)
 	unsigned		message;
 	int			status;
 
+	if (twl_class_is_6030())
+		return 0; /* FIXME return for 6030 regulator */
+
 	/* We can only set the mode through state machine commands... */
 	switch (mode) {
 	case REGULATOR_MODE_NORMAL:
@@ -168,7 +202,7 @@ static int twlreg_set_mode(struct regulator_dev *rdev, unsigned mode)
 	status = twlreg_grp(rdev);
 	if (status < 0)
 		return status;
-	if (!(status & (P3_GRP | P2_GRP | P1_GRP)))
+	if (!(status & (P3_GRP_4030 | P2_GRP_4030 | P1_GRP_4030)))
 		return -EACCES;
 
 	status = twl_i2c_write_u8(TWL_MODULE_PM_MASTER,
@@ -260,7 +294,29 @@ static const u16 VSIM_VSEL_table[] = {
 static const u16 VDAC_VSEL_table[] = {
 	1200, 1300, 1800, 1800,
 };
-
+static const u16 VAUX1_6030_VSEL_table[] = {
+	1000, 1300, 1800, 2500,
+	2800, 2900, 3000, 3000,
+};
+static const u16 VAUX2_6030_VSEL_table[] = {
+	1200, 1800, 2500, 2750,
+	2800, 2800, 2800, 2800,
+};
+static const u16 VAUX3_6030_VSEL_table[] = {
+	1000, 1200, 1300, 1800,
+	2500, 2800, 3000, 3000,
+};
+static const u16 VMMC_VSEL_table[] = {
+	1200, 1800, 2800, 2900,
+	3000, 3000, 3000, 3000,
+};
+static const u16 VPP_VSEL_table[] = {
+	1800, 1900, 2000, 2100,
+	2200, 2300, 2400, 2500,
+};
+static const u16 VUSIM_VSEL_table[] = {
+	1200, 1800, 2500, 2900,
+};
 
 static int twlldo_list_voltage(struct regulator_dev *rdev, unsigned index)
 {
@@ -288,7 +344,8 @@ twlldo_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
 
 		/* use the first in-range value */
 		if (min_uV <= uV && uV <= max_uV)
-			return twlreg_write(info, VREG_DEDICATED, vsel);
+			return twlreg_write(info, TWL_MODULE_PM_RECEIVER,
+							VREG_VOLTAGE, vsel);
 	}
 
 	return -EDOM;
@@ -297,7 +354,8 @@ twlldo_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
 static int twlldo_get_voltage(struct regulator_dev *rdev)
 {
 	struct twlreg_info	*info = rdev_get_drvdata(rdev);
-	int			vsel = twlreg_read(info, VREG_DEDICATED);
+	int		vsel = twlreg_read(info, TWL_MODULE_PM_RECEIVER,
+								VREG_VOLTAGE);
 
 	if (vsel < 0)
 		return vsel;
@@ -360,6 +418,10 @@ static struct regulator_ops twlfixed_ops = {
 		TWL_ADJUSTABLE_LDO(label, offset, num, TWL4030)
 #define TWL4030_FIXED_LDO(label, offset, mVolts, num) \
 		TWL_FIXED_LDO(label, offset, mVolts, num, TWL4030)
+#define TWL6030_ADJUSTABLE_LDO(label, offset, num) \
+		TWL_ADJUSTABLE_LDO(label, offset, num, TWL6030)
+#define TWL6030_FIXED_LDO(label, offset, mVolts, num) \
+		TWL_FIXED_LDO(label, offset, mVolts, num, TWL6030)
 
 #define TWL_ADJUSTABLE_LDO(label, offset, num, family) { \
 	.base = offset, \
@@ -420,6 +482,18 @@ static struct twlreg_info twl_regs[] = {
 	TWL4030_FIXED_LDO(VUSB1V8, 0x74, 1800, 18),
 	TWL4030_FIXED_LDO(VUSB3V1, 0x77, 3100, 19),
 	/* VUSBCP is managed *only* by the USB subchip */
+
+	/* 6030 REG with base as PMC Slave Misc : 0x0030 */
+	TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1),
+	TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 2),
+	TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 3),
+	TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 4),
+	TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 5),
+	TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 7),
+	TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15),
+	TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16),
+	TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17),
+	TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18)
 };
 
 static int twlreg_probe(struct platform_device *pdev)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 8e7405d9c624..7679e87df177 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -427,6 +427,12 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset);
 #define MSG_SINGULAR(devgrp, id, state) \
 	((devgrp) << 13 | 0 << 12 | (id) << 4 | (state))
 
+#define MSG_BROADCAST_ALL(devgrp, state) \
+	((devgrp) << 5 | (state))
+
+#define MSG_BROADCAST_REF MSG_BROADCAST_ALL
+#define MSG_BROADCAST_PROV MSG_BROADCAST_ALL
+#define MSG_BROADCAST__CLK_RST MSG_BROADCAST_ALL
 /*----------------------------------------------------------------------*/
 
 struct twl4030_clock_init_data {
@@ -602,6 +608,7 @@ int twl4030_sih_setup(int module);
  * VIO is generally fixed.
  */
 
+/* TWL4030 SMPS/LDO's */
 /* EXTERNAL dc-to-dc buck converters */
 #define TWL4030_REG_VDD1	0
 #define TWL4030_REG_VDD2	1
@@ -628,4 +635,31 @@ int twl4030_sih_setup(int module);
 #define TWL4030_REG_VUSB1V8	18
 #define TWL4030_REG_VUSB3V1	19
 
+/* TWL6030 SMPS/LDO's */
+/* EXTERNAL dc-to-dc buck convertor contollable via SR */
+#define TWL6030_REG_VDD1	30
+#define TWL6030_REG_VDD2	31
+#define TWL6030_REG_VDD3	32
+
+/* Non SR compliant dc-to-dc buck convertors */
+#define	TWL6030_REG_VMEM	33
+#define TWL6030_REG_V2V1	34
+#define	TWL6030_REG_V1V29	35
+#define TWL6030_REG_V1V8	36
+
+/* EXTERNAL LDOs */
+#define TWL6030_REG_VAUX1_6030	37
+#define TWL6030_REG_VAUX2_6030	38
+#define TWL6030_REG_VAUX3_6030	39
+#define TWL6030_REG_VMMC	40
+#define TWL6030_REG_VPP		41
+#define TWL6030_REG_VUSIM	42
+#define TWL6030_REG_VANA	43
+#define TWL6030_REG_VCXIO	44
+#define TWL6030_REG_VDAC	45
+#define TWL6030_REG_VUSB	46
+
+/* INTERNAL LDOs */
+#define TWL6030_REG_VRTC	47
+
 #endif /* End of __TWL4030_H */
-- 
cgit v1.2.3


From 9da66539281b5e15afc4a4739014c8923059d894 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Sun, 13 Dec 2009 22:29:47 +0100
Subject: mfd: Add twl6030 regulator subdevices

This patch adds initial support for creating twl6030 PMIC
specific voltage regulators in the twl mfd driver.

Board specific regulator configurations will have to be passed from
respective board files.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 51 ++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/i2c/twl.h | 16 ++++++++++++----
 2 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index c48a6138c575..2a7606534196 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -639,7 +639,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
-	if (twl_has_usb() && pdata->usb) {
+	if (twl_has_usb() && pdata->usb && twl_class_is_4030()) {
 
 		static struct regulator_consumer_supply usb1v5 = {
 			.supply =	"usb1v5",
@@ -719,7 +719,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
-	if (twl_has_regulator()) {
+	/* twl4030 regulators */
+	if (twl_has_regulator() && twl_class_is_4030()) {
 		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
@@ -765,7 +766,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 	}
 
 	/* maybe add LDOs that are omitted on cost-reduced parts */
-	if (twl_has_regulator() && !(features & TPS_SUBSET)) {
+	if (twl_has_regulator() && !(features & TPS_SUBSET)
+	  && twl_class_is_4030()) {
 		child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
@@ -791,6 +793,49 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
+	/* twl6030 regulators */
+	if (twl_has_regulator() && twl_class_is_6030()) {
+		child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VPP, pdata->vpp);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VUSIM, pdata->vusim);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VANA, pdata->vana);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VCXIO, pdata->vcxio);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VDAC, pdata->vdac);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VUSB, pdata->vusb);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VAUX1_6030, pdata->vaux1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VAUX2_6030, pdata->vaux2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL6030_REG_VAUX3_6030, pdata->vaux3);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 7679e87df177..bf1c5be1f5b6 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -560,16 +560,17 @@ struct twl4030_platform_data {
 	struct twl4030_power_data		*power;
 	struct twl4030_codec_data		*codec;
 
-	/* LDO regulators */
+	/* Common LDO regulators for TWL4030/TWL6030 */
 	struct regulator_init_data		*vdac;
+	struct regulator_init_data		*vaux1;
+	struct regulator_init_data		*vaux2;
+	struct regulator_init_data		*vaux3;
+	/* TWL4030 LDO regulators */
 	struct regulator_init_data		*vpll1;
 	struct regulator_init_data		*vpll2;
 	struct regulator_init_data		*vmmc1;
 	struct regulator_init_data		*vmmc2;
 	struct regulator_init_data		*vsim;
-	struct regulator_init_data		*vaux1;
-	struct regulator_init_data		*vaux2;
-	struct regulator_init_data		*vaux3;
 	struct regulator_init_data		*vaux4;
 	struct regulator_init_data		*vio;
 	struct regulator_init_data		*vdd1;
@@ -577,6 +578,13 @@ struct twl4030_platform_data {
 	struct regulator_init_data		*vintana1;
 	struct regulator_init_data		*vintana2;
 	struct regulator_init_data		*vintdig;
+	/* TWL6030 LDO regulators */
+	struct regulator_init_data              *vmmc;
+	struct regulator_init_data              *vpp;
+	struct regulator_init_data              *vusim;
+	struct regulator_init_data              *vana;
+	struct regulator_init_data              *vcxio;
+	struct regulator_init_data              *vusb;
 };
 
 /*----------------------------------------------------------------------*/
-- 
cgit v1.2.3


From 7820f9e1dddcfebae2698fb2a245d04ce3aa6e74 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 14 Dec 2009 12:49:47 +1100
Subject: md: remove sparse warning:symbol XXX was not declared.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid6algos.c | 19 -------------------
 include/linux/raid/pq.h | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 866215ac7f25..8ce102cc2e7b 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -31,25 +31,6 @@ EXPORT_SYMBOL(raid6_empty_zero_page);
 struct raid6_calls raid6_call;
 EXPORT_SYMBOL_GPL(raid6_call);
 
-/* Various routine sets */
-extern const struct raid6_calls raid6_intx1;
-extern const struct raid6_calls raid6_intx2;
-extern const struct raid6_calls raid6_intx4;
-extern const struct raid6_calls raid6_intx8;
-extern const struct raid6_calls raid6_intx16;
-extern const struct raid6_calls raid6_intx32;
-extern const struct raid6_calls raid6_mmxx1;
-extern const struct raid6_calls raid6_mmxx2;
-extern const struct raid6_calls raid6_sse1x1;
-extern const struct raid6_calls raid6_sse1x2;
-extern const struct raid6_calls raid6_sse2x1;
-extern const struct raid6_calls raid6_sse2x2;
-extern const struct raid6_calls raid6_sse2x4;
-extern const struct raid6_calls raid6_altivec1;
-extern const struct raid6_calls raid6_altivec2;
-extern const struct raid6_calls raid6_altivec4;
-extern const struct raid6_calls raid6_altivec8;
-
 const struct raid6_calls * const raid6_algos[] = {
 	&raid6_intx1,
 	&raid6_intx2,
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index d92480f8285c..1cbbd2c11aa9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -78,6 +78,25 @@ struct raid6_calls {
 /* Selected algorithm */
 extern struct raid6_calls raid6_call;
 
+/* Various routine sets */
+extern const struct raid6_calls raid6_intx1;
+extern const struct raid6_calls raid6_intx2;
+extern const struct raid6_calls raid6_intx4;
+extern const struct raid6_calls raid6_intx8;
+extern const struct raid6_calls raid6_intx16;
+extern const struct raid6_calls raid6_intx32;
+extern const struct raid6_calls raid6_mmxx1;
+extern const struct raid6_calls raid6_mmxx2;
+extern const struct raid6_calls raid6_sse1x1;
+extern const struct raid6_calls raid6_sse1x2;
+extern const struct raid6_calls raid6_sse2x1;
+extern const struct raid6_calls raid6_sse2x2;
+extern const struct raid6_calls raid6_sse2x4;
+extern const struct raid6_calls raid6_altivec1;
+extern const struct raid6_calls raid6_altivec2;
+extern const struct raid6_calls raid6_altivec4;
+extern const struct raid6_calls raid6_altivec8;
+
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
 int raid6_select_algo(void);
-- 
cgit v1.2.3


From c7cd606f60e7679c7f9eee7010f02a6f000209c1 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Sat, 12 Dec 2009 04:13:21 +0000
Subject: can: Fix data length code handling in rx path

A valid CAN dataframe can have a data length code (DLC) of 0 .. 8 data bytes.

When reading the CAN controllers register the 4-bit value may contain values
from 0 .. 15 which may exceed the reserved space in the socket buffer!

The ISO 11898-1 Chapter 8.4.2.3 (DLC field) says that register values > 8
should be reduced to 8 without any error reporting or frame drop.

This patch introduces a new helper macro to cast a given 4-bit data length
code (dlc) to __u8 and ensure the DLC value to be max. 8 bytes.

The different handlings in the rx path of the CAN netdevice drivers are fixed.

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        |  2 +-
 drivers/net/can/bfin_can.c        |  2 +-
 drivers/net/can/mcp251x.c         | 13 +++----------
 drivers/net/can/mscan/mscan.c     |  3 ++-
 drivers/net/can/sja1000/sja1000.c | 18 ++++++++----------
 drivers/net/can/ti_hecc.c         |  2 +-
 drivers/net/can/usb/ems_usb.c     |  2 +-
 include/linux/can/dev.h           |  9 +++++++++
 8 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index cbe3fce53e3b..d0ec17878ffc 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -474,7 +474,7 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb,
 	reg_msr = at91_read(priv, AT91_MSR(mb));
 	if (reg_msr & AT91_MSR_MRTR)
 		cf->can_id |= CAN_RTR_FLAG;
-	cf->can_dlc = min_t(__u8, (reg_msr >> 16) & 0xf, 8);
+	cf->can_dlc = get_can_dlc((reg_msr >> 16) & 0xf);
 
 	*(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
 	*(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb));
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index c7fc1de28173..0ec1524523cc 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -392,7 +392,7 @@ static void bfin_can_rx(struct net_device *dev, u16 isrc)
 		cf->can_id |= CAN_RTR_FLAG;
 
 	/* get data length code */
-	cf->can_dlc = bfin_read16(&reg->chl[obj].dlc);
+	cf->can_dlc = get_can_dlc(bfin_read16(&reg->chl[obj].dlc) & 0xF);
 
 	/* get payload */
 	for (i = 0; i < 8; i += 2) {
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 78b1b69b2921..9c5a1537939c 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -403,9 +403,8 @@ static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf,
 
 		for (i = 1; i < RXBDAT_OFF; i++)
 			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
-		len = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
-		if (len > 8)
-			len = 8;
+
+		len = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK);
 		for (; i < (RXBDAT_OFF + len); i++)
 			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
 	} else {
@@ -455,13 +454,7 @@ static void mcp251x_hw_rx(struct spi_device *spi, int buf_idx)
 			(buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT);
 	}
 	/* Data length */
-	frame->can_dlc = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
-	if (frame->can_dlc > 8) {
-		dev_warn(&spi->dev, "invalid frame recevied\n");
-		priv->net->stats.rx_errors++;
-		dev_kfree_skb(skb);
-		return;
-	}
+	frame->can_dlc = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK);
 	memcpy(frame->data, buf + RXBDAT_OFF, frame->can_dlc);
 
 	priv->net->stats.rx_packets++;
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index bb06dfb58f25..07346f880ca6 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -297,7 +297,8 @@ static void mscan_get_rx_frame(struct net_device *dev, struct can_frame *frame)
 	frame->can_id |= can_id >> 1;
 	if (can_id & 1)
 		frame->can_id |= CAN_RTR_FLAG;
-	frame->can_dlc = in_8(&regs->rx.dlr) & 0xf;
+
+	frame->can_dlc = get_can_dlc(in_8(&regs->rx.dlr) & 0xf);
 
 	if (!(frame->can_id & CAN_RTR_FLAG)) {
 		void __iomem *data = &regs->rx.dsr1_0;
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index b4ba88a31075..542a4f7255b4 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -293,15 +293,14 @@ static void sja1000_rx(struct net_device *dev)
 	uint8_t fi;
 	uint8_t dreg;
 	canid_t id;
-	uint8_t dlc;
 	int i;
 
+	/* create zero'ed CAN frame buffer */
 	skb = alloc_can_skb(dev, &cf);
 	if (skb == NULL)
 		return;
 
 	fi = priv->read_reg(priv, REG_FI);
-	dlc = fi & 0x0F;
 
 	if (fi & FI_FF) {
 		/* extended frame format (EFF) */
@@ -318,16 +317,15 @@ static void sja1000_rx(struct net_device *dev)
 		    | (priv->read_reg(priv, REG_ID2) >> 5);
 	}
 
-	if (fi & FI_RTR)
+	if (fi & FI_RTR) {
 		id |= CAN_RTR_FLAG;
+	} else {
+		cf->can_dlc = get_can_dlc(fi & 0x0F);
+		for (i = 0; i < cf->can_dlc; i++)
+			cf->data[i] = priv->read_reg(priv, dreg++);
+	}
 
 	cf->can_id = id;
-	cf->can_dlc = dlc;
-	for (i = 0; i < dlc; i++)
-		cf->data[i] = priv->read_reg(priv, dreg++);
-
-	while (i < 8)
-		cf->data[i++] = 0;
 
 	/* release receive buffer */
 	priv->write_reg(priv, REG_CMR, CMD_RRB);
@@ -335,7 +333,7 @@ static void sja1000_rx(struct net_device *dev)
 	netif_rx(skb);
 
 	stats->rx_packets++;
-	stats->rx_bytes += dlc;
+	stats->rx_bytes += cf->can_dlc;
 }
 
 static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 07e8016b17ec..5c993c2da528 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -552,7 +552,7 @@ static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno)
 	data = hecc_read_mbx(priv, mbxno, HECC_CANMCF);
 	if (data & HECC_CANMCF_RTR)
 		cf->can_id |= CAN_RTR_FLAG;
-	cf->can_dlc = data & 0xF;
+	cf->can_dlc = get_can_dlc(data & 0xF);
 	data = hecc_read_mbx(priv, mbxno, HECC_CANMDL);
 	*(u32 *)(cf->data) = cpu_to_be32(data);
 	if (cf->can_dlc > 4) {
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 591eb0eb1c2b..efbb05c71bf4 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -316,7 +316,7 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
 		return;
 
 	cf->can_id = le32_to_cpu(msg->msg.can_msg.id);
-	cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
+	cf->can_dlc = get_can_dlc(msg->msg.can_msg.length & 0xF);
 
 	if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME ||
 	    msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 1ed2a5cc03f5..3db7767d2a17 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -51,6 +51,15 @@ struct can_priv {
 	struct sk_buff **echo_skb;
 };
 
+/*
+ * get_can_dlc(value) - helper macro to cast a given data length code (dlc)
+ * to __u8 and ensure the dlc value to be max. 8 bytes.
+ *
+ * To be used in the CAN netdriver receive path to ensure conformance with
+ * ISO 11898-1 Chapter 8.4.2.3 (DLC field)
+ */
+#define get_can_dlc(i)	(min_t(__u8, (i), 8))
+
 struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
-- 
cgit v1.2.3


From 5185fb069972b653dd7177292e7510ff99d9e8aa Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 14 Dec 2009 14:03:27 +0000
Subject: FRV: Fix no-hardware-breakpoint case

If there is no hardware breakpoint support, modify_user_hw_breakpoint()
tries to return a NULL pointer through as an 'int' return value:

  In file included from kernel/exit.c:53:
  include/linux/hw_breakpoint.h: In function 'modify_user_hw_breakpoint':
  include/linux/hw_breakpoint.h:96: warning: return makes integer from pointer without a cast

Return 0 instead.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hw_breakpoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 69f07a9f1277..e268388146e8 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -93,7 +93,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    struct task_struct *tsk)	{ return NULL; }
 static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  struct perf_event_attr *attr)	{ return NULL; }
+			  struct perf_event_attr *attr)	{ return 0; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	 triggered,
-- 
cgit v1.2.3


From 491424c0f46c282a854b88830212bdb0763e93dc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 14 Dec 2009 14:13:44 +0000
Subject: PCI: Global variable decls must match the defs in section attributes

Global variable declarations must match the definitions in section attributes
as the compiler is at liberty to vary the method it uses to access a variable,
depending on the section it is in.

When building the FRV arch, I now see:

  drivers/built-in.o: In function `pci_apply_final_quirks':
  drivers/pci/quirks.c:2606: relocation truncated to fit: R_FRV_GPREL12 against symbol `pci_dfl_cache_line_size' defined in .devinit.data section in drivers/built-in.o
  drivers/pci/quirks.c:2623: relocation truncated to fit: R_FRV_GPREL12 against symbol `pci_dfl_cache_line_size' defined in .devinit.data section in drivers/built-in.o
  drivers/pci/quirks.c:2630: relocation truncated to fit: R_FRV_GPREL12 against symbol `pci_dfl_cache_line_size' defined in .devinit.data section in drivers/built-in.o

because the declaration of pci_dfl_cache_line_size in linux/pci.h does not
match the definition in drivers/pci/pci.c.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 04771b9c3316..bf1e67080849 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1255,7 +1255,7 @@ extern int pci_pci_problems;
 
 extern unsigned long pci_cardbus_io_size;
 extern unsigned long pci_cardbus_mem_size;
-extern u8 pci_dfl_cache_line_size;
+extern u8 __devinitdata pci_dfl_cache_line_size;
 extern u8 pci_cache_line_size;
 
 extern unsigned long pci_hotplug_io_size;
-- 
cgit v1.2.3


From 310ec79210d754afe51e2e4a983e846b60179abd Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 14 Dec 2009 21:17:23 +0100
Subject: i2c: Drop the kind parameter from detect callbacks

The "kind" parameter always has value -1, and nobody is using it any
longer, so we can remove it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/hwmon/adm1021.c      | 4 ++--
 drivers/hwmon/adm1025.c      | 4 ++--
 drivers/hwmon/adm1026.c      | 4 ++--
 drivers/hwmon/adm1029.c      | 4 ++--
 drivers/hwmon/adm1031.c      | 4 ++--
 drivers/hwmon/adm9240.c      | 4 ++--
 drivers/hwmon/ads7828.c      | 4 ++--
 drivers/hwmon/adt7462.c      | 4 ++--
 drivers/hwmon/adt7470.c      | 4 ++--
 drivers/hwmon/adt7473.c      | 4 ++--
 drivers/hwmon/adt7475.c      | 2 +-
 drivers/hwmon/asb100.c       | 4 ++--
 drivers/hwmon/atxp1.c        | 5 ++---
 drivers/hwmon/dme1737.c      | 2 +-
 drivers/hwmon/ds1621.c       | 2 +-
 drivers/hwmon/f75375s.c      | 4 ++--
 drivers/hwmon/fschmd.c       | 4 ++--
 drivers/hwmon/gl518sm.c      | 6 ++----
 drivers/hwmon/gl520sm.c      | 6 ++----
 drivers/hwmon/lm63.c         | 5 ++---
 drivers/hwmon/lm73.c         | 2 +-
 drivers/hwmon/lm75.c         | 2 +-
 drivers/hwmon/lm77.c         | 5 ++---
 drivers/hwmon/lm78.c         | 4 ++--
 drivers/hwmon/lm80.c         | 6 ++----
 drivers/hwmon/lm83.c         | 4 ++--
 drivers/hwmon/lm85.c         | 6 ++----
 drivers/hwmon/lm87.c         | 4 ++--
 drivers/hwmon/lm90.c         | 5 ++---
 drivers/hwmon/lm92.c         | 2 +-
 drivers/hwmon/lm93.c         | 3 +--
 drivers/hwmon/lm95241.c      | 2 +-
 drivers/hwmon/max1619.c      | 4 ++--
 drivers/hwmon/max6650.c      | 4 ++--
 drivers/hwmon/pcf8591.c      | 2 +-
 drivers/hwmon/smsc47m192.c   | 4 ++--
 drivers/hwmon/thmc50.c       | 4 ++--
 drivers/hwmon/tmp401.c       | 4 ++--
 drivers/hwmon/tmp421.c       | 2 +-
 drivers/hwmon/w83781d.c      | 3 +--
 drivers/hwmon/w83791d.c      | 4 ++--
 drivers/hwmon/w83792d.c      | 4 ++--
 drivers/hwmon/w83793.c       | 4 ++--
 drivers/hwmon/w83l785ts.c    | 4 ++--
 drivers/hwmon/w83l786ng.c    | 5 ++---
 drivers/i2c/i2c-core.c       | 2 +-
 drivers/misc/eeprom/eeprom.c | 3 +--
 drivers/misc/ics932s401.c    | 4 ++--
 include/linux/i2c.h          | 2 +-
 49 files changed, 84 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index 33acf29531af..3ebcdd9c25a5 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -97,7 +97,7 @@ struct adm1021_data {
 
 static int adm1021_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm1021_detect(struct i2c_client *client, int kind,
+static int adm1021_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void adm1021_init_client(struct i2c_client *client);
 static int adm1021_remove(struct i2c_client *client);
@@ -284,7 +284,7 @@ static const struct attribute_group adm1021_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm1021_detect(struct i2c_client *client, int kind,
+static int adm1021_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index db6ac2b04f6f..357c9ffa147e 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -111,7 +111,7 @@ static const int in_scale[6] = { 2500, 2250, 3300, 5000, 12000, 3300 };
 
 static int adm1025_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm1025_detect(struct i2c_client *client, int kind,
+static int adm1025_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void adm1025_init_client(struct i2c_client *client);
 static int adm1025_remove(struct i2c_client *client);
@@ -409,7 +409,7 @@ static const struct attribute_group adm1025_group_in4 = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm1025_detect(struct i2c_client *client, int kind,
+static int adm1025_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index fb5363985e21..8deb17a402da 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -293,7 +293,7 @@ struct adm1026_data {
 
 static int adm1026_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm1026_detect(struct i2c_client *client, int kind,
+static int adm1026_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int adm1026_remove(struct i2c_client *client);
 static int adm1026_read_value(struct i2c_client *client, u8 reg);
@@ -1650,7 +1650,7 @@ static const struct attribute_group adm1026_group_in8_9 = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm1026_detect(struct i2c_client *client, int kind,
+static int adm1026_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c
index ef91e2a4a567..9bc9dbcacdbd 100644
--- a/drivers/hwmon/adm1029.c
+++ b/drivers/hwmon/adm1029.c
@@ -117,7 +117,7 @@ static const u8 ADM1029_REG_FAN_DIV[] = {
 
 static int adm1029_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm1029_detect(struct i2c_client *client, int kind,
+static int adm1029_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int adm1029_remove(struct i2c_client *client);
 static struct adm1029_data *adm1029_update_device(struct device *dev);
@@ -297,7 +297,7 @@ static const struct attribute_group adm1029_group = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm1029_detect(struct i2c_client *client, int kind,
+static int adm1029_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c
index 0e722175aae0..cebfbf6926da 100644
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -102,7 +102,7 @@ struct adm1031_data {
 
 static int adm1031_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm1031_detect(struct i2c_client *client, int kind,
+static int adm1031_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void adm1031_init_client(struct i2c_client *client);
 static int adm1031_remove(struct i2c_client *client);
@@ -813,7 +813,7 @@ static const struct attribute_group adm1031_group_opt = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm1031_detect(struct i2c_client *client, int kind,
+static int adm1031_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index 20e0481cc206..9316e074d690 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -132,7 +132,7 @@ static inline unsigned int AOUT_FROM_REG(u8 reg)
 
 static int adm9240_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adm9240_detect(struct i2c_client *client, int kind,
+static int adm9240_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void adm9240_init_client(struct i2c_client *client);
 static int adm9240_remove(struct i2c_client *client);
@@ -545,7 +545,7 @@ static const struct attribute_group adm9240_group = {
 /*** sensor chip detect and driver install ***/
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adm9240_detect(struct i2c_client *new_client, int kind,
+static int adm9240_detect(struct i2c_client *new_client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index 451977bca7d6..b50893111532 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -72,7 +72,7 @@ struct ads7828_data {
 };
 
 /* Function declaration - necessary due to function dependencies */
-static int ads7828_detect(struct i2c_client *client, int kind,
+static int ads7828_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int ads7828_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
@@ -187,7 +187,7 @@ static struct i2c_driver ads7828_driver = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int ads7828_detect(struct i2c_client *client, int kind,
+static int ads7828_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index f9c9562b6a94..30cf002c677f 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -237,7 +237,7 @@ struct adt7462_data {
 
 static int adt7462_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adt7462_detect(struct i2c_client *client, int kind,
+static int adt7462_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int adt7462_remove(struct i2c_client *client);
 
@@ -1902,7 +1902,7 @@ static struct attribute *adt7462_attr[] =
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adt7462_detect(struct i2c_client *client, int kind,
+static int adt7462_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 32b1750a6890..9ffe5c6e495d 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -177,7 +177,7 @@ struct adt7470_data {
 
 static int adt7470_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adt7470_detect(struct i2c_client *client, int kind,
+static int adt7470_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int adt7470_remove(struct i2c_client *client);
 
@@ -1225,7 +1225,7 @@ static struct attribute *adt7470_attr[] =
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adt7470_detect(struct i2c_client *client, int kind,
+static int adt7470_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index aea244db974e..78fadaa431e2 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -166,7 +166,7 @@ struct adt7473_data {
 
 static int adt7473_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int adt7473_detect(struct i2c_client *client, int kind,
+static int adt7473_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int adt7473_remove(struct i2c_client *client);
 
@@ -1085,7 +1085,7 @@ static struct attribute *adt7473_attr[] =
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int adt7473_detect(struct i2c_client *client, int kind,
+static int adt7473_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 99abfddedbc3..3c0571551a9f 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -1172,7 +1172,7 @@ static struct attribute_group in4_attr_group = { .attrs = in4_attrs };
 static struct attribute_group in5_attr_group = { .attrs = in5_attrs };
 static struct attribute_group vid_attr_group = { .attrs = vid_attrs };
 
-static int adt7475_detect(struct i2c_client *client, int kind,
+static int adt7475_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index 480f80ea1fa0..507e116d5456 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -209,7 +209,7 @@ static void asb100_write_value(struct i2c_client *client, u16 reg, u16 val);
 
 static int asb100_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
-static int asb100_detect(struct i2c_client *client, int kind,
+static int asb100_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int asb100_remove(struct i2c_client *client);
 static struct asb100_data *asb100_update_device(struct device *dev);
@@ -697,7 +697,7 @@ ERROR_SC_2:
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int asb100_detect(struct i2c_client *client, int kind,
+static int asb100_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index d6b490d3e36f..6b7459745b66 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -50,8 +50,7 @@ static int atxp1_probe(struct i2c_client *client,
 		       const struct i2c_device_id *id);
 static int atxp1_remove(struct i2c_client *client);
 static struct atxp1_data * atxp1_update_device(struct device *dev);
-static int atxp1_detect(struct i2c_client *client, int kind,
-			struct i2c_board_info *info);
+static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info);
 
 static const struct i2c_device_id atxp1_id[] = {
 	{ "atxp1", atxp1 },
@@ -275,7 +274,7 @@ static const struct attribute_group atxp1_group = {
 
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int atxp1_detect(struct i2c_client *new_client, int kind,
+static int atxp1_detect(struct i2c_client *new_client,
 			struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4377bb0cc526..7024617c1194 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -2208,7 +2208,7 @@ exit:
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int dme1737_i2c_detect(struct i2c_client *client, int kind,
+static int dme1737_i2c_detect(struct i2c_client *client,
 			      struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c
index 2a4c6a05b14f..5fde2f5139fd 100644
--- a/drivers/hwmon/ds1621.c
+++ b/drivers/hwmon/ds1621.c
@@ -224,7 +224,7 @@ static const struct attribute_group ds1621_group = {
 
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int ds1621_detect(struct i2c_client *client, int kind,
+static int ds1621_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
index 40dfbcd3f3f2..2ffcf56b6f4e 100644
--- a/drivers/hwmon/f75375s.c
+++ b/drivers/hwmon/f75375s.c
@@ -113,7 +113,7 @@ struct f75375_data {
 	s8 temp_max_hyst[2];
 };
 
-static int f75375_detect(struct i2c_client *client, int kind,
+static int f75375_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int f75375_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
@@ -677,7 +677,7 @@ static int f75375_remove(struct i2c_client *client)
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int f75375_detect(struct i2c_client *client, int kind,
+static int f75375_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 281829cd1533..bce18e0f1d61 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -221,7 +221,7 @@ static const int FSCHMD_NO_TEMP_SENSORS[7] = { 3, 3, 4, 3, 5, 5, 11 };
 
 static int fschmd_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
-static int fschmd_detect(struct i2c_client *client, int kind,
+static int fschmd_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int fschmd_remove(struct i2c_client *client);
 static struct fschmd_data *fschmd_update_device(struct device *dev);
@@ -1000,7 +1000,7 @@ static void fschmd_dmi_decode(const struct dmi_header *header, void *dummy)
 	}
 }
 
-static int fschmd_detect(struct i2c_client *client, int _kind,
+static int fschmd_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	enum chips kind;
diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c
index 1d69458aa0b6..34f83c6a3f45 100644
--- a/drivers/hwmon/gl518sm.c
+++ b/drivers/hwmon/gl518sm.c
@@ -139,8 +139,7 @@ struct gl518_data {
 
 static int gl518_probe(struct i2c_client *client,
 		       const struct i2c_device_id *id);
-static int gl518_detect(struct i2c_client *client, int kind,
-			struct i2c_board_info *info);
+static int gl518_detect(struct i2c_client *client, struct i2c_board_info *info);
 static void gl518_init_client(struct i2c_client *client);
 static int gl518_remove(struct i2c_client *client);
 static int gl518_read_value(struct i2c_client *client, u8 reg);
@@ -484,8 +483,7 @@ static const struct attribute_group gl518_group_r80 = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int gl518_detect(struct i2c_client *client, int kind,
-			struct i2c_board_info *info)
+static int gl518_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int rev;
diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c
index 92b5720ceaff..d03ba692fc42 100644
--- a/drivers/hwmon/gl520sm.c
+++ b/drivers/hwmon/gl520sm.c
@@ -81,8 +81,7 @@ static const u8 GL520_REG_TEMP_MAX_HYST[]	= { 0x06, 0x18 };
 
 static int gl520_probe(struct i2c_client *client,
 		       const struct i2c_device_id *id);
-static int gl520_detect(struct i2c_client *client, int kind,
-			struct i2c_board_info *info);
+static int gl520_detect(struct i2c_client *client, struct i2c_board_info *info);
 static void gl520_init_client(struct i2c_client *client);
 static int gl520_remove(struct i2c_client *client);
 static int gl520_read_value(struct i2c_client *client, u8 reg);
@@ -681,8 +680,7 @@ static const struct attribute_group gl520_group_opt = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int gl520_detect(struct i2c_client *client, int kind,
-			struct i2c_board_info *info)
+static int gl520_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 5da66ab04f74..26844fc4a66d 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -134,8 +134,7 @@ static int lm63_remove(struct i2c_client *client);
 
 static struct lm63_data *lm63_update_device(struct device *dev);
 
-static int lm63_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info);
+static int lm63_detect(struct i2c_client *client, struct i2c_board_info *info);
 static void lm63_init_client(struct i2c_client *client);
 
 /*
@@ -423,7 +422,7 @@ static const struct attribute_group lm63_group_fan1 = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm63_detect(struct i2c_client *new_client, int kind,
+static int lm63_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm73.c b/drivers/hwmon/lm73.c
index 0bf8b2a8e9f0..e610da9bd80c 100644
--- a/drivers/hwmon/lm73.c
+++ b/drivers/hwmon/lm73.c
@@ -151,7 +151,7 @@ static const struct i2c_device_id lm73_ids[] = {
 MODULE_DEVICE_TABLE(i2c, lm73_ids);
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm73_detect(struct i2c_client *new_client, int kind,
+static int lm73_detect(struct i2c_client *new_client,
 			struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index e392548cccb8..8fd759d28ddf 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -234,7 +234,7 @@ static const struct i2c_device_id lm75_ids[] = {
 MODULE_DEVICE_TABLE(i2c, lm75_ids);
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm75_detect(struct i2c_client *new_client, int kind,
+static int lm75_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c
index ac067fd19482..6373ab2cd952 100644
--- a/drivers/hwmon/lm77.c
+++ b/drivers/hwmon/lm77.c
@@ -66,8 +66,7 @@ struct lm77_data {
 
 static int lm77_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
-static int lm77_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info);
+static int lm77_detect(struct i2c_client *client, struct i2c_board_info *info);
 static void lm77_init_client(struct i2c_client *client);
 static int lm77_remove(struct i2c_client *client);
 static u16 lm77_read_value(struct i2c_client *client, u8 reg);
@@ -245,7 +244,7 @@ static const struct attribute_group lm77_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm77_detect(struct i2c_client *new_client, int kind,
+static int lm77_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index 5978291cebb3..f58850a9d9e0 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -142,7 +142,7 @@ struct lm78_data {
 };
 
 
-static int lm78_i2c_detect(struct i2c_client *client, int kind,
+static int lm78_i2c_detect(struct i2c_client *client,
 			   struct i2c_board_info *info);
 static int lm78_i2c_probe(struct i2c_client *client,
 			  const struct i2c_device_id *id);
@@ -558,7 +558,7 @@ static int lm78_alias_detect(struct i2c_client *client, u8 chipid)
 	return 1;
 }
 
-static int lm78_i2c_detect(struct i2c_client *client, int kind,
+static int lm78_i2c_detect(struct i2c_client *client,
 			   struct i2c_board_info *info)
 {
 	int i;
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index bcffc1899403..e3222f3d4a5e 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -133,8 +133,7 @@ struct lm80_data {
 
 static int lm80_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
-static int lm80_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info);
+static int lm80_detect(struct i2c_client *client, struct i2c_board_info *info);
 static void lm80_init_client(struct i2c_client *client);
 static int lm80_remove(struct i2c_client *client);
 static struct lm80_data *lm80_update_device(struct device *dev);
@@ -447,8 +446,7 @@ static const struct attribute_group lm80_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm80_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info)
+static int lm80_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int i, cur;
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 08b03e6ed0b7..bfb7477cb6b3 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -118,7 +118,7 @@ static const u8 LM83_REG_W_HIGH[] = {
  * Functions declaration
  */
 
-static int lm83_detect(struct i2c_client *new_client, int kind,
+static int lm83_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info);
 static int lm83_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
@@ -291,7 +291,7 @@ static const struct attribute_group lm83_group_opt = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm83_detect(struct i2c_client *new_client, int kind,
+static int lm83_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index d56da2e74708..f5fc45ac6fe5 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -323,8 +323,7 @@ struct lm85_data {
 	struct lm85_zone zone[3];
 };
 
-static int lm85_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info);
+static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info);
 static int lm85_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
 static int lm85_remove(struct i2c_client *client);
@@ -1156,8 +1155,7 @@ static int lm85_is_fake(struct i2c_client *client)
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm85_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info)
+static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int address = client->addr;
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index 4929b1815eee..d545b8596b05 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -158,7 +158,7 @@ static u8 LM87_REG_TEMP_LOW[3] = { 0x3A, 0x38, 0x2C };
 
 static int lm87_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
-static int lm87_detect(struct i2c_client *new_client, int kind,
+static int lm87_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info);
 static void lm87_init_client(struct i2c_client *client);
 static int lm87_remove(struct i2c_client *client);
@@ -662,7 +662,7 @@ static const struct attribute_group lm87_group_opt = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm87_detect(struct i2c_client *new_client, int kind,
+static int lm87_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index b7c905f50ed4..3acbacadac77 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -152,8 +152,7 @@ I2C_CLIENT_INSMOD_8(lm90, adm1032, lm99, lm86, max6657, adt7461, max6680,
  * Functions declaration
  */
 
-static int lm90_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info);
+static int lm90_detect(struct i2c_client *client, struct i2c_board_info *info);
 static int lm90_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id);
 static void lm90_init_client(struct i2c_client *client);
@@ -656,7 +655,7 @@ static int lm90_read_reg(struct i2c_client* client, u8 reg, u8 *value)
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm90_detect(struct i2c_client *new_client, int kind,
+static int lm90_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index 47ac698709dc..da354222468d 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -319,7 +319,7 @@ static const struct attribute_group lm92_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm92_detect(struct i2c_client *new_client, int kind,
+static int lm92_detect(struct i2c_client *new_client,
 		       struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index 124dd7cea54c..7b9c97b72eb8 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -2501,8 +2501,7 @@ static void lm93_init_client(struct i2c_client *client)
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm93_detect(struct i2c_client *client, int kind,
-		       struct i2c_board_info *info)
+static int lm93_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int mfr, ver;
diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c
index 906b896cf1d0..05ede4137e22 100644
--- a/drivers/hwmon/lm95241.c
+++ b/drivers/hwmon/lm95241.c
@@ -310,7 +310,7 @@ static const struct attribute_group lm95241_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm95241_detect(struct i2c_client *new_client, int kind,
+static int lm95241_detect(struct i2c_client *new_client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = new_client->adapter;
diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c
index 7fcf5ff89e7f..4baf94efd372 100644
--- a/drivers/hwmon/max1619.c
+++ b/drivers/hwmon/max1619.c
@@ -88,7 +88,7 @@ static int temp_to_reg(int val)
 
 static int max1619_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int max1619_detect(struct i2c_client *client, int kind,
+static int max1619_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void max1619_init_client(struct i2c_client *client);
 static int max1619_remove(struct i2c_client *client);
@@ -226,7 +226,7 @@ static const struct attribute_group max1619_group = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int max1619_detect(struct i2c_client *client, int kind,
+static int max1619_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index 1da561e0cb37..fd5d1acfcc95 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -116,7 +116,7 @@ I2C_CLIENT_INSMOD_1(max6650);
 
 static int max6650_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int max6650_detect(struct i2c_client *client, int kind,
+static int max6650_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int max6650_init_client(struct i2c_client *client);
 static int max6650_remove(struct i2c_client *client);
@@ -528,7 +528,7 @@ static struct attribute_group max6650_attr_grp = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int max6650_detect(struct i2c_client *client, int kind,
+static int max6650_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/pcf8591.c b/drivers/hwmon/pcf8591.c
index 1d7ffebd679d..4355aada01f2 100644
--- a/drivers/hwmon/pcf8591.c
+++ b/drivers/hwmon/pcf8591.c
@@ -169,7 +169,7 @@ static const struct attribute_group pcf8591_attr_group_opt = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int pcf8591_detect(struct i2c_client *client, int kind,
+static int pcf8591_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c
index 4d88c045781c..1683fc76759f 100644
--- a/drivers/hwmon/smsc47m192.c
+++ b/drivers/hwmon/smsc47m192.c
@@ -115,7 +115,7 @@ struct smsc47m192_data {
 
 static int smsc47m192_probe(struct i2c_client *client,
 			    const struct i2c_device_id *id);
-static int smsc47m192_detect(struct i2c_client *client, int kind,
+static int smsc47m192_detect(struct i2c_client *client,
 			     struct i2c_board_info *info);
 static int smsc47m192_remove(struct i2c_client *client);
 static struct smsc47m192_data *smsc47m192_update_device(struct device *dev);
@@ -481,7 +481,7 @@ static void smsc47m192_init_client(struct i2c_client *client)
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int smsc47m192_detect(struct i2c_client *client, int kind,
+static int smsc47m192_detect(struct i2c_client *client,
 			     struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/thmc50.c b/drivers/hwmon/thmc50.c
index 4b793849c738..02ac0d4323a4 100644
--- a/drivers/hwmon/thmc50.c
+++ b/drivers/hwmon/thmc50.c
@@ -84,7 +84,7 @@ struct thmc50_data {
 	u8 alarms;
 };
 
-static int thmc50_detect(struct i2c_client *client, int kind,
+static int thmc50_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int thmc50_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
@@ -286,7 +286,7 @@ static const struct attribute_group temp3_group = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int thmc50_detect(struct i2c_client *client, int kind,
+static int thmc50_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	unsigned company;
diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index ee9673467c4a..7cf1d541a5fa 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -98,7 +98,7 @@ static const u8 TMP411_TEMP_HIGHEST_LSB[2]		= { 0x33, 0x37 };
 
 static int tmp401_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
-static int tmp401_detect(struct i2c_client *client, int kind,
+static int tmp401_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int tmp401_remove(struct i2c_client *client);
 static struct tmp401_data *tmp401_update_device(struct device *dev);
@@ -488,7 +488,7 @@ static void tmp401_init_client(struct i2c_client *client)
 		i2c_smbus_write_byte_data(client, TMP401_CONFIG_WRITE, config);
 }
 
-static int tmp401_detect(struct i2c_client *client, int _kind,
+static int tmp401_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	enum chips kind;
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index bb5464a289ca..34eb34c548ff 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -223,7 +223,7 @@ static int tmp421_init_client(struct i2c_client *client)
 	return 0;
 }
 
-static int tmp421_detect(struct i2c_client *client, int _kind,
+static int tmp421_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	enum chips kind;
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 7ab7967da0a0..44704d2dee63 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -1051,8 +1051,7 @@ w83781d_create_files(struct device *dev, int kind, int is_isa)
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int
-w83781d_detect(struct i2c_client *client, int kind,
-	       struct i2c_board_info *info)
+w83781d_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	int val1, val2;
 	struct w83781d_data *isa = w83781d_data_if_isa();
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index 0410bf12c521..b3e91b6e651a 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -326,7 +326,7 @@ struct w83791d_data {
 
 static int w83791d_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int w83791d_detect(struct i2c_client *client, int kind,
+static int w83791d_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int w83791d_remove(struct i2c_client *client);
 
@@ -1259,7 +1259,7 @@ error_sc_0:
 
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int w83791d_detect(struct i2c_client *client, int kind,
+static int w83791d_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index 38978851333f..03b836cdafa6 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -302,7 +302,7 @@ struct w83792d_data {
 
 static int w83792d_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int w83792d_detect(struct i2c_client *client, int kind,
+static int w83792d_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int w83792d_remove(struct i2c_client *client);
 static struct w83792d_data *w83792d_update_device(struct device *dev);
@@ -1263,7 +1263,7 @@ static const struct attribute_group w83792d_group = {
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int
-w83792d_detect(struct i2c_client *client, int kind, struct i2c_board_info *info)
+w83792d_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int val1, val2;
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 80a2191bf127..acf35e6e2ab9 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -230,7 +230,7 @@ static u8 w83793_read_value(struct i2c_client *client, u16 reg);
 static int w83793_write_value(struct i2c_client *client, u16 reg, u8 value);
 static int w83793_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
-static int w83793_detect(struct i2c_client *client, int kind,
+static int w83793_detect(struct i2c_client *client,
 			 struct i2c_board_info *info);
 static int w83793_remove(struct i2c_client *client);
 static void w83793_init_client(struct i2c_client *client);
@@ -1161,7 +1161,7 @@ ERROR_SC_0:
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int w83793_detect(struct i2c_client *client, int kind,
+static int w83793_detect(struct i2c_client *client,
 			 struct i2c_board_info *info)
 {
 	u8 tmp, bank, chip_id;
diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c
index 9b6c4c10fba7..ec6e4b7fb741 100644
--- a/drivers/hwmon/w83l785ts.c
+++ b/drivers/hwmon/w83l785ts.c
@@ -83,7 +83,7 @@ I2C_CLIENT_INSMOD_1(w83l785ts);
 
 static int w83l785ts_probe(struct i2c_client *client,
 			   const struct i2c_device_id *id);
-static int w83l785ts_detect(struct i2c_client *client, int kind,
+static int w83l785ts_detect(struct i2c_client *client,
 			    struct i2c_board_info *info);
 static int w83l785ts_remove(struct i2c_client *client);
 static u8 w83l785ts_read_value(struct i2c_client *client, u8 reg, u8 defval);
@@ -146,7 +146,7 @@ static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL, 1);
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int w83l785ts_detect(struct i2c_client *client, int kind,
+static int w83l785ts_detect(struct i2c_client *client,
 			    struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c
index 27da7d2b15fb..12a5fd67bee0 100644
--- a/drivers/hwmon/w83l786ng.c
+++ b/drivers/hwmon/w83l786ng.c
@@ -147,7 +147,7 @@ struct w83l786ng_data {
 
 static int w83l786ng_probe(struct i2c_client *client,
 			   const struct i2c_device_id *id);
-static int w83l786ng_detect(struct i2c_client *client, int kind,
+static int w83l786ng_detect(struct i2c_client *client,
 			    struct i2c_board_info *info);
 static int w83l786ng_remove(struct i2c_client *client);
 static void w83l786ng_init_client(struct i2c_client *client);
@@ -586,8 +586,7 @@ static const struct attribute_group w83l786ng_group = {
 };
 
 static int
-w83l786ng_detect(struct i2c_client *client, int kind,
-		 struct i2c_board_info *info)
+w83l786ng_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	u16 man_id;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 4f34823e86b1..c1047d644d8f 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1184,7 +1184,7 @@ static int i2c_detect_address(struct i2c_client *temp_client,
 	/* Finally call the custom detection function */
 	memset(&info, 0, sizeof(struct i2c_board_info));
 	info.addr = addr;
-	err = driver->detect(temp_client, -1, &info);
+	err = driver->detect(temp_client, &info);
 	if (err) {
 		/* -ENODEV is returned if the detection fails. We catch it
 		   here as this isn't an error. */
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 2c27193aeaa0..2c428f464539 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -135,8 +135,7 @@ static struct bin_attribute eeprom_attr = {
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int eeprom_detect(struct i2c_client *client, int kind,
-			 struct i2c_board_info *info)
+static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
 
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 4bb7a3af9ad9..75097ec43edd 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -106,7 +106,7 @@ struct ics932s401_data {
 
 static int ics932s401_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id);
-static int ics932s401_detect(struct i2c_client *client, int kind,
+static int ics932s401_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int ics932s401_remove(struct i2c_client *client);
 
@@ -413,7 +413,7 @@ static ssize_t show_spread(struct device *dev,
 }
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int ics932s401_detect(struct i2c_client *client, int kind,
+static int ics932s401_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
 {
 	struct i2c_adapter *adapter = client->adapter;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 419ab546b266..f6f2c080ba67 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -161,7 +161,7 @@ struct i2c_driver {
 	const struct i2c_device_id *id_table;
 
 	/* Device detection callback for automatic device creation */
-	int (*detect)(struct i2c_client *, int kind, struct i2c_board_info *);
+	int (*detect)(struct i2c_client *, struct i2c_board_info *);
 	const struct i2c_client_address_data *address_data;
 	struct list_head clients;
 };
-- 
cgit v1.2.3


From c3813d6af177fab19e322f3114b1f64fbcf08d71 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 14 Dec 2009 21:17:25 +0100
Subject: i2c: Get rid of struct i2c_client_address_data

Struct i2c_client_address_data only contains one field at this point,
which makes its usefulness questionable. Get rid of it and pass simple
address lists around instead.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 Documentation/i2c/writing-clients |  2 +-
 drivers/hwmon/adm1021.c           |  2 +-
 drivers/hwmon/adm1025.c           |  2 +-
 drivers/hwmon/adm1026.c           |  2 +-
 drivers/hwmon/adm1029.c           |  2 +-
 drivers/hwmon/adm1031.c           |  2 +-
 drivers/hwmon/adm9240.c           |  2 +-
 drivers/hwmon/ads7828.c           |  2 +-
 drivers/hwmon/adt7462.c           |  2 +-
 drivers/hwmon/adt7470.c           |  2 +-
 drivers/hwmon/adt7473.c           |  2 +-
 drivers/hwmon/adt7475.c           |  2 +-
 drivers/hwmon/asb100.c            |  2 +-
 drivers/hwmon/atxp1.c             |  2 +-
 drivers/hwmon/dme1737.c           |  2 +-
 drivers/hwmon/ds1621.c            |  2 +-
 drivers/hwmon/f75375s.c           |  2 +-
 drivers/hwmon/fschmd.c            |  2 +-
 drivers/hwmon/gl518sm.c           |  2 +-
 drivers/hwmon/gl520sm.c           |  2 +-
 drivers/hwmon/lm63.c              |  2 +-
 drivers/hwmon/lm73.c              |  2 +-
 drivers/hwmon/lm75.c              |  2 +-
 drivers/hwmon/lm77.c              |  2 +-
 drivers/hwmon/lm78.c              |  2 +-
 drivers/hwmon/lm80.c              |  2 +-
 drivers/hwmon/lm83.c              |  2 +-
 drivers/hwmon/lm85.c              |  2 +-
 drivers/hwmon/lm87.c              |  2 +-
 drivers/hwmon/lm90.c              |  2 +-
 drivers/hwmon/lm92.c              |  2 +-
 drivers/hwmon/lm93.c              |  2 +-
 drivers/hwmon/lm95241.c           |  2 +-
 drivers/hwmon/max1619.c           |  2 +-
 drivers/hwmon/max6650.c           |  2 +-
 drivers/hwmon/pcf8591.c           |  2 +-
 drivers/hwmon/smsc47m192.c        |  2 +-
 drivers/hwmon/thmc50.c            |  2 +-
 drivers/hwmon/tmp401.c            |  2 +-
 drivers/hwmon/tmp421.c            |  2 +-
 drivers/hwmon/w83781d.c           |  2 +-
 drivers/hwmon/w83791d.c           |  2 +-
 drivers/hwmon/w83792d.c           |  2 +-
 drivers/hwmon/w83793.c            |  2 +-
 drivers/hwmon/w83l785ts.c         |  2 +-
 drivers/hwmon/w83l786ng.c         |  2 +-
 drivers/i2c/i2c-core.c            | 15 +++++++------
 drivers/misc/eeprom/eeprom.c      |  2 +-
 drivers/misc/ics932s401.c         |  2 +-
 include/linux/i2c.h               | 44 ++++++++++-----------------------------
 50 files changed, 66 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index 7860aafb483d..0a74603eb671 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -44,7 +44,7 @@ static struct i2c_driver foo_driver = {
 	/* if device autodetection is needed: */
 	.class		= I2C_CLASS_SOMETHING,
 	.detect		= foo_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 
 	.shutdown	= foo_shutdown,	/* optional */
 	.suspend	= foo_suspend,	/* optional */
diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index 3ebcdd9c25a5..d7af039a4d43 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -130,7 +130,7 @@ static struct i2c_driver adm1021_driver = {
 	.remove		= adm1021_remove,
 	.id_table	= adm1021_id,
 	.detect		= adm1021_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static ssize_t show_temp(struct device *dev,
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index 357c9ffa147e..e17651083b09 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -137,7 +137,7 @@ static struct i2c_driver adm1025_driver = {
 	.remove		= adm1025_remove,
 	.id_table	= adm1025_id,
 	.detect		= adm1025_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index 8deb17a402da..85bf23aea7db 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -319,7 +319,7 @@ static struct i2c_driver adm1026_driver = {
 	.remove		= adm1026_remove,
 	.id_table	= adm1026_id,
 	.detect		= adm1026_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int adm1026_read_value(struct i2c_client *client, u8 reg)
diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c
index 9bc9dbcacdbd..a006ae5fbd2b 100644
--- a/drivers/hwmon/adm1029.c
+++ b/drivers/hwmon/adm1029.c
@@ -142,7 +142,7 @@ static struct i2c_driver adm1029_driver = {
 	.remove		= adm1029_remove,
 	.id_table	= adm1029_id,
 	.detect		= adm1029_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c
index cebfbf6926da..1e02799b870e 100644
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -125,7 +125,7 @@ static struct i2c_driver adm1031_driver = {
 	.remove		= adm1031_remove,
 	.id_table	= adm1031_id,
 	.detect		= adm1031_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static inline u8 adm1031_read_value(struct i2c_client *client, u8 reg)
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index 9316e074d690..d9942e74ed4a 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -156,7 +156,7 @@ static struct i2c_driver adm9240_driver = {
 	.remove		= adm9240_remove,
 	.id_table	= adm9240_id,
 	.detect		= adm9240_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* per client data */
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index b50893111532..3827ce4be071 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -183,7 +183,7 @@ static struct i2c_driver ads7828_driver = {
 	.remove = ads7828_remove,
 	.id_table = ads7828_id,
 	.detect = ads7828_detect,
-	.address_data = &addr_data,
+	.address_list = normal_i2c,
 };
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index 30cf002c677f..325700428ef0 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -256,7 +256,7 @@ static struct i2c_driver adt7462_driver = {
 	.remove		= adt7462_remove,
 	.id_table	= adt7462_id,
 	.detect		= adt7462_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 9ffe5c6e495d..33aa0fa3e990 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -196,7 +196,7 @@ static struct i2c_driver adt7470_driver = {
 	.remove		= adt7470_remove,
 	.id_table	= adt7470_id,
 	.detect		= adt7470_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index 78fadaa431e2..1535733ddf19 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -184,7 +184,7 @@ static struct i2c_driver adt7473_driver = {
 	.remove		= adt7473_remove,
 	.id_table	= adt7473_id,
 	.detect		= adt7473_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 3c0571551a9f..1fb8940428c6 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -1412,7 +1412,7 @@ static struct i2c_driver adt7475_driver = {
 	.remove		= adt7475_remove,
 	.id_table	= adt7475_id,
 	.detect		= adt7475_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static void adt7475_read_hystersis(struct i2c_client *client)
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index 507e116d5456..a92512a4a366 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -230,7 +230,7 @@ static struct i2c_driver asb100_driver = {
 	.remove		= asb100_remove,
 	.id_table	= asb100_id,
 	.detect		= asb100_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* 7 Voltages */
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index 6b7459745b66..b0c3051d8a4f 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -67,7 +67,7 @@ static struct i2c_driver atxp1_driver = {
 	.remove		= atxp1_remove,
 	.id_table	= atxp1_id,
 	.detect		= atxp1_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 struct atxp1_data {
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 7024617c1194..a3af09f9dbad 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -2318,7 +2318,7 @@ static struct i2c_driver dme1737_i2c_driver = {
 	.remove = dme1737_i2c_remove,
 	.id_table = dme1737_id,
 	.detect = dme1737_i2c_detect,
-	.address_data = &addr_data,
+	.address_list = normal_i2c,
 };
 
 /* ---------------------------------------------------------------------
diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c
index 5fde2f5139fd..dfa4329090d4 100644
--- a/drivers/hwmon/ds1621.c
+++ b/drivers/hwmon/ds1621.c
@@ -321,7 +321,7 @@ static struct i2c_driver ds1621_driver = {
 	.remove		= ds1621_remove,
 	.id_table	= ds1621_id,
 	.detect		= ds1621_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init ds1621_init(void)
diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
index 2ffcf56b6f4e..f8992c935666 100644
--- a/drivers/hwmon/f75375s.c
+++ b/drivers/hwmon/f75375s.c
@@ -135,7 +135,7 @@ static struct i2c_driver f75375_driver = {
 	.remove = f75375_remove,
 	.id_table = f75375_id,
 	.detect = f75375_detect,
-	.address_data = &addr_data,
+	.address_list = normal_i2c,
 };
 
 static inline int f75375_read8(struct i2c_client *client, u8 reg)
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index bce18e0f1d61..4eebbbeba2d1 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -251,7 +251,7 @@ static struct i2c_driver fschmd_driver = {
 	.remove		= fschmd_remove,
 	.id_table	= fschmd_id,
 	.detect		= fschmd_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c
index 34f83c6a3f45..e9407acd72cb 100644
--- a/drivers/hwmon/gl518sm.c
+++ b/drivers/hwmon/gl518sm.c
@@ -162,7 +162,7 @@ static struct i2c_driver gl518_driver = {
 	.remove		= gl518_remove,
 	.id_table	= gl518_id,
 	.detect		= gl518_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c
index d03ba692fc42..c0ec8c28731e 100644
--- a/drivers/hwmon/gl520sm.c
+++ b/drivers/hwmon/gl520sm.c
@@ -104,7 +104,7 @@ static struct i2c_driver gl520_driver = {
 	.remove		= gl520_remove,
 	.id_table	= gl520_id,
 	.detect		= gl520_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* Client data */
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 26844fc4a66d..1426a455071c 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -156,7 +156,7 @@ static struct i2c_driver lm63_driver = {
 	.remove		= lm63_remove,
 	.id_table	= lm63_id,
 	.detect		= lm63_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/lm73.c b/drivers/hwmon/lm73.c
index e610da9bd80c..fb6ab9a9a608 100644
--- a/drivers/hwmon/lm73.c
+++ b/drivers/hwmon/lm73.c
@@ -182,7 +182,7 @@ static struct i2c_driver lm73_driver = {
 	.remove		= lm73_remove,
 	.id_table	= lm73_ids,
 	.detect		= lm73_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* module glue */
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 8fd759d28ddf..ce2423cd8198 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -295,7 +295,7 @@ static struct i2c_driver lm75_driver = {
 	.remove		= lm75_remove,
 	.id_table	= lm75_ids,
 	.detect		= lm75_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*-----------------------------------------------------------------------*/
diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c
index 6373ab2cd952..b6105e570b0e 100644
--- a/drivers/hwmon/lm77.c
+++ b/drivers/hwmon/lm77.c
@@ -91,7 +91,7 @@ static struct i2c_driver lm77_driver = {
 	.remove		= lm77_remove,
 	.id_table	= lm77_id,
 	.detect		= lm77_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* straight from the datasheet */
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index f58850a9d9e0..cd6a9ea921f6 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -173,7 +173,7 @@ static struct i2c_driver lm78_driver = {
 	.remove		= lm78_i2c_remove,
 	.id_table	= lm78_i2c_id,
 	.detect		= lm78_i2c_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static struct platform_driver lm78_isa_driver = {
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index e3222f3d4a5e..1cf5ff5bfa43 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -159,7 +159,7 @@ static struct i2c_driver lm80_driver = {
 	.remove		= lm80_remove,
 	.id_table	= lm80_id,
 	.detect		= lm80_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index bfb7477cb6b3..b80ae182f851 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -145,7 +145,7 @@ static struct i2c_driver lm83_driver = {
 	.remove		= lm83_remove,
 	.id_table	= lm83_id,
 	.detect		= lm83_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index f5fc45ac6fe5..d29bd34a265e 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -356,7 +356,7 @@ static struct i2c_driver lm85_driver = {
 	.remove		= lm85_remove,
 	.id_table	= lm85_id,
 	.detect		= lm85_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index d545b8596b05..60d34bc578cb 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -184,7 +184,7 @@ static struct i2c_driver lm87_driver = {
 	.remove		= lm87_remove,
 	.id_table	= lm87_id,
 	.detect		= lm87_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 3acbacadac77..3e916ac97ea8 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -191,7 +191,7 @@ static struct i2c_driver lm90_driver = {
 	.remove		= lm90_remove,
 	.id_table	= lm90_id,
 	.detect		= lm90_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index da354222468d..b582b3b7fdee 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -416,7 +416,7 @@ static struct i2c_driver lm92_driver = {
 	.remove		= lm92_remove,
 	.id_table	= lm92_id,
 	.detect		= lm92_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init sensors_lm92_init(void)
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index 7b9c97b72eb8..d160dfdb513f 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -2616,7 +2616,7 @@ static struct i2c_driver lm93_driver = {
 	.remove		= lm93_remove,
 	.id_table	= lm93_id,
 	.detect		= lm93_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init lm93_init(void)
diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c
index 05ede4137e22..55e3bfd49706 100644
--- a/drivers/hwmon/lm95241.c
+++ b/drivers/hwmon/lm95241.c
@@ -460,7 +460,7 @@ static struct i2c_driver lm95241_driver = {
 	.remove		= lm95241_remove,
 	.id_table	= lm95241_id,
 	.detect		= lm95241_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init sensors_lm95241_init(void)
diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c
index 4baf94efd372..94cea29f157b 100644
--- a/drivers/hwmon/max1619.c
+++ b/drivers/hwmon/max1619.c
@@ -113,7 +113,7 @@ static struct i2c_driver max1619_driver = {
 	.remove		= max1619_remove,
 	.id_table	= max1619_id,
 	.detect		= max1619_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index fd5d1acfcc95..c7c126cf22dd 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -141,7 +141,7 @@ static struct i2c_driver max6650_driver = {
 	.remove		= max6650_remove,
 	.id_table	= max6650_id,
 	.detect		= max6650_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/pcf8591.c b/drivers/hwmon/pcf8591.c
index 4355aada01f2..c19e61bd393c 100644
--- a/drivers/hwmon/pcf8591.c
+++ b/drivers/hwmon/pcf8591.c
@@ -299,7 +299,7 @@ static struct i2c_driver pcf8591_driver = {
 
 	.class		= I2C_CLASS_HWMON,	/* Nearest choice */
 	.detect		= pcf8591_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init pcf8591_init(void)
diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c
index 1683fc76759f..34df2e2ee28a 100644
--- a/drivers/hwmon/smsc47m192.c
+++ b/drivers/hwmon/smsc47m192.c
@@ -135,7 +135,7 @@ static struct i2c_driver smsc47m192_driver = {
 	.remove		= smsc47m192_remove,
 	.id_table	= smsc47m192_id,
 	.detect		= smsc47m192_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* Voltages */
diff --git a/drivers/hwmon/thmc50.c b/drivers/hwmon/thmc50.c
index 02ac0d4323a4..866d66507596 100644
--- a/drivers/hwmon/thmc50.c
+++ b/drivers/hwmon/thmc50.c
@@ -108,7 +108,7 @@ static struct i2c_driver thmc50_driver = {
 	.remove = thmc50_remove,
 	.id_table = thmc50_id,
 	.detect = thmc50_detect,
-	.address_data = &addr_data,
+	.address_list = normal_i2c,
 };
 
 static ssize_t show_analog_out(struct device *dev,
diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index 7cf1d541a5fa..ed086491cc9a 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -123,7 +123,7 @@ static struct i2c_driver tmp401_driver = {
 	.remove		= tmp401_remove,
 	.id_table	= tmp401_id,
 	.detect		= tmp401_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index 34eb34c548ff..018ad028c179 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -322,7 +322,7 @@ static struct i2c_driver tmp421_driver = {
 	.remove = tmp421_remove,
 	.id_table = tmp421_id,
 	.detect = tmp421_detect,
-	.address_data = &addr_data,
+	.address_list = normal_i2c,
 };
 
 static int __init tmp421_init(void)
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 44704d2dee63..bfaa888f6e47 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -1536,7 +1536,7 @@ static struct i2c_driver w83781d_driver = {
 	.remove		= w83781d_remove,
 	.id_table	= w83781d_ids,
 	.detect		= w83781d_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index b3e91b6e651a..e059cf0471b0 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -355,7 +355,7 @@ static struct i2c_driver w83791d_driver = {
 	.remove		= w83791d_remove,
 	.id_table	= w83791d_id,
 	.detect		= w83791d_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /* following are the sysfs callback functions */
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index 03b836cdafa6..c6f198a3d924 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -328,7 +328,7 @@ static struct i2c_driver w83792d_driver = {
 	.remove		= w83792d_remove,
 	.id_table	= w83792d_id,
 	.detect		= w83792d_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static inline long in_count_from_reg(int nr, struct w83792d_data *data)
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index acf35e6e2ab9..ed32b18fbc42 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -252,7 +252,7 @@ static struct i2c_driver w83793_driver = {
 	.remove		= w83793_remove,
 	.id_table	= w83793_id,
 	.detect		= w83793_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static ssize_t
diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c
index ec6e4b7fb741..81c59937cf0d 100644
--- a/drivers/hwmon/w83l785ts.c
+++ b/drivers/hwmon/w83l785ts.c
@@ -108,7 +108,7 @@ static struct i2c_driver w83l785ts_driver = {
 	.remove		= w83l785ts_remove,
 	.id_table	= w83l785ts_id,
 	.detect		= w83l785ts_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 /*
diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c
index 12a5fd67bee0..a427347ae82b 100644
--- a/drivers/hwmon/w83l786ng.c
+++ b/drivers/hwmon/w83l786ng.c
@@ -168,7 +168,7 @@ static struct i2c_driver w83l786ng_driver = {
 	.remove		= w83l786ng_remove,
 	.id_table	= w83l786ng_id,
 	.detect		= w83l786ng_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static u8
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index c1047d644d8f..9065c7238b5e 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1214,13 +1214,13 @@ static int i2c_detect_address(struct i2c_client *temp_client,
 
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 {
-	const struct i2c_client_address_data *address_data;
+	const unsigned short *address_list;
 	struct i2c_client *temp_client;
 	int i, err = 0;
 	int adap_id = i2c_adapter_id(adapter);
 
-	address_data = driver->address_data;
-	if (!driver->detect || !address_data)
+	address_list = driver->address_list;
+	if (!driver->detect || !address_list)
 		return 0;
 
 	/* Set up a temporary client to help detect callback */
@@ -1235,7 +1235,7 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 
 	/* Stop here if we can't use SMBUS_QUICK */
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
-		if (address_data->normal_i2c[0] == I2C_CLIENT_END)
+		if (address_list[0] == I2C_CLIENT_END)
 			goto exit_free;
 
 		dev_warn(&adapter->dev, "SMBus Quick command not supported, "
@@ -1244,11 +1244,10 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 		goto exit_free;
 	}
 
-	for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
+	for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) {
 		dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
-			"addr 0x%02x\n", adap_id,
-			address_data->normal_i2c[i]);
-		temp_client->addr = address_data->normal_i2c[i];
+			"addr 0x%02x\n", adap_id, address_list[i]);
+		temp_client->addr = address_list[i];
 		err = i2c_detect_address(temp_client, driver);
 		if (err)
 			goto exit_free;
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 2c428f464539..3dc5e3db2c12 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -232,7 +232,7 @@ static struct i2c_driver eeprom_driver = {
 
 	.class		= I2C_CLASS_DDC | I2C_CLASS_SPD,
 	.detect		= eeprom_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static int __init eeprom_init(void)
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 75097ec43edd..d8a84718d687 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -125,7 +125,7 @@ static struct i2c_driver ics932s401_driver = {
 	.remove		= ics932s401_remove,
 	.id_table	= ics932s401_id,
 	.detect		= ics932s401_detect,
-	.address_data	= &addr_data,
+	.address_list	= normal_i2c,
 };
 
 static struct ics932s401_data *ics932s401_update_device(struct device *dev)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f6f2c080ba67..fb9df1416ad5 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -110,7 +110,7 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
  * @driver: Device driver model driver
  * @id_table: List of I2C devices supported by this driver
  * @detect: Callback for device detection
- * @address_data: The I2C addresses to probe (for detect)
+ * @address_list: The I2C addresses to probe (for detect)
  * @clients: List of detected clients we created (for i2c-core use only)
  *
  * The driver.owner field should be set to the module owner of this driver.
@@ -162,7 +162,7 @@ struct i2c_driver {
 
 	/* Device detection callback for automatic device creation */
 	int (*detect)(struct i2c_client *, struct i2c_board_info *);
-	const struct i2c_client_address_data *address_data;
+	const unsigned short *address_list;
 	struct list_head clients;
 };
 #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver)
@@ -391,14 +391,6 @@ static inline void i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
 #define I2C_CLASS_SPD		(1<<7)	/* SPD EEPROMs and similar */
 
-/* i2c_client_address_data is the struct for holding default client
- * addresses for a driver and for the parameters supplied on the
- * command line
- */
-struct i2c_client_address_data {
-	const unsigned short *normal_i2c;
-};
-
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU
 
@@ -610,48 +602,34 @@ union i2c_smbus_data {
   module_param_array(var, short, &var##_num, 0); \
   MODULE_PARM_DESC(var, desc)
 
-#define I2C_CLIENT_INSMOD_COMMON					\
-static const struct i2c_client_address_data addr_data = {		\
-	.normal_i2c	= normal_i2c,					\
-}
-
 /* These are the ones you want to use in your own drivers. Pick the one
    which matches the number of devices the driver differenciates between. */
-#define I2C_CLIENT_INSMOD						\
-I2C_CLIENT_INSMOD_COMMON
+#define I2C_CLIENT_INSMOD
 
 #define I2C_CLIENT_INSMOD_1(chip1)					\
-enum chips { any_chip, chip1 };						\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1 }
 
 #define I2C_CLIENT_INSMOD_2(chip1, chip2)				\
-enum chips { any_chip, chip1, chip2 };					\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1, chip2 }
 
 #define I2C_CLIENT_INSMOD_3(chip1, chip2, chip3)			\
-enum chips { any_chip, chip1, chip2, chip3 };				\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1, chip2, chip3 }
 
 #define I2C_CLIENT_INSMOD_4(chip1, chip2, chip3, chip4)			\
-enum chips { any_chip, chip1, chip2, chip3, chip4 };			\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1, chip2, chip3, chip4 }
 
 #define I2C_CLIENT_INSMOD_5(chip1, chip2, chip3, chip4, chip5)		\
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 };		\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 }
 
 #define I2C_CLIENT_INSMOD_6(chip1, chip2, chip3, chip4, chip5, chip6)	\
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 };	\
-I2C_CLIENT_INSMOD_COMMON
+enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 }
 
 #define I2C_CLIENT_INSMOD_7(chip1, chip2, chip3, chip4, chip5, chip6, chip7) \
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
-	     chip7 };							\
-I2C_CLIENT_INSMOD_COMMON
+	     chip7 }
 
 #define I2C_CLIENT_INSMOD_8(chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8) \
 enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
-	     chip7, chip8 };						\
-I2C_CLIENT_INSMOD_COMMON
+	     chip7, chip8 }
 #endif /* __KERNEL__ */
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 1f86df49ddfd0067cce941187d57b2fd2f749a9e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 14 Dec 2009 21:17:26 +0100
Subject: i2c: Drop I2C_CLIENT_INSMOD_1

This macro simply declares an enum, so drivers might as well declare
it themselves.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/hwmon/adm1026.c      |  5 +----
 drivers/hwmon/adm1029.c      |  8 +-------
 drivers/hwmon/ads7828.c      |  7 ++-----
 drivers/hwmon/adt7462.c      |  5 +----
 drivers/hwmon/adt7470.c      |  5 +----
 drivers/hwmon/adt7473.c      |  5 +----
 drivers/hwmon/asb100.c       |  5 +----
 drivers/hwmon/atxp1.c        |  4 +---
 drivers/hwmon/ds1621.c       |  5 ++---
 drivers/hwmon/gl520sm.c      |  5 +----
 drivers/hwmon/lm63.c         |  8 +-------
 drivers/hwmon/lm73.c         |  5 +----
 drivers/hwmon/lm75.c         | 10 ++--------
 drivers/hwmon/lm77.c         |  5 +----
 drivers/hwmon/lm80.c         |  5 +----
 drivers/hwmon/lm92.c         |  5 +----
 drivers/hwmon/lm93.c         |  3 +--
 drivers/hwmon/lm95241.c      |  5 +----
 drivers/hwmon/max1619.c      |  8 +-------
 drivers/hwmon/max6650.c      |  4 +---
 drivers/hwmon/pcf8591.c      |  1 -
 drivers/hwmon/smsc47m192.c   |  5 +----
 drivers/hwmon/w83791d.c      |  3 +--
 drivers/hwmon/w83792d.c      |  3 +--
 drivers/hwmon/w83793.c       |  3 +--
 drivers/hwmon/w83l785ts.c    |  8 +-------
 drivers/hwmon/w83l786ng.c    |  3 +--
 drivers/misc/eeprom/eeprom.c |  3 ---
 drivers/misc/ics932s401.c    |  5 +----
 include/linux/i2c.h          |  5 -----
 30 files changed, 30 insertions(+), 121 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index 85bf23aea7db..65335b268fa9 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -37,9 +37,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(adm1026);
-
 static int gpio_input[17] = { -1, -1, -1, -1, -1, -1, -1, -1, -1,
 				-1, -1, -1, -1, -1, -1, -1, -1 };
 static int gpio_output[17] = { -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -305,7 +302,7 @@ static void adm1026_init_client(struct i2c_client *client);
 
 
 static const struct i2c_device_id adm1026_id[] = {
-	{ "adm1026", adm1026 },
+	{ "adm1026", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adm1026_id);
diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c
index a006ae5fbd2b..0b8a3b145bd2 100644
--- a/drivers/hwmon/adm1029.c
+++ b/drivers/hwmon/adm1029.c
@@ -43,12 +43,6 @@ static const unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
 						0x2e, 0x2f, I2C_CLIENT_END
 };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(adm1029);
-
 /*
  * The ADM1029 registers
  * Manufacturer ID is 0x41 for Analog Devices
@@ -128,7 +122,7 @@ static int adm1029_init_client(struct i2c_client *client);
  */
 
 static const struct i2c_device_id adm1029_id[] = {
-	{ "adm1029", adm1029 },
+	{ "adm1029", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adm1029_id);
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index 3827ce4be071..aac85f3aed50 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -47,10 +47,7 @@
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 	I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(ads7828);
-
-/* Other module parameters */
+/* Module parameters */
 static int se_input = 1; /* Default is SE, 0 == diff */
 static int int_vref = 1; /* Default is internal ref ON */
 static int vref_mv = ADS7828_INT_VREF_MV; /* set if vref != 2.5V */
@@ -168,7 +165,7 @@ static int ads7828_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id ads7828_id[] = {
-	{ "ads7828", ads7828 },
+	{ "ads7828", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ads7828_id);
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index 325700428ef0..a1a7ef14b519 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -32,9 +32,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x58, 0x5C, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(adt7462);
-
 /* ADT7462 registers */
 #define ADT7462_REG_DEVICE			0x3D
 #define ADT7462_REG_VENDOR			0x3E
@@ -242,7 +239,7 @@ static int adt7462_detect(struct i2c_client *client,
 static int adt7462_remove(struct i2c_client *client);
 
 static const struct i2c_device_id adt7462_id[] = {
-	{ "adt7462", adt7462 },
+	{ "adt7462", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adt7462_id);
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 33aa0fa3e990..3445ce1cba81 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -33,9 +33,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(adt7470);
-
 /* ADT7470 registers */
 #define ADT7470_REG_BASE_ADDR			0x20
 #define ADT7470_REG_TEMP_BASE_ADDR		0x20
@@ -182,7 +179,7 @@ static int adt7470_detect(struct i2c_client *client,
 static int adt7470_remove(struct i2c_client *client);
 
 static const struct i2c_device_id adt7470_id[] = {
-	{ "adt7470", adt7470 },
+	{ "adt7470", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adt7470_id);
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index 1535733ddf19..434576f61c84 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -32,9 +32,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2C, 0x2D, 0x2E, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(adt7473);
-
 /* ADT7473 registers */
 #define ADT7473_REG_BASE_ADDR			0x20
 
@@ -171,7 +168,7 @@ static int adt7473_detect(struct i2c_client *client,
 static int adt7473_remove(struct i2c_client *client);
 
 static const struct i2c_device_id adt7473_id[] = {
-	{ "adt7473", adt7473 },
+	{ "adt7473", 0 },
 	{ }
 };
 
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index a92512a4a366..7dada559b3a1 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -51,9 +51,6 @@
 /* I2C addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2d, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(asb100);
-
 static unsigned short force_subclients[4];
 module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
@@ -216,7 +213,7 @@ static struct asb100_data *asb100_update_device(struct device *dev);
 static void asb100_init_client(struct i2c_client *client);
 
 static const struct i2c_device_id asb100_id[] = {
-	{ "asb100", asb100 },
+	{ "asb100", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, asb100_id);
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index b0c3051d8a4f..94cadc19f0c5 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -44,8 +44,6 @@ MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
 
 static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END };
 
-I2C_CLIENT_INSMOD_1(atxp1);
-
 static int atxp1_probe(struct i2c_client *client,
 		       const struct i2c_device_id *id);
 static int atxp1_remove(struct i2c_client *client);
@@ -53,7 +51,7 @@ static struct atxp1_data * atxp1_update_device(struct device *dev);
 static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info);
 
 static const struct i2c_device_id atxp1_id[] = {
-	{ "atxp1", atxp1 },
+	{ "atxp1", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, atxp1_id);
diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c
index dfa4329090d4..e11363467a8d 100644
--- a/drivers/hwmon/ds1621.c
+++ b/drivers/hwmon/ds1621.c
@@ -38,7 +38,6 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c,
 					0x4d, 0x4e, 0x4f, I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(ds1621);
 static int polarity = -1;
 module_param(polarity, int, 0);
 MODULE_PARM_DESC(polarity, "Output's polarity: 0 = active high, 1 = active low");
@@ -305,8 +304,8 @@ static int ds1621_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id ds1621_id[] = {
-	{ "ds1621", ds1621 },
-	{ "ds1625", ds1621 },
+	{ "ds1621", 0 },
+	{ "ds1625", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ds1621_id);
diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c
index c0ec8c28731e..ec588026f0a9 100644
--- a/drivers/hwmon/gl520sm.c
+++ b/drivers/hwmon/gl520sm.c
@@ -41,9 +41,6 @@ MODULE_PARM_DESC(extra_sensor_type, "Type of extra sensor (0=autodetect, 1=tempe
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(gl520sm);
-
 /* Many GL520 constants specified below
 One of the inputs can be configured as either temp or voltage.
 That's why _TEMP2 and _IN4 access the same register
@@ -90,7 +87,7 @@ static struct gl520_data *gl520_update_device(struct device *dev);
 
 /* Driver data */
 static const struct i2c_device_id gl520_id[] = {
-	{ "gl520sm", gl520sm },
+	{ "gl520sm", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, gl520_id);
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 1426a455071c..bf81aff7051d 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -55,12 +55,6 @@
 
 static const unsigned short normal_i2c[] = { 0x4c, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(lm63);
-
 /*
  * The LM63 registers
  */
@@ -142,7 +136,7 @@ static void lm63_init_client(struct i2c_client *client);
  */
 
 static const struct i2c_device_id lm63_id[] = {
-	{ "lm63", lm63 },
+	{ "lm63", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm63_id);
diff --git a/drivers/hwmon/lm73.c b/drivers/hwmon/lm73.c
index fb6ab9a9a608..c5f39ba103c0 100644
--- a/drivers/hwmon/lm73.c
+++ b/drivers/hwmon/lm73.c
@@ -27,9 +27,6 @@
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c,
 					0x4d, 0x4e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm73);
-
 /* LM73 registers */
 #define LM73_REG_INPUT		0x00
 #define LM73_REG_CONF		0x01
@@ -145,7 +142,7 @@ static int lm73_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id lm73_ids[] = {
-	{ "lm73", lm73 },
+	{ "lm73", 0 },
 	{ /* LIST END */ }
 };
 MODULE_DEVICE_TABLE(i2c, lm73_ids);
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index ce2423cd8198..8ae2cfe2d827 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -32,15 +32,12 @@
 
 /*
  * This driver handles the LM75 and compatible digital temperature sensors.
- * Only types which are _not_ listed in I2C_CLIENT_INSMOD_*() need to be
- * listed here.  We start at 9 since I2C_CLIENT_INSMOD_*() currently allow
- * definition of up to 8 chip types (plus zero).
  */
 
 enum lm75_type {		/* keep sorted in alphabetical order */
-	ds1775 = 9,
+	ds1775,
 	ds75,
-	/* lm75 -- in I2C_CLIENT_INSMOD_1() */
+	lm75,
 	lm75a,
 	max6625,
 	max6626,
@@ -58,9 +55,6 @@ enum lm75_type {		/* keep sorted in alphabetical order */
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c,
 					0x4d, 0x4e, 0x4f, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm75);
-
 
 /* The LM75 registers */
 #define LM75_REG_CONF		0x01
diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c
index b6105e570b0e..b28a297be50c 100644
--- a/drivers/hwmon/lm77.c
+++ b/drivers/hwmon/lm77.c
@@ -39,9 +39,6 @@
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 						I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm77);
-
 /* The LM77 registers */
 #define LM77_REG_TEMP		0x00
 #define LM77_REG_CONF		0x01
@@ -76,7 +73,7 @@ static struct lm77_data *lm77_update_device(struct device *dev);
 
 
 static const struct i2c_device_id lm77_id[] = {
-	{ "lm77", lm77 },
+	{ "lm77", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm77_id);
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index 1cf5ff5bfa43..18a0e6c5fe88 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -35,9 +35,6 @@
 static const unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
 						0x2e, 0x2f, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm80);
-
 /* Many LM80 constants specified below */
 
 /* The LM80 registers */
@@ -145,7 +142,7 @@ static int lm80_write_value(struct i2c_client *client, u8 reg, u8 value);
  */
 
 static const struct i2c_device_id lm80_id[] = {
-	{ "lm80", lm80 },
+	{ "lm80", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm80_id);
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index b582b3b7fdee..7c31e6205f85 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -54,9 +54,6 @@
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 						I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm92);
-
 /* The LM92 registers */
 #define LM92_REG_CONFIG			0x01 /* 8-bit, RW */
 #define LM92_REG_TEMP			0x00 /* 16-bit, RO */
@@ -401,7 +398,7 @@ static int lm92_remove(struct i2c_client *client)
  */
 
 static const struct i2c_device_id lm92_id[] = {
-	{ "lm92", lm92 },
+	{ "lm92", 0 },
 	/* max6635 could be added here */
 	{ }
 };
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index d160dfdb513f..6669255aadcf 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -145,7 +145,6 @@
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm93);
 
 static int disable_block;
 module_param(disable_block, bool, 0);
@@ -2602,7 +2601,7 @@ static int lm93_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id lm93_id[] = {
-	{ "lm93", lm93 },
+	{ "lm93", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm93_id);
diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c
index 55e3bfd49706..8fc8eb8cba47 100644
--- a/drivers/hwmon/lm95241.c
+++ b/drivers/hwmon/lm95241.c
@@ -39,9 +39,6 @@
 static const unsigned short normal_i2c[] = {
 	0x19, 0x2a, 0x2b, I2C_CLIENT_END};
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(lm95241);
-
 /* LM95241 registers */
 #define LM95241_REG_R_MAN_ID		0xFE
 #define LM95241_REG_R_CHIP_ID		0xFF
@@ -446,7 +443,7 @@ static struct lm95241_data *lm95241_update_device(struct device *dev)
 
 /* Driver data (common to all clients) */
 static const struct i2c_device_id lm95241_id[] = {
-	{ "lm95241", lm95241 },
+	{ "lm95241", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm95241_id);
diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c
index 94cea29f157b..022ded098100 100644
--- a/drivers/hwmon/max1619.c
+++ b/drivers/hwmon/max1619.c
@@ -40,12 +40,6 @@
 static const unsigned short normal_i2c[] = {
 	0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(max1619);
-
 /*
  * The MAX1619 registers
  */
@@ -99,7 +93,7 @@ static struct max1619_data *max1619_update_device(struct device *dev);
  */
 
 static const struct i2c_device_id max1619_id[] = {
-	{ "max1619", max1619 },
+	{ "max1619", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max1619_id);
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index c7c126cf22dd..a0160ee5caef 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -62,8 +62,6 @@ module_param(fan_voltage, int, S_IRUGO);
 module_param(prescaler, int, S_IRUGO);
 module_param(clock, int, S_IRUGO);
 
-I2C_CLIENT_INSMOD_1(max6650);
-
 /*
  * MAX 6650/6651 registers
  */
@@ -127,7 +125,7 @@ static struct max6650_data *max6650_update_device(struct device *dev);
  */
 
 static const struct i2c_device_id max6650_id[] = {
-	{ "max6650", max6650 },
+	{ "max6650", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max6650_id);
diff --git a/drivers/hwmon/pcf8591.c b/drivers/hwmon/pcf8591.c
index c19e61bd393c..d44787949851 100644
--- a/drivers/hwmon/pcf8591.c
+++ b/drivers/hwmon/pcf8591.c
@@ -29,7 +29,6 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c,
 					0x4d, 0x4e, 0x4f, I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(pcf8591);
 
 static int input_mode;
 module_param(input_mode, int, 0);
diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c
index 34df2e2ee28a..40b26673d87f 100644
--- a/drivers/hwmon/smsc47m192.c
+++ b/drivers/hwmon/smsc47m192.c
@@ -36,9 +36,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(smsc47m192);
-
 /* SMSC47M192 registers */
 #define SMSC47M192_REG_IN(nr)		((nr)<6 ? (0x20 + (nr)) : \
 					(0x50 + (nr) - 6))
@@ -121,7 +118,7 @@ static int smsc47m192_remove(struct i2c_client *client);
 static struct smsc47m192_data *smsc47m192_update_device(struct device *dev);
 
 static const struct i2c_device_id smsc47m192_id[] = {
-	{ "smsc47m192", smsc47m192 },
+	{ "smsc47m192", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, smsc47m192_id);
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index e059cf0471b0..400a88bde278 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -52,7 +52,6 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f,
 						I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(w83791d);
 
 static unsigned short force_subclients[4];
 module_param_array(force_subclients, short, NULL, 0);
@@ -341,7 +340,7 @@ static void w83791d_print_debug(struct w83791d_data *data, struct device *dev);
 static void w83791d_init_client(struct i2c_client *client);
 
 static const struct i2c_device_id w83791d_id[] = {
-	{ "w83791d", w83791d },
+	{ "w83791d", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, w83791d_id);
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index c6f198a3d924..679718e6b017 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -50,7 +50,6 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f,
 						I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(w83792d);
 
 static unsigned short force_subclients[4];
 module_param_array(force_subclients, short, NULL, 0);
@@ -314,7 +313,7 @@ static void w83792d_print_debug(struct w83792d_data *data, struct device *dev);
 static void w83792d_init_client(struct i2c_client *client);
 
 static const struct i2c_device_id w83792d_id[] = {
-	{ "w83792d", w83792d },
+	{ "w83792d", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, w83792d_id);
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index ed32b18fbc42..9a2022b67495 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -41,7 +41,6 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f,
 						I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(w83793);
 
 static unsigned short force_subclients[4];
 module_param_array(force_subclients, short, NULL, 0);
@@ -238,7 +237,7 @@ static void w83793_update_nonvolatile(struct device *dev);
 static struct w83793_data *w83793_update_device(struct device *dev);
 
 static const struct i2c_device_id w83793_id[] = {
-	{ "w83793", w83793 },
+	{ "w83793", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, w83793_id);
diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c
index 81c59937cf0d..20781def65ed 100644
--- a/drivers/hwmon/w83l785ts.c
+++ b/drivers/hwmon/w83l785ts.c
@@ -51,12 +51,6 @@
 
 static const unsigned short normal_i2c[] = { 0x2e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_1(w83l785ts);
-
 /*
  * The W83L785TS-S registers
  * Manufacturer ID is 0x5CA3 for Winbond.
@@ -94,7 +88,7 @@ static struct w83l785ts_data *w83l785ts_update_device(struct device *dev);
  */
  
 static const struct i2c_device_id w83l785ts_id[] = {
-	{ "w83l785ts", w83l785ts },
+	{ "w83l785ts", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, w83l785ts_id);
diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c
index a427347ae82b..0254e181893d 100644
--- a/drivers/hwmon/w83l786ng.c
+++ b/drivers/hwmon/w83l786ng.c
@@ -38,7 +38,6 @@
 static const unsigned short normal_i2c[] = { 0x2e, 0x2f, I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_1(w83l786ng);
 
 static int reset;
 module_param(reset, bool, 0);
@@ -154,7 +153,7 @@ static void w83l786ng_init_client(struct i2c_client *client);
 static struct w83l786ng_data *w83l786ng_update_device(struct device *dev);
 
 static const struct i2c_device_id w83l786ng_id[] = {
-	{ "w83l786ng", w83l786ng },
+	{ "w83l786ng", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, w83l786ng_id);
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 3dc5e3db2c12..f939ebc2507c 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -32,9 +32,6 @@
 static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54,
 					0x55, 0x56, 0x57, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(eeprom);
-
 
 /* Size of EEPROM in bytes */
 #define EEPROM_SIZE		256
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index d8a84718d687..395a4ea64e9c 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -30,9 +30,6 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(ics932s401);
-
 /* ICS932S401 registers */
 #define ICS932S401_REG_CFG2			0x01
 #define 	ICS932S401_CFG1_SPREAD		0x01
@@ -111,7 +108,7 @@ static int ics932s401_detect(struct i2c_client *client,
 static int ics932s401_remove(struct i2c_client *client);
 
 static const struct i2c_device_id ics932s401_id[] = {
-	{ "ics932s401", ics932s401 },
+	{ "ics932s401", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ics932s401_id);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index fb9df1416ad5..7178b27146ed 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -604,11 +604,6 @@ union i2c_smbus_data {
 
 /* These are the ones you want to use in your own drivers. Pick the one
    which matches the number of devices the driver differenciates between. */
-#define I2C_CLIENT_INSMOD
-
-#define I2C_CLIENT_INSMOD_1(chip1)					\
-enum chips { any_chip, chip1 }
-
 #define I2C_CLIENT_INSMOD_2(chip1, chip2)				\
 enum chips { any_chip, chip1, chip2 }
 
-- 
cgit v1.2.3


From e5e9f44c246fbafe723e579e9fe887677beb40e4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 14 Dec 2009 21:17:27 +0100
Subject: i2c: Drop I2C_CLIENT_INSMOD_2 to 8

These macros simply declare an enum, so drivers might as well declare
it themselves. This puts an end to the arbitrary limit of 8 chip types
per i2c driver.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/hwmon/adm1021.c |  5 ++---
 drivers/hwmon/adm1025.c |  6 +-----
 drivers/hwmon/adm1031.c |  3 +--
 drivers/hwmon/adm9240.c |  3 +--
 drivers/hwmon/adt7475.c |  2 +-
 drivers/hwmon/dme1737.c |  6 +-----
 drivers/hwmon/f75375s.c |  3 +--
 drivers/hwmon/fschmd.c  |  3 ++-
 drivers/hwmon/gl518sm.c |  3 +--
 drivers/hwmon/lm78.c    |  3 +--
 drivers/hwmon/lm83.c    |  6 +-----
 drivers/hwmon/lm85.c    |  8 +++++---
 drivers/hwmon/lm87.c    |  6 +-----
 drivers/hwmon/lm90.c    |  7 +------
 drivers/hwmon/thmc50.c  |  2 +-
 drivers/hwmon/tmp401.c  |  3 +--
 drivers/hwmon/tmp421.c  |  3 +--
 drivers/hwmon/w83781d.c |  5 +++--
 include/linux/i2c.h     | 24 ------------------------
 19 files changed, 26 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index d7af039a4d43..1ad0a885c5a5 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -34,9 +34,8 @@
 static const unsigned short normal_i2c[] = {
 	0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_8(adm1021, adm1023, max1617, max1617a, thmc10, lm84, gl523sm,
-			mc1066);
+enum chips {
+	adm1021, adm1023, max1617, max1617a, thmc10, lm84, gl523sm, mc1066 };
 
 /* adm1021 constants specified below */
 
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index e17651083b09..251b63165e2a 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -64,11 +64,7 @@
 
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_2(adm1025, ne1619);
+enum chips { adm1025, ne1619 };
 
 /*
  * The ADM1025 registers
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c
index 1e02799b870e..1644b92e7cc4 100644
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -64,8 +64,7 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(adm1030, adm1031);
+enum chips { adm1030, adm1031 };
 
 typedef u8 auto_chan_table_t[8][2];
 
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index d9942e74ed4a..0727ad250793 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -55,8 +55,7 @@
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f,
 					I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_3(adm9240, ds1780, lm81);
+enum chips { adm9240, ds1780, lm81 };
 
 /* ADM9240 registers */
 #define ADM9240_REG_MAN_ID		0x3e
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 1fb8940428c6..a0c385145686 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -148,7 +148,7 @@
 
 static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-I2C_CLIENT_INSMOD_4(adt7473, adt7475, adt7476, adt7490);
+enum chips { adt7473, adt7475, adt7476, adt7490 };
 
 static const struct i2c_device_id adt7475_id[] = {
 	{ "adt7473", adt7473 },
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index a3af09f9dbad..823dd28a902c 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -57,11 +57,7 @@ MODULE_PARM_DESC(probe_all_addr, "Include probing of non-standard LPC "
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = {0x2c, 0x2d, 0x2e, I2C_CLIENT_END};
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(dme1737, sch5027);
-
-/* ISA chip types */
-enum isa_chips { sch311x = sch5027 + 1 };
+enum chips { dme1737, sch5027, sch311x };
 
 /* ---------------------------------------------------------------------
  * Registers
diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
index f8992c935666..277398f9c938 100644
--- a/drivers/hwmon/f75375s.c
+++ b/drivers/hwmon/f75375s.c
@@ -39,8 +39,7 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2d, 0x2e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(f75373, f75375);
+enum chips { f75373, f75375 };
 
 /* Fintek F75375 registers  */
 #define F75375_REG_CONFIG0		0x0
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 4eebbbeba2d1..bd0fc67e804b 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -56,7 +56,8 @@ static int nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, int, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-I2C_CLIENT_INSMOD_7(fscpos, fscher, fscscy, fschrc, fschmd, fschds, fscsyl);
+
+enum chips { fscpos, fscher, fscscy, fschrc, fschmd, fschds, fscsyl };
 
 /*
  * The FSCHMD registers and other defines
diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c
index e9407acd72cb..e7ae5743e181 100644
--- a/drivers/hwmon/gl518sm.c
+++ b/drivers/hwmon/gl518sm.c
@@ -46,8 +46,7 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(gl518sm_r00, gl518sm_r80);
+enum chips { gl518sm_r00, gl518sm_r80 };
 
 /* Many GL518 constants specified below */
 
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index cd6a9ea921f6..cadcbd90ff3b 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -41,8 +41,7 @@ static const unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
 						0x2e, 0x2f, I2C_CLIENT_END };
 static unsigned short isa_address = 0x290;
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(lm78, lm79);
+enum chips { lm78, lm79 };
 
 /* Many LM78 constants specified below */
 
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index b80ae182f851..8290476aee4a 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -51,11 +51,7 @@
 static const unsigned short normal_i2c[] = {
 	0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_2(lm83, lm82);
+enum chips { lm83, lm82 };
 
 /*
  * The LM83 registers
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index d29bd34a265e..b3841a615595 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -38,9 +38,11 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_7(lm85b, lm85c, adm1027, adt7463, adt7468, emc6d100,
-		    emc6d102);
+enum chips {
+	any_chip, lm85b, lm85c,
+	adm1027, adt7463, adt7468,
+	emc6d100, emc6d102
+};
 
 /* The LM85 registers */
 
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index 60d34bc578cb..f1e6e7512ffa 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -74,11 +74,7 @@
 
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_2(lm87, adm1024);
+enum chips { lm87, adm1024 };
 
 /*
  * The LM87 registers
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 3e916ac97ea8..7c9bdc167426 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -93,12 +93,7 @@
 static const unsigned short normal_i2c[] = {
 	0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END };
 
-/*
- * Insmod parameters
- */
-
-I2C_CLIENT_INSMOD_8(lm90, adm1032, lm99, lm86, max6657, adt7461, max6680,
-		    max6646);
+enum chips { lm90, adm1032, lm99, lm86, max6657, adt7461, max6680, max6646 };
 
 /*
  * The LM90 registers
diff --git a/drivers/hwmon/thmc50.c b/drivers/hwmon/thmc50.c
index 866d66507596..7dfb4dec4c5f 100644
--- a/drivers/hwmon/thmc50.c
+++ b/drivers/hwmon/thmc50.c
@@ -35,7 +35,7 @@ MODULE_LICENSE("GPL");
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 /* Insmod parameters */
-I2C_CLIENT_INSMOD_2(thmc50, adm1022);
+enum chips { thmc50, adm1022 };
 
 static unsigned short adm1022_temp3[16];
 static unsigned int adm1022_temp3_num;
diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index ed086491cc9a..a13b30e8d8d8 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -42,8 +42,7 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x4c, I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(tmp401, tmp411);
+enum chips { tmp401, tmp411 };
 
 /*
  * The TMP401 registers, note some registers have different addresses for
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index 018ad028c179..4f7c051e2d7b 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -39,8 +39,7 @@
 static unsigned short normal_i2c[] = { 0x2a, 0x4c, 0x4d, 0x4e, 0x4f,
 				       I2C_CLIENT_END };
 
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_3(tmp421, tmp422, tmp423);
+enum chips { tmp421, tmp422, tmp423 };
 
 /* The TMP421 registers */
 #define TMP421_CONFIG_REG_1			0x09
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index bfaa888f6e47..05f9225b6f94 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -56,9 +56,10 @@
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
 						0x2e, 0x2f, I2C_CLIENT_END };
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_4(w83781d, w83782d, w83783s, as99127f);
 
+enum chips { w83781d, w83782d, w83783s, as99127f };
+
+/* Insmod parameters */
 static unsigned short force_subclients[4];
 module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 7178b27146ed..cc6e1508ae90 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -602,29 +602,5 @@ union i2c_smbus_data {
   module_param_array(var, short, &var##_num, 0); \
   MODULE_PARM_DESC(var, desc)
 
-/* These are the ones you want to use in your own drivers. Pick the one
-   which matches the number of devices the driver differenciates between. */
-#define I2C_CLIENT_INSMOD_2(chip1, chip2)				\
-enum chips { any_chip, chip1, chip2 }
-
-#define I2C_CLIENT_INSMOD_3(chip1, chip2, chip3)			\
-enum chips { any_chip, chip1, chip2, chip3 }
-
-#define I2C_CLIENT_INSMOD_4(chip1, chip2, chip3, chip4)			\
-enum chips { any_chip, chip1, chip2, chip3, chip4 }
-
-#define I2C_CLIENT_INSMOD_5(chip1, chip2, chip3, chip4, chip5)		\
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 }
-
-#define I2C_CLIENT_INSMOD_6(chip1, chip2, chip3, chip4, chip5, chip6)	\
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 }
-
-#define I2C_CLIENT_INSMOD_7(chip1, chip2, chip3, chip4, chip5, chip6, chip7) \
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
-	     chip7 }
-
-#define I2C_CLIENT_INSMOD_8(chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8) \
-enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6,	\
-	     chip7, chip8 }
 #endif /* __KERNEL__ */
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 7f508118b1c1f9856a1c899a2bd4867a962b0225 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 14 Dec 2009 21:17:29 +0100
Subject: i2c: Get rid of I2C_CLIENT_MODULE_PARM

There is no user left of I2C_CLIENT_MODULE_PARM, so we can finally
get rid of this ugly macro.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c.h | 35 -----------------------------------
 1 file changed, 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cc6e1508ae90..02fc617782ef 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -568,39 +568,4 @@ union i2c_smbus_data {
 #define I2C_SMBUS_BLOCK_PROC_CALL   7		/* SMBus 2.0 */
 #define I2C_SMBUS_I2C_BLOCK_DATA    8
 
-
-#ifdef __KERNEL__
-
-/* These defines are used for probing i2c client addresses */
-/* The length of the option lists */
-#define I2C_CLIENT_MAX_OPTS 48
-
-/* Default fill of many variables */
-#define I2C_CLIENT_DEFAULTS {I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END, \
-			     I2C_CLIENT_END, I2C_CLIENT_END, I2C_CLIENT_END}
-
-/* I2C_CLIENT_MODULE_PARM creates a module parameter, and puts it in the
-   module header */
-
-#define I2C_CLIENT_MODULE_PARM(var,desc) \
-  static unsigned short var[I2C_CLIENT_MAX_OPTS] = I2C_CLIENT_DEFAULTS; \
-  static unsigned int var##_num; \
-  module_param_array(var, short, &var##_num, 0); \
-  MODULE_PARM_DESC(var, desc)
-
-#endif /* __KERNEL__ */
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From ef12f10994281e2e44526fa0abf23fdd7d5bd87f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 7 Nov 2009 23:04:15 +0100
Subject: locking: Split rwlock from spinlock headers

Move the rwlock defines and inlines into separate header files. This
makes the selection for -rt easier.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rwlock.h         | 125 +++++++++++++++++++++++++++++++++++++++++
 include/linux/rwlock_types.h   |  56 ++++++++++++++++++
 include/linux/spinlock.h       | 100 +++------------------------------
 include/linux/spinlock_types.h |  43 ++------------
 4 files changed, 195 insertions(+), 129 deletions(-)
 create mode 100644 include/linux/rwlock.h
 create mode 100644 include/linux/rwlock_types.h

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
new file mode 100644
index 000000000000..73785b0bd6b9
--- /dev/null
+++ b/include/linux/rwlock.h
@@ -0,0 +1,125 @@
+#ifndef __LINUX_RWLOCK_H
+#define __LINUX_RWLOCK_H
+
+#ifndef __LINUX_SPINLOCK_H
+# error "please don't include this file directly"
+#endif
+
+/*
+ * rwlock related methods
+ *
+ * split out from spinlock.h
+ *
+ * portions Copyright 2005, Red Hat, Inc., Ingo Molnar
+ * Released under the General Public License (GPL).
+ */
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+  extern void __rwlock_init(rwlock_t *lock, const char *name,
+			    struct lock_class_key *key);
+# define rwlock_init(lock)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__rwlock_init((lock), #lock, &__key);			\
+} while (0)
+#else
+# define rwlock_init(lock)					\
+	do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+ extern void _raw_read_lock(rwlock_t *lock);
+#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
+ extern int _raw_read_trylock(rwlock_t *lock);
+ extern void _raw_read_unlock(rwlock_t *lock);
+ extern void _raw_write_lock(rwlock_t *lock);
+#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
+ extern int _raw_write_trylock(rwlock_t *lock);
+ extern void _raw_write_unlock(rwlock_t *lock);
+#else
+# define _raw_read_lock(rwlock)		__raw_read_lock(&(rwlock)->raw_lock)
+# define _raw_read_lock_flags(lock, flags) \
+		__raw_read_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_read_trylock(rwlock)	__raw_read_trylock(&(rwlock)->raw_lock)
+# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
+# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
+# define _raw_write_lock_flags(lock, flags) \
+		__raw_write_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_write_trylock(rwlock)	__raw_write_trylock(&(rwlock)->raw_lock)
+# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
+#endif
+
+#define read_can_lock(rwlock)		__raw_read_can_lock(&(rwlock)->raw_lock)
+#define write_can_lock(rwlock)		__raw_write_can_lock(&(rwlock)->raw_lock)
+
+/*
+ * Define the various rw_lock methods.  Note we define these
+ * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
+ * methods are defined as nops in the case they are not required.
+ */
+#define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
+#define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
+
+#define write_lock(lock)		_write_lock(lock)
+#define read_lock(lock)			_read_lock(lock)
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _read_lock_irqsave(lock);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _write_lock_irqsave(lock);	\
+	} while (0)
+
+#else
+
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_lock_irqsave(lock, flags);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_lock_irqsave(lock, flags);	\
+	} while (0)
+
+#endif
+
+#define read_lock_irq(lock)		_read_lock_irq(lock)
+#define read_lock_bh(lock)		_read_lock_bh(lock)
+#define write_lock_irq(lock)		_write_lock_irq(lock)
+#define write_lock_bh(lock)		_write_lock_bh(lock)
+#define read_unlock(lock)		_read_unlock(lock)
+#define write_unlock(lock)		_write_unlock(lock)
+#define read_unlock_irq(lock)		_read_unlock_irq(lock)
+#define write_unlock_irq(lock)		_write_unlock_irq(lock)
+
+#define read_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_unlock_irqrestore(lock, flags);	\
+	} while (0)
+#define read_unlock_bh(lock)		_read_unlock_bh(lock)
+
+#define write_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_unlock_irqrestore(lock, flags);	\
+	} while (0)
+#define write_unlock_bh(lock)		_write_unlock_bh(lock)
+
+#define write_trylock_irqsave(lock, flags) \
+({ \
+	local_irq_save(flags); \
+	write_trylock(lock) ? \
+	1 : ({ local_irq_restore(flags); 0; }); \
+})
+
+#endif /* __LINUX_RWLOCK_H */
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
new file mode 100644
index 000000000000..f8c935206a41
--- /dev/null
+++ b/include/linux/rwlock_types.h
@@ -0,0 +1,56 @@
+#ifndef __LINUX_RWLOCK_TYPES_H
+#define __LINUX_RWLOCK_TYPES_H
+
+/*
+ * include/linux/rwlock_types.h - generic rwlock type definitions
+ *				  and initializers
+ *
+ * portions Copyright 2005, Red Hat, Inc., Ingo Molnar
+ * Released under the General Public License (GPL).
+ */
+typedef struct {
+	raw_rwlock_t raw_lock;
+#ifdef CONFIG_GENERIC_LOCKBREAK
+	unsigned int break_lock;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+	unsigned int magic, owner_cpu;
+	void *owner;
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC		0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define __RW_LOCK_UNLOCKED(lockname)					\
+	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+				.magic = RWLOCK_MAGIC,			\
+				.owner = SPINLOCK_OWNER_INIT,		\
+				.owner_cpu = -1,			\
+				RW_DEP_MAP_INIT(lockname) }
+#else
+#define __RW_LOCK_UNLOCKED(lockname) \
+	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+				RW_DEP_MAP_INIT(lockname) }
+#endif
+
+/*
+ * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
+ * deprecated.
+ *
+ * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
+ */
+#define RW_LOCK_UNLOCKED	__RW_LOCK_UNLOCKED(old_style_rw_init)
+
+#define DEFINE_RWLOCK(x)	rwlock_t x = __RW_LOCK_UNLOCKED(x)
+
+#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 71dccfeb0d88..a9aaa709fb93 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -103,20 +103,6 @@ do {								\
 	do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
-#ifdef CONFIG_DEBUG_SPINLOCK
-  extern void __rwlock_init(rwlock_t *lock, const char *name,
-			    struct lock_class_key *key);
-# define rwlock_init(lock)					\
-do {								\
-	static struct lock_class_key __key;			\
-								\
-	__rwlock_init((lock), #lock, &__key);			\
-} while (0)
-#else
-# define rwlock_init(lock)					\
-	do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0)
-#endif
-
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
 
 #ifdef CONFIG_GENERIC_LOCKBREAK
@@ -146,43 +132,21 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
  extern int _raw_spin_trylock(spinlock_t *lock);
  extern void _raw_spin_unlock(spinlock_t *lock);
- extern void _raw_read_lock(rwlock_t *lock);
-#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
- extern int _raw_read_trylock(rwlock_t *lock);
- extern void _raw_read_unlock(rwlock_t *lock);
- extern void _raw_write_lock(rwlock_t *lock);
-#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
- extern int _raw_write_trylock(rwlock_t *lock);
- extern void _raw_write_unlock(rwlock_t *lock);
 #else
 # define _raw_spin_lock(lock)		__raw_spin_lock(&(lock)->raw_lock)
 # define _raw_spin_lock_flags(lock, flags) \
 		__raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
 # define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
 # define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
-# define _raw_read_lock(rwlock)		__raw_read_lock(&(rwlock)->raw_lock)
-# define _raw_read_lock_flags(lock, flags) \
-		__raw_read_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_read_trylock(rwlock)	__raw_read_trylock(&(rwlock)->raw_lock)
-# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
-# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
-# define _raw_write_lock_flags(lock, flags) \
-		__raw_write_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_write_trylock(rwlock)	__raw_write_trylock(&(rwlock)->raw_lock)
-# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
 #endif
 
-#define read_can_lock(rwlock)		__raw_read_can_lock(&(rwlock)->raw_lock)
-#define write_can_lock(rwlock)		__raw_write_can_lock(&(rwlock)->raw_lock)
-
 /*
- * Define the various spin_lock and rw_lock methods.  Note we define these
- * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
- * methods are defined as nops in the case they are not required.
+ * Define the various spin_lock methods.  Note we define these
+ * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The
+ * various methods are defined as nops in the case they are not
+ * required.
  */
 #define spin_trylock(lock)		__cond_lock(lock, _spin_trylock(lock))
-#define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
-#define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
 
 #define spin_lock(lock)			_spin_lock(lock)
 
@@ -198,9 +162,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 # define spin_lock_nest_lock(lock, nest_lock) _spin_lock(lock)
 #endif
 
-#define write_lock(lock)		_write_lock(lock)
-#define read_lock(lock)			_read_lock(lock)
-
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
 #define spin_lock_irqsave(lock, flags)			\
@@ -208,16 +169,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 		typecheck(unsigned long, flags);	\
 		flags = _spin_lock_irqsave(lock);	\
 	} while (0)
-#define read_lock_irqsave(lock, flags)			\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		flags = _read_lock_irqsave(lock);	\
-	} while (0)
-#define write_lock_irqsave(lock, flags)			\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		flags = _write_lock_irqsave(lock);	\
-	} while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define spin_lock_irqsave_nested(lock, flags, subclass)			\
@@ -240,16 +191,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 		typecheck(unsigned long, flags);	\
 		_spin_lock_irqsave(lock, flags);	\
 	} while (0)
-#define read_lock_irqsave(lock, flags)			\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		_read_lock_irqsave(lock, flags);	\
-	} while (0)
-#define write_lock_irqsave(lock, flags)			\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		_write_lock_irqsave(lock, flags);	\
-	} while (0)
+
 #define spin_lock_irqsave_nested(lock, flags, subclass)	\
 	spin_lock_irqsave(lock, flags)
 
@@ -257,16 +199,8 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 
 #define spin_lock_irq(lock)		_spin_lock_irq(lock)
 #define spin_lock_bh(lock)		_spin_lock_bh(lock)
-#define read_lock_irq(lock)		_read_lock_irq(lock)
-#define read_lock_bh(lock)		_read_lock_bh(lock)
-#define write_lock_irq(lock)		_write_lock_irq(lock)
-#define write_lock_bh(lock)		_write_lock_bh(lock)
 #define spin_unlock(lock)		_spin_unlock(lock)
-#define read_unlock(lock)		_read_unlock(lock)
-#define write_unlock(lock)		_write_unlock(lock)
 #define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
-#define read_unlock_irq(lock)		_read_unlock_irq(lock)
-#define write_unlock_irq(lock)		_write_unlock_irq(lock)
 
 #define spin_unlock_irqrestore(lock, flags)		\
 	do {						\
@@ -275,20 +209,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 	} while (0)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
-#define read_unlock_irqrestore(lock, flags)		\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		_read_unlock_irqrestore(lock, flags);	\
-	} while (0)
-#define read_unlock_bh(lock)		_read_unlock_bh(lock)
-
-#define write_unlock_irqrestore(lock, flags)		\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		_write_unlock_irqrestore(lock, flags);	\
-	} while (0)
-#define write_unlock_bh(lock)		_write_unlock_bh(lock)
-
 #define spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
 
 #define spin_trylock_irq(lock) \
@@ -305,13 +225,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
 
-#define write_trylock_irqsave(lock, flags) \
-({ \
-	local_irq_save(flags); \
-	write_trylock(lock) ? \
-	1 : ({ local_irq_restore(flags); 0; }); \
-})
-
 /*
  * Pull the atomic_t declaration:
  * (asm-mips/atomic.h needs above definitions)
@@ -335,6 +248,9 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
  */
 #define spin_can_lock(lock)	(!spin_is_locked(lock))
 
+/* Include rwlock functions */
+#include <linux/rwlock.h>
+
 /*
  * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  */
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 68d88f71f1a2..f979d5d8a160 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -33,22 +33,6 @@ typedef struct {
 
 #define SPINLOCK_MAGIC		0xdead4ead
 
-typedef struct {
-	raw_rwlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-	unsigned int break_lock;
-#endif
-#ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned int magic, owner_cpu;
-	void *owner;
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC		0xdeaf1eed
-
 #define SPINLOCK_OWNER_INIT	((void *)-1L)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -57,12 +41,6 @@ typedef struct {
 # define SPIN_DEP_MAP_INIT(lockname)
 #endif
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define RW_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
-#else
-# define RW_DEP_MAP_INIT(lockname)
-#endif
-
 #ifdef CONFIG_DEBUG_SPINLOCK
 # define __SPIN_LOCK_UNLOCKED(lockname)					\
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
@@ -70,31 +48,22 @@ typedef struct {
 				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1,			\
 				SPIN_DEP_MAP_INIT(lockname) }
-#define __RW_LOCK_UNLOCKED(lockname)					\
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
-				.magic = RWLOCK_MAGIC,			\
-				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1,			\
-				RW_DEP_MAP_INIT(lockname) }
 #else
 # define __SPIN_LOCK_UNLOCKED(lockname) \
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
 				SPIN_DEP_MAP_INIT(lockname) }
-#define __RW_LOCK_UNLOCKED(lockname) \
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
-				RW_DEP_MAP_INIT(lockname) }
 #endif
 
 /*
- * SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED defeat lockdep state tracking and
- * are hence deprecated.
- * Please use DEFINE_SPINLOCK()/DEFINE_RWLOCK() or
- * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate.
+ * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
+ * deprecated.
+ * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
+ * appropriate.
  */
 #define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
-#define RW_LOCK_UNLOCKED	__RW_LOCK_UNLOCKED(old_style_rw_init)
 
 #define DEFINE_SPINLOCK(x)	spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
-#define DEFINE_RWLOCK(x)	rwlock_t x = __RW_LOCK_UNLOCKED(x)
+
+#include <linux/rwlock_types.h>
 
 #endif /* __LINUX_SPINLOCK_TYPES_H */
-- 
cgit v1.2.3


From 6b6b4792f89346e47437682c7ba3438e6681c0f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 18:48:37 +0100
Subject: locking: Separate rwlock api from spinlock api

Move the rwlock smp api defines and functions into a separate header
file. Makes the -rt selection simpler and less intrusive.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rwlock_api_smp.h   | 277 +++++++++++++++++++++++++++++++++++++++
 include/linux/spinlock_api_smp.h | 252 +----------------------------------
 2 files changed, 280 insertions(+), 249 deletions(-)
 create mode 100644 include/linux/rwlock_api_smp.h

(limited to 'include/linux')

diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
new file mode 100644
index 000000000000..090f876f828d
--- /dev/null
+++ b/include/linux/rwlock_api_smp.h
@@ -0,0 +1,277 @@
+#ifndef __LINUX_RWLOCK_API_SMP_H
+#define __LINUX_RWLOCK_API_SMP_H
+
+#ifndef __LINUX_SPINLOCK_API_SMP_H
+# error "please don't include this file directly"
+#endif
+
+/*
+ * include/linux/rwlock_api_smp.h
+ *
+ * spinlock API declarations on SMP (and debug)
+ * (implemented in kernel/spinlock.c)
+ *
+ * portions Copyright 2005, Red Hat, Inc., Ingo Molnar
+ * Released under the General Public License (GPL).
+ */
+
+void __lockfunc _read_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
+unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
+							__acquires(lock);
+unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
+							__acquires(lock);
+int __lockfunc _read_trylock(rwlock_t *lock);
+int __lockfunc _write_trylock(rwlock_t *lock);
+void __lockfunc _read_unlock(rwlock_t *lock)		__releases(lock);
+void __lockfunc _write_unlock(rwlock_t *lock)		__releases(lock);
+void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(lock);
+void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(lock);
+void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(lock);
+void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(lock);
+void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+							__releases(lock);
+void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+							__releases(lock);
+
+#ifdef CONFIG_INLINE_READ_LOCK
+#define _read_lock(lock) __read_lock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_LOCK
+#define _write_lock(lock) __write_lock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_LOCK_BH
+#define _read_lock_bh(lock) __read_lock_bh(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_LOCK_BH
+#define _write_lock_bh(lock) __write_lock_bh(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_LOCK_IRQ
+#define _read_lock_irq(lock) __read_lock_irq(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ
+#define _write_lock_irq(lock) __write_lock_irq(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE
+#define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
+#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_TRYLOCK
+#define _read_trylock(lock) __read_trylock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_TRYLOCK
+#define _write_trylock(lock) __write_trylock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_UNLOCK
+#define _read_unlock(lock) __read_unlock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_UNLOCK
+#define _write_unlock(lock) __write_unlock(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_UNLOCK_BH
+#define _read_unlock_bh(lock) __read_unlock_bh(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH
+#define _write_unlock_bh(lock) __write_unlock_bh(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ
+#define _read_unlock_irq(lock) __read_unlock_irq(lock)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ
+#define _write_unlock_irq(lock) __write_unlock_irq(lock)
+#endif
+
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
+#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
+#endif
+
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
+#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
+#endif
+
+static inline int __read_trylock(rwlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_read_trylock(lock)) {
+		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable();
+	return 0;
+}
+
+static inline int __write_trylock(rwlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_write_trylock(lock)) {
+		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+		return 1;
+	}
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * If lockdep is enabled then we use the non-preemption spin-ops
+ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * not re-enabled during lock-acquire (which the preempt-spin-ops do):
+ */
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
+
+static inline void __read_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock,
+			     _raw_read_lock_flags, &flags);
+	return flags;
+}
+
+static inline void __read_lock_irq(rwlock_t *lock)
+{
+	local_irq_disable();
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline void __read_lock_bh(rwlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+}
+
+static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock,
+			     _raw_write_lock_flags, &flags);
+	return flags;
+}
+
+static inline void __write_lock_irq(rwlock_t *lock)
+{
+	local_irq_disable();
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+static inline void __write_lock_bh(rwlock_t *lock)
+{
+	local_bh_disable();
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+static inline void __write_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+}
+
+#endif /* CONFIG_PREEMPT */
+
+static inline void __write_unlock(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	preempt_enable();
+}
+
+static inline void __read_unlock(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	preempt_enable();
+}
+
+static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
+static inline void __read_unlock_irq(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	local_irq_enable();
+	preempt_enable();
+}
+
+static inline void __read_unlock_bh(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_read_unlock(lock);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+
+static inline void __write_unlock_irqrestore(rwlock_t *lock,
+					     unsigned long flags)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
+static inline void __write_unlock_irq(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	local_irq_enable();
+	preempt_enable();
+}
+
+static inline void __write_unlock_bh(rwlock_t *lock)
+{
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
+	_raw_write_unlock(lock);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+
+#endif /* __LINUX_RWLOCK_API_SMP_H */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 8264a7f459bc..a2b2c9df91de 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -24,102 +24,41 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
 							__acquires(lock);
 void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map)
 							__acquires(lock);
-void __lockfunc _read_lock(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock(rwlock_t *lock)		__acquires(lock);
 void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(lock);
-void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(lock);
 void __lockfunc _spin_lock_irq(spinlock_t *lock)	__acquires(lock);
-void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
+
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 							__acquires(lock);
 unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
 							__acquires(lock);
-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
-							__acquires(lock);
-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
-							__acquires(lock);
 int __lockfunc _spin_trylock(spinlock_t *lock);
-int __lockfunc _read_trylock(rwlock_t *lock);
-int __lockfunc _write_trylock(rwlock_t *lock);
 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
 void __lockfunc _spin_unlock(spinlock_t *lock)		__releases(lock);
-void __lockfunc _read_unlock(rwlock_t *lock)		__releases(lock);
-void __lockfunc _write_unlock(rwlock_t *lock)		__releases(lock);
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)	__releases(lock);
-void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(lock);
-void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(lock);
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)	__releases(lock);
-void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(lock);
-void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(lock);
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 							__releases(lock);
-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-							__releases(lock);
-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-							__releases(lock);
 
 #ifdef CONFIG_INLINE_SPIN_LOCK
 #define _spin_lock(lock) __spin_lock(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_LOCK
-#define _read_lock(lock) __read_lock(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_LOCK
-#define _write_lock(lock) __write_lock(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_LOCK_BH
 #define _spin_lock_bh(lock) __spin_lock_bh(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_LOCK_BH
-#define _read_lock_bh(lock) __read_lock_bh(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_LOCK_BH
-#define _write_lock_bh(lock) __write_lock_bh(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_LOCK_IRQ
 #define _spin_lock_irq(lock) __spin_lock_irq(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_LOCK_IRQ
-#define _read_lock_irq(lock) __read_lock_irq(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ
-#define _write_lock_irq(lock) __write_lock_irq(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
 #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE
-#define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_TRYLOCK
 #define _spin_trylock(lock) __spin_trylock(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_TRYLOCK
-#define _read_trylock(lock) __read_trylock(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_TRYLOCK
-#define _write_trylock(lock) __write_trylock(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH
 #define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
 #endif
@@ -128,50 +67,18 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 #define _spin_unlock(lock) __spin_unlock(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_UNLOCK
-#define _read_unlock(lock) __read_unlock(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_UNLOCK
-#define _write_unlock(lock) __write_unlock(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_BH
 #define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_UNLOCK_BH
-#define _read_unlock_bh(lock) __read_unlock_bh(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH
-#define _write_unlock_bh(lock) __write_unlock_bh(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
 #define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
 #endif
 
-#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ
-#define _read_unlock_irq(lock) __read_unlock_irq(lock)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-#define _write_unlock_irq(lock) __write_unlock_irq(lock)
-#endif
-
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
 #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
 #endif
 
-#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
-#endif
-
-#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
-#endif
-
 static inline int __spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
@@ -183,28 +90,6 @@ static inline int __spin_trylock(spinlock_t *lock)
 	return 0;
 }
 
-static inline int __read_trylock(rwlock_t *lock)
-{
-	preempt_disable();
-	if (_raw_read_trylock(lock)) {
-		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-	preempt_enable();
-	return 0;
-}
-
-static inline int __write_trylock(rwlock_t *lock)
-{
-	preempt_disable();
-	if (_raw_write_trylock(lock)) {
-		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-		return 1;
-	}
-	preempt_enable();
-	return 0;
-}
-
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
@@ -212,13 +97,6 @@ static inline int __write_trylock(rwlock_t *lock)
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-static inline void __read_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
-}
-
 static inline unsigned long __spin_lock_irqsave(spinlock_t *lock)
 {
 	unsigned long flags;
@@ -255,62 +133,6 @@ static inline void __spin_lock_bh(spinlock_t *lock)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 }
 
-static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock,
-			     _raw_read_lock_flags, &flags);
-	return flags;
-}
-
-static inline void __read_lock_irq(rwlock_t *lock)
-{
-	local_irq_disable();
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
-}
-
-static inline void __read_lock_bh(rwlock_t *lock)
-{
-	local_bh_disable();
-	preempt_disable();
-	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
-}
-
-static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock,
-			     _raw_write_lock_flags, &flags);
-	return flags;
-}
-
-static inline void __write_lock_irq(rwlock_t *lock)
-{
-	local_irq_disable();
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
-}
-
-static inline void __write_lock_bh(rwlock_t *lock)
-{
-	local_bh_disable();
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
-}
-
 static inline void __spin_lock(spinlock_t *lock)
 {
 	preempt_disable();
@@ -318,13 +140,6 @@ static inline void __spin_lock(spinlock_t *lock)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 }
 
-static inline void __write_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
-}
-
 #endif /* CONFIG_PREEMPT */
 
 static inline void __spin_unlock(spinlock_t *lock)
@@ -334,20 +149,6 @@ static inline void __spin_unlock(spinlock_t *lock)
 	preempt_enable();
 }
 
-static inline void __write_unlock(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	preempt_enable();
-}
-
-static inline void __read_unlock(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	preempt_enable();
-}
-
 static inline void __spin_unlock_irqrestore(spinlock_t *lock,
 					    unsigned long flags)
 {
@@ -373,55 +174,6 @@ static inline void __spin_unlock_bh(spinlock_t *lock)
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 
-static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	local_irq_restore(flags);
-	preempt_enable();
-}
-
-static inline void __read_unlock_irq(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	local_irq_enable();
-	preempt_enable();
-}
-
-static inline void __read_unlock_bh(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
-}
-
-static inline void __write_unlock_irqrestore(rwlock_t *lock,
-					     unsigned long flags)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	local_irq_restore(flags);
-	preempt_enable();
-}
-
-static inline void __write_unlock_irq(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	local_irq_enable();
-	preempt_enable();
-}
-
-static inline void __write_unlock_bh(rwlock_t *lock)
-{
-	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
-	preempt_enable_no_resched();
-	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
-}
-
 static inline int __spin_trylock_bh(spinlock_t *lock)
 {
 	local_bh_disable();
@@ -435,4 +187,6 @@ static inline int __spin_trylock_bh(spinlock_t *lock)
 	return 0;
 }
 
+#include <linux/rwlock_api_smp.h>
+
 #endif /* __LINUX_SPINLOCK_API_SMP_H */
-- 
cgit v1.2.3


From 445c89514be242b1b0080056d50bdc1b72adeb5c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 2 Dec 2009 19:49:50 +0100
Subject: locking: Convert raw_spinlock to arch_spinlock

The raw_spin* namespace was taken by lockdep for the architecture
specific implementations. raw_spin_* would be the ideal name space for
the spinlocks which are not converted to sleeping locks in preempt-rt.

Linus suggested to convert the raw_ to arch_ locks and cleanup the
name space instead of using an artifical name like core_spin,
atomic_spin or whatever

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/spinlock.h          |  6 +++---
 arch/alpha/include/asm/spinlock_types.h    |  2 +-
 arch/arm/include/asm/spinlock.h            |  6 +++---
 arch/arm/include/asm/spinlock_types.h      |  2 +-
 arch/blackfin/include/asm/spinlock.h       | 10 +++++-----
 arch/blackfin/include/asm/spinlock_types.h |  2 +-
 arch/cris/include/arch-v32/arch/spinlock.h | 12 ++++++------
 arch/ia64/include/asm/spinlock.h           | 26 +++++++++++++-------------
 arch/ia64/include/asm/spinlock_types.h     |  2 +-
 arch/m32r/include/asm/spinlock.h           |  6 +++---
 arch/m32r/include/asm/spinlock_types.h     |  2 +-
 arch/mips/include/asm/spinlock.h           | 10 +++++-----
 arch/mips/include/asm/spinlock_types.h     |  2 +-
 arch/parisc/include/asm/atomic.h           |  6 +++---
 arch/parisc/include/asm/spinlock.h         |  8 ++++----
 arch/parisc/include/asm/spinlock_types.h   |  4 ++--
 arch/parisc/lib/bitops.c                   |  2 +-
 arch/powerpc/include/asm/rtas.h            |  2 +-
 arch/powerpc/include/asm/spinlock.h        | 14 +++++++-------
 arch/powerpc/include/asm/spinlock_types.h  |  2 +-
 arch/powerpc/kernel/rtas.c                 |  2 +-
 arch/powerpc/lib/locks.c                   |  4 ++--
 arch/powerpc/platforms/pasemi/setup.c      |  2 +-
 arch/s390/include/asm/spinlock.h           | 16 ++++++++--------
 arch/s390/include/asm/spinlock_types.h     |  2 +-
 arch/s390/lib/spinlock.c                   |  8 ++++----
 arch/sh/include/asm/spinlock.h             |  6 +++---
 arch/sh/include/asm/spinlock_types.h       |  2 +-
 arch/sparc/include/asm/spinlock_32.h       |  6 +++---
 arch/sparc/include/asm/spinlock_64.h       |  8 ++++----
 arch/sparc/include/asm/spinlock_types.h    |  2 +-
 arch/x86/include/asm/paravirt.h            | 12 ++++++------
 arch/x86/include/asm/paravirt_types.h      | 14 +++++++-------
 arch/x86/include/asm/spinlock.h            | 30 +++++++++++++++---------------
 arch/x86/include/asm/spinlock_types.h      |  4 ++--
 arch/x86/kernel/dumpstack.c                |  2 +-
 arch/x86/kernel/paravirt-spinlocks.c       |  2 +-
 arch/x86/kernel/tsc_sync.c                 |  2 +-
 arch/x86/xen/spinlock.c                    | 16 ++++++++--------
 include/asm-generic/bitops/atomic.h        |  6 +++---
 include/linux/spinlock.h                   |  4 ++--
 include/linux/spinlock_types.h             |  2 +-
 include/linux/spinlock_types_up.h          |  4 ++--
 include/linux/spinlock_up.h                |  8 ++++----
 kernel/lockdep.c                           |  2 +-
 kernel/trace/ring_buffer.c                 |  4 ++--
 kernel/trace/trace.c                       | 18 +++++++++---------
 kernel/trace/trace_clock.c                 |  4 ++--
 kernel/trace/trace_sched_wakeup.c          |  4 ++--
 kernel/trace/trace_stack.c                 |  4 ++--
 lib/spinlock_debug.c                       |  2 +-
 51 files changed, 164 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index e38fb95cb335..bdb26a1940b4 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -17,13 +17,13 @@
 #define __raw_spin_unlock_wait(x) \
 		do { cpu_relax(); } while ((x)->lock)
 
-static inline void __raw_spin_unlock(raw_spinlock_t * lock)
+static inline void __raw_spin_unlock(arch_spinlock_t * lock)
 {
 	mb();
 	lock->lock = 0;
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t * lock)
+static inline void __raw_spin_lock(arch_spinlock_t * lock)
 {
 	long tmp;
 
@@ -43,7 +43,7 @@ static inline void __raw_spin_lock(raw_spinlock_t * lock)
 	: "m"(lock->lock) : "memory");
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	return !test_and_set_bit(0, &lock->lock);
 }
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index 8141eb5ebf0d..bb94a51e53d2 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index c13681ac1ede..4e7712ee9394 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -23,7 +23,7 @@
 
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -43,7 +43,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	smp_mb();
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -63,7 +63,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	}
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	smp_mb();
 
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 43e83f6d2ee5..5e9d3eadd167 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index b0c7f0ee4b03..fc16b4c5309b 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -24,29 +24,29 @@ asmlinkage void __raw_write_lock_asm(volatile int *ptr);
 asmlinkage int __raw_write_trylock_asm(volatile int *ptr);
 asmlinkage void __raw_write_unlock_asm(volatile int *ptr);
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __raw_spin_is_locked_asm(&lock->lock);
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	__raw_spin_lock_asm(&lock->lock);
 }
 
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	return __raw_spin_trylock_asm(&lock->lock);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__raw_spin_unlock_asm(&lock->lock);
 }
 
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
 		cpu_relax();
diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h
index be75762c0610..03b377abf5c0 100644
--- a/arch/blackfin/include/asm/spinlock_types.h
+++ b/arch/blackfin/include/asm/spinlock_types.h
@@ -15,7 +15,7 @@
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index 367a53ea10c5..e253457765f2 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -9,12 +9,12 @@ extern void cris_spin_unlock(void *l, int val);
 extern void cris_spin_lock(void *l);
 extern int cris_spin_trylock(void *l);
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+static inline int __raw_spin_is_locked(arch_spinlock_t *x)
 {
 	return *(volatile signed char *)(&(x)->slock) <= 0;
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ volatile ("move.d %1,%0" \
 			  : "=m" (lock->slock) \
@@ -22,24 +22,24 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 			  : "memory");
 }
 
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
 		cpu_relax();
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	return cris_spin_trylock((void *)&lock->slock);
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	cris_spin_lock((void *)&lock->slock);
 }
 
 static inline void
-__raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	__raw_spin_lock(lock);
 }
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 239ecdc9516d..9fbdf7e61087 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -38,7 +38,7 @@
 #define TICKET_BITS	15
 #define	TICKET_MASK	((1 << TICKET_BITS) - 1)
 
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
 	int	*p = (int *)&lock->lock, ticket, serve;
 
@@ -58,7 +58,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 	}
 }
 
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->lock);
 
@@ -67,7 +67,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 	return 0;
 }
 
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
 
@@ -75,7 +75,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
-static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	int	*p = (int *)&lock->lock, ticket;
 
@@ -89,53 +89,53 @@ static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock)
 	}
 }
 
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
 	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK);
 }
 
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
 	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
 }
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_contended(lock);
 }
 #define __raw_spin_is_contended	__raw_spin_is_contended
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	__ticket_spin_lock(lock);
 }
 
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	return __ticket_spin_trylock(lock);
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
 	__raw_spin_lock(lock);
 }
 
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock_wait(lock);
 }
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 474e46f1ab4a..447ccc6ca7a8 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index dded923883b2..0c0164225bc0 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -36,7 +36,7 @@
  * __raw_spin_trylock() tries to get the lock and returns a result.
  * On the m32r, the result value is 1 (= Success) or 0 (= Failure).
  */
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	int oldval;
 	unsigned long tmp1, tmp2;
@@ -69,7 +69,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return (oldval > 0);
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp0, tmp1;
 
@@ -111,7 +111,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	mb();
 	lock->slock = 1;
diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h
index 83f52105c0e4..17d15bd6322d 100644
--- a/arch/m32r/include/asm/spinlock_types.h
+++ b/arch/m32r/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile int slock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
 
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 5b60a09a0f08..0f16d0673b4a 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -34,7 +34,7 @@
  * becomes equal to the the initial value of the tail.
  */
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
 {
 	unsigned int counters = ACCESS_ONCE(lock->lock);
 
@@ -45,7 +45,7 @@ static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 #define __raw_spin_unlock_wait(x) \
 	while (__raw_spin_is_locked(x)) { cpu_relax(); }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
 {
 	unsigned int counters = ACCESS_ONCE(lock->lock);
 
@@ -53,7 +53,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
 }
 #define __raw_spin_is_contended	__raw_spin_is_contended
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	int my_ticket;
 	int tmp;
@@ -134,7 +134,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	smp_llsc_mb();
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	int tmp;
 
@@ -174,7 +174,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 	}
 }
 
-static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp, tmp2, tmp3;
 
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index adeedaa116c1..2e1060892d3b 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -12,7 +12,7 @@ typedef struct {
 	 * bits 15..28: ticket
 	 */
 	unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 8bc9e96699b2..3a4ea778d4b6 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -27,18 +27,18 @@
 #  define ATOMIC_HASH_SIZE 4
 #  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) (a))/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
 
-extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
+extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 
 /* Can't use raw_spin_lock_irq because of #include problems, so
  * this is the substitute */
 #define _atomic_spin_lock_irqsave(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);		\
+	arch_spinlock_t *s = ATOMIC_HASH(l);		\
 	local_irq_save(f);			\
 	__raw_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);			\
+	arch_spinlock_t *s = ATOMIC_HASH(l);			\
 	__raw_spin_unlock(s);				\
 	local_irq_restore(f);				\
 } while(0)
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index fae03e136fa8..69e8dca26744 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -5,7 +5,7 @@
 #include <asm/processor.h>
 #include <asm/spinlock_types.h>
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *x)
+static inline int __raw_spin_is_locked(arch_spinlock_t *x)
 {
 	volatile unsigned int *a = __ldcw_align(x);
 	return *a == 0;
@@ -15,7 +15,7 @@ static inline int __raw_spin_is_locked(raw_spinlock_t *x)
 #define __raw_spin_unlock_wait(x) \
 		do { cpu_relax(); } while (__raw_spin_is_locked(x))
 
-static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
+static inline void __raw_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
 {
 	volatile unsigned int *a;
@@ -33,7 +33,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
 	mb();
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *x)
+static inline void __raw_spin_unlock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
 	mb();
@@ -42,7 +42,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *x)
 	mb();
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *x)
+static inline int __raw_spin_trylock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
 	int ret;
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 3f72f47cf4b2..735caafb81f5 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -9,10 +9,10 @@ typedef struct {
 	volatile unsigned int lock[4];
 # define __RAW_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
 #endif
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 typedef struct {
-	raw_spinlock_t lock;
+	arch_spinlock_t lock;
 	volatile int counter;
 } raw_rwlock_t;
 
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
index e3eb739fab19..fdd7f583de54 100644
--- a/arch/parisc/lib/bitops.c
+++ b/arch/parisc/lib/bitops.c
@@ -12,7 +12,7 @@
 #include <asm/atomic.h>
 
 #ifdef CONFIG_SMP
-raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
+arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
 	[0 ... (ATOMIC_HASH_SIZE-1)]  = __RAW_SPIN_LOCK_UNLOCKED
 };
 #endif
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 168fce726201..20de73c36682 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -58,7 +58,7 @@ struct rtas_t {
 	unsigned long entry;		/* physical address pointer */
 	unsigned long base;		/* physical address pointer */
 	unsigned long size;
-	raw_spinlock_t lock;
+	arch_spinlock_t lock;
 	struct rtas_args args;
 	struct device_node *dev;	/* virtual address pointer */
 };
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 198266cf9e2d..c0d44c92ff0e 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -54,7 +54,7 @@
  * This returns the old value in the lock, so we succeeded
  * in getting the lock if the return value is 0.
  */
-static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock)
+static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp, token;
 
@@ -73,7 +73,7 @@ static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
 	return arch_spin_trylock(lock) == 0;
@@ -96,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 #if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (get_lppaca()->shared_proc)
-extern void __spin_yield(raw_spinlock_t *lock);
+extern void __spin_yield(arch_spinlock_t *lock);
 extern void __rw_yield(raw_rwlock_t *lock);
 #else /* SPLPAR || ISERIES */
 #define __spin_yield(x)	barrier()
@@ -104,7 +104,7 @@ extern void __rw_yield(raw_rwlock_t *lock);
 #define SHARED_PROCESSOR	0
 #endif
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
 	while (1) {
@@ -120,7 +120,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 }
 
 static inline
-void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	unsigned long flags_dis;
 
@@ -140,7 +140,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 	}
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	SYNC_IO;
 	__asm__ __volatile__("# __raw_spin_unlock\n\t"
@@ -149,7 +149,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 }
 
 #ifdef CONFIG_PPC64
-extern void __raw_spin_unlock_wait(raw_spinlock_t *lock);
+extern void __raw_spin_unlock_wait(arch_spinlock_t *lock);
 #else
 #define __raw_spin_unlock_wait(lock) \
 	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 74236c9f05b1..4312e5baaf88 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int slock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index bf90361bb70f..579069c12152 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -978,7 +978,7 @@ int __init early_init_dt_scan_rtas(unsigned long node,
 	return 1;
 }
 
-static raw_spinlock_t timebase_lock;
+static arch_spinlock_t timebase_lock;
 static u64 timebase = 0;
 
 void __cpuinit rtas_give_timebase(void)
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 79d0fa3a470d..b06294cde499 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -25,7 +25,7 @@
 #include <asm/smp.h>
 #include <asm/firmware.h>
 
-void __spin_yield(raw_spinlock_t *lock)
+void __spin_yield(arch_spinlock_t *lock)
 {
 	unsigned int lock_value, holder_cpu, yield_count;
 
@@ -82,7 +82,7 @@ void __rw_yield(raw_rwlock_t *rw)
 }
 #endif
 
-void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (lock->slock) {
 		HMT_low();
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index a4619347aa7e..be36fece41d7 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -71,7 +71,7 @@ static void pas_restart(char *cmd)
 }
 
 #ifdef CONFIG_SMP
-static raw_spinlock_t timebase_lock;
+static arch_spinlock_t timebase_lock;
 static unsigned long timebase;
 
 static void __devinit pas_give_timebase(void)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index c9af0d19c7ab..6121fa4b83d9 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -57,12 +57,12 @@ _raw_compare_and_swap(volatile unsigned int *lock,
 	do { while (__raw_spin_is_locked(lock)) \
 		 _raw_spin_relax(lock); } while (0)
 
-extern void _raw_spin_lock_wait(raw_spinlock_t *);
-extern void _raw_spin_lock_wait_flags(raw_spinlock_t *, unsigned long flags);
-extern int _raw_spin_trylock_retry(raw_spinlock_t *);
-extern void _raw_spin_relax(raw_spinlock_t *lock);
+extern void _raw_spin_lock_wait(arch_spinlock_t *);
+extern void _raw_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
+extern int _raw_spin_trylock_retry(arch_spinlock_t *);
+extern void _raw_spin_relax(arch_spinlock_t *lock);
 
-static inline void __raw_spin_lock(raw_spinlock_t *lp)
+static inline void __raw_spin_lock(arch_spinlock_t *lp)
 {
 	int old;
 
@@ -72,7 +72,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lp)
 	_raw_spin_lock_wait(lp);
 }
 
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lp,
+static inline void __raw_spin_lock_flags(arch_spinlock_t *lp,
 					 unsigned long flags)
 {
 	int old;
@@ -83,7 +83,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lp,
 	_raw_spin_lock_wait_flags(lp, flags);
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lp)
+static inline int __raw_spin_trylock(arch_spinlock_t *lp)
 {
 	int old;
 
@@ -93,7 +93,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lp)
 	return _raw_spin_trylock_retry(lp);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lp)
+static inline void __raw_spin_unlock(arch_spinlock_t *lp)
 {
 	_raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0);
 }
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index 654abc40de04..a93638eee3f7 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int owner_cpu;
-} __attribute__ ((aligned (4))) raw_spinlock_t;
+} __attribute__ ((aligned (4))) arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index f7e0d30250b7..d4cbf71a6077 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -39,7 +39,7 @@ static inline void _raw_yield_cpu(int cpu)
 		_raw_yield();
 }
 
-void _raw_spin_lock_wait(raw_spinlock_t *lp)
+void _raw_spin_lock_wait(arch_spinlock_t *lp)
 {
 	int count = spin_retry;
 	unsigned int cpu = ~smp_processor_id();
@@ -59,7 +59,7 @@ void _raw_spin_lock_wait(raw_spinlock_t *lp)
 }
 EXPORT_SYMBOL(_raw_spin_lock_wait);
 
-void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags)
+void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
 	int count = spin_retry;
 	unsigned int cpu = ~smp_processor_id();
@@ -82,7 +82,7 @@ void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags)
 }
 EXPORT_SYMBOL(_raw_spin_lock_wait_flags);
 
-int _raw_spin_trylock_retry(raw_spinlock_t *lp)
+int _raw_spin_trylock_retry(arch_spinlock_t *lp)
 {
 	unsigned int cpu = ~smp_processor_id();
 	int count;
@@ -97,7 +97,7 @@ int _raw_spin_trylock_retry(raw_spinlock_t *lp)
 }
 EXPORT_SYMBOL(_raw_spin_trylock_retry);
 
-void _raw_spin_relax(raw_spinlock_t *lock)
+void _raw_spin_relax(arch_spinlock_t *lock)
 {
 	unsigned int cpu = lock->owner_cpu;
 	if (cpu != 0)
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index a28c9f0053fd..5a05b3fcefbe 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -34,7 +34,7 @@
  *
  * We make no fairness assumptions.  They have a cost.
  */
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 	unsigned long oldval;
@@ -54,7 +54,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -67,7 +67,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 	);
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp, oldval;
 
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index b4d244e7b60c..37712c32ba99 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED		{ 1 }
 
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 857630cff636..b2d8a67f727e 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -15,7 +15,7 @@
 #define __raw_spin_unlock_wait(lock) \
 	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
 	"\n1:\n\t"
@@ -35,7 +35,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	: "g2", "memory", "cc");
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned int result;
 	__asm__ __volatile__("ldstub [%1], %0"
@@ -45,7 +45,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return (result == 0);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
 }
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 43e514783582..38e16c40efc4 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -27,7 +27,7 @@
 	do {	rmb();			\
 	} while((lp)->lock)
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -46,7 +46,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	: "memory");
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long result;
 
@@ -59,7 +59,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return (result == 0UL);
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stb		%%g0, [%0]"
@@ -68,7 +68,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 	: "memory");
 }
 
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+static inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	unsigned long tmp1, tmp2;
 
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
index 37cbe01c585b..41d9a8fec13d 100644
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct {
 	volatile unsigned char lock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index efb38994859c..5655f75f10b7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+static inline int __raw_spin_is_locked(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
 }
 
-static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+static inline int __raw_spin_is_contended(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
 }
 #define __raw_spin_is_contended	__raw_spin_is_contended
 
-static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+static __always_inline void __raw_spin_lock(struct arch_spinlock *lock)
 {
 	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
+static __always_inline void __raw_spin_lock_flags(struct arch_spinlock *lock,
 						  unsigned long flags)
 {
 	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
 }
 
-static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+static __always_inline int __raw_spin_trylock(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
 }
 
-static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+static __always_inline void __raw_spin_unlock(struct arch_spinlock *lock)
 {
 	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
 }
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9357473c8da0..b1e70d51e40c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -318,14 +318,14 @@ struct pv_mmu_ops {
 			   phys_addr_t phys, pgprot_t flags);
 };
 
-struct raw_spinlock;
+struct arch_spinlock;
 struct pv_lock_ops {
-	int (*spin_is_locked)(struct raw_spinlock *lock);
-	int (*spin_is_contended)(struct raw_spinlock *lock);
-	void (*spin_lock)(struct raw_spinlock *lock);
-	void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct raw_spinlock *lock);
-	void (*spin_unlock)(struct raw_spinlock *lock);
+	int (*spin_is_locked)(struct arch_spinlock *lock);
+	int (*spin_is_contended)(struct arch_spinlock *lock);
+	void (*spin_lock)(struct arch_spinlock *lock);
+	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
+	int (*spin_trylock)(struct arch_spinlock *lock);
+	void (*spin_unlock)(struct arch_spinlock *lock);
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 4e77853321db..204b524fcf57 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -58,7 +58,7 @@
 #if (NR_CPUS < 256)
 #define TICKET_SHIFT 8
 
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
 	short inc = 0x0100;
 
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 		: "memory", "cc");
 }
 
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp, new;
 
@@ -96,7 +96,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
 		     : "+m" (lock->slock)
@@ -106,7 +106,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 #else
 #define TICKET_SHIFT 16
 
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
 	int inc = 0x00010000;
 	int tmp;
@@ -127,7 +127,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -149,7 +149,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
 		     : "+m" (lock->slock)
@@ -158,14 +158,14 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
 }
 
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
@@ -174,33 +174,33 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
 
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_contended(lock);
 }
 #define __raw_spin_is_contended	__raw_spin_is_contended
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	__ticket_spin_lock(lock);
 }
 
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	return __ticket_spin_trylock(lock);
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
 	__raw_spin_lock(lock);
@@ -208,7 +208,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 
 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
 
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
 		cpu_relax();
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 845f81c87091..2ae7637ed524 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -5,9 +5,9 @@
 # error "please don't include this file directly"
 #endif
 
-typedef struct raw_spinlock {
+typedef struct arch_spinlock {
 	unsigned int slock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b8ce165dde5d..0862d9d89c92 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -188,7 +188,7 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 3a7c5a44082e..a0f39e090684 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,7 +8,7 @@
 #include <asm/paravirt.h>
 
 static inline void
-default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	__raw_spin_lock(lock);
 }
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index eed156851f5d..9f908b9d1abe 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count;
  * we want to have the fastest, inlined, non-debug version
  * of a critical section, to be able to prove TSC time-warps:
  */
-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata arch_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
 
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 36a5141108df..24ded31b5aec 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -120,14 +120,14 @@ struct xen_spinlock {
 	unsigned short spinners;	/* count of waiting cpus */
 };
 
-static int xen_spin_is_locked(struct raw_spinlock *lock)
+static int xen_spin_is_locked(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
 	return xl->lock != 0;
 }
 
-static int xen_spin_is_contended(struct raw_spinlock *lock)
+static int xen_spin_is_contended(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct raw_spinlock *lock)
 	return xl->spinners != 0;
 }
 
-static int xen_spin_trylock(struct raw_spinlock *lock)
+static int xen_spin_trylock(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	u8 old = 1;
@@ -181,7 +181,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
 	__get_cpu_var(lock_spinners) = prev;
 }
 
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
+static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	struct xen_spinlock *prev;
@@ -254,7 +254,7 @@ out:
 	return ret;
 }
 
-static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
+static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	unsigned timeout;
@@ -291,12 +291,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
 	spin_time_accum_total(start_spin);
 }
 
-static void xen_spin_lock(struct raw_spinlock *lock)
+static void xen_spin_lock(struct arch_spinlock *lock)
 {
 	__xen_spin_lock(lock, false);
 }
 
-static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
 {
 	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
 }
@@ -317,7 +317,7 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 	}
 }
 
-static void xen_spin_unlock(struct raw_spinlock *lock)
+static void xen_spin_unlock(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index c8946465e63a..dcf0afad4a7f 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -15,18 +15,18 @@
 #  define ATOMIC_HASH_SIZE 4
 #  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
 
-extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
+extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 
 /* Can't use raw_spin_lock_irq because of #include problems, so
  * this is the substitute */
 #define _atomic_spin_lock_irqsave(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);	\
+	arch_spinlock_t *s = ATOMIC_HASH(l);	\
 	local_irq_save(f);			\
 	__raw_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
-	raw_spinlock_t *s = ATOMIC_HASH(l);		\
+	arch_spinlock_t *s = ATOMIC_HASH(l);		\
 	__raw_spin_unlock(s);				\
 	local_irq_restore(f);				\
 } while(0)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a9aaa709fb93..5ef7a4c060b5 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -8,7 +8,7 @@
  *
  * on SMP builds:
  *
- *  asm/spinlock_types.h: contains the raw_spinlock_t/raw_rwlock_t and the
+ *  asm/spinlock_types.h: contains the arch_spinlock_t/raw_rwlock_t and the
  *                        initializers
  *
  *  linux/spinlock_types.h:
@@ -75,7 +75,7 @@
 #define __lockfunc __attribute__((section(".spinlock.text")))
 
 /*
- * Pull the raw_spinlock_t and raw_rwlock_t definitions:
+ * Pull the arch_spinlock_t and raw_rwlock_t definitions:
  */
 #include <linux/spinlock_types.h>
 
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index f979d5d8a160..d4af2d7a86ea 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -18,7 +18,7 @@
 #include <linux/lockdep.h>
 
 typedef struct {
-	raw_spinlock_t raw_lock;
+	arch_spinlock_t raw_lock;
 #ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 04135b0e198e..34d36691c4ec 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -16,13 +16,13 @@
 
 typedef struct {
 	volatile unsigned int slock;
-} raw_spinlock_t;
+} arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { 1 }
 
 #else
 
-typedef struct { } raw_spinlock_t;
+typedef struct { } arch_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { }
 
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index d4841ed8215b..8ee2ac1bf636 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -20,19 +20,19 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define __raw_spin_is_locked(x)		((x)->slock == 0)
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(arch_spinlock_t *lock)
 {
 	lock->slock = 0;
 }
 
 static inline void
-__raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	local_irq_save(flags);
 	lock->slock = 0;
 }
 
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 {
 	char oldval = lock->slock;
 
@@ -41,7 +41,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return oldval > 0;
 }
 
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 {
 	lock->slock = 1;
 }
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 429540c70d3f..7cc50c62af59 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,7 +73,7 @@ module_param(lock_stat, int, 0644);
  * to use a raw spinlock - we really dont want the spinlock
  * code to recurse back into the lockdep code...
  */
-static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 static int graph_lock(void)
 {
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..5ac8ee0a9e35 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
 	int				cpu;
 	struct ring_buffer		*buffer;
 	spinlock_t			reader_lock;	/* serialize readers */
-	raw_spinlock_t			lock;
+	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		*pages;
 	struct buffer_page		*head_page;	/* read from head */
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
-	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c82dfd92fdfd..7d56cecc2c6e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -493,15 +493,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
  * protected by per_cpu spinlocks. But the action of the swap
  * needs its own lock.
  *
- * This is defined as a raw_spinlock_t in order to help
+ * This is defined as a arch_spinlock_t in order to help
  * with performance when lockdep debugging is enabled.
  *
  * It is also used in other places outside the update_max_tr
  * so it needs to be defined outside of the
  * CONFIG_TRACER_MAX_TRACE.
  */
-static raw_spinlock_t ftrace_max_lock =
-	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t ftrace_max_lock =
+	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 unsigned long __read_mostly	tracing_max_latency;
@@ -802,7 +802,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
 static int cmdline_idx;
-static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
 
 /* temporary disable recording */
 static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -1251,8 +1251,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
  */
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-	static raw_spinlock_t trace_buf_lock =
-		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	static arch_spinlock_t trace_buf_lock =
+		(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	static u32 trace_buf[TRACE_BUF_SIZE];
 
 	struct ftrace_event_call *call = &event_bprint;
@@ -1334,7 +1334,7 @@ int trace_array_printk(struct trace_array *tr,
 int trace_array_vprintk(struct trace_array *tr,
 			unsigned long ip, const char *fmt, va_list args)
 {
-	static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+	static arch_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
 	static char trace_buf[TRACE_BUF_SIZE];
 
 	struct ftrace_event_call *call = &event_print;
@@ -4307,8 +4307,8 @@ trace_printk_seq(struct trace_seq *s)
 
 static void __ftrace_dump(bool disable_tracing)
 {
-	static raw_spinlock_t ftrace_dump_lock =
-		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	static arch_spinlock_t ftrace_dump_lock =
+		(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
 	unsigned int old_userobj;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..206ec3d4b3c2 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
 /* keep prev_time and lock in the same cacheline. */
 static struct {
 	u64 prev_time;
-	raw_spinlock_t lock;
+	arch_spinlock_t lock;
 } trace_clock_struct ____cacheline_aligned_in_smp =
 	{
-		.lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
+		.lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
 	};
 
 u64 notrace trace_clock_global(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..4cf7e83ec235 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int			wakeup_current_cpu;
 static unsigned			wakeup_prio = -1;
 static int			wakeup_rt;
 
-static raw_spinlock_t wakeup_lock =
-	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t wakeup_lock =
+	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 static void __wakeup_reset(struct trace_array *tr);
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..9a82d568fdec 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
 };
 
 static unsigned long max_stack_size;
-static raw_spinlock_t max_stack_lock =
-	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t max_stack_lock =
+	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 static int stack_trace_disabled __read_mostly;
 static DEFINE_PER_CPU(int, trace_active);
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b0256490b..2acd501b3826 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -23,7 +23,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	lock->raw_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
-- 
cgit v1.2.3


From edc35bd72e2079b25f99c5da7d7a65dbbffc4a26 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 12:38:57 +0100
Subject: locking: Rename __RAW_SPIN_LOCK_UNLOCKED to __ARCH_SPIN_LOCK_UNLOCKED

Further name space cleanup. No functional change

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/spinlock_types.h    |  2 +-
 arch/arm/include/asm/spinlock_types.h      |  2 +-
 arch/blackfin/include/asm/spinlock_types.h |  2 +-
 arch/ia64/include/asm/spinlock_types.h     |  2 +-
 arch/m32r/include/asm/spinlock_types.h     |  2 +-
 arch/mips/include/asm/spinlock_types.h     |  2 +-
 arch/parisc/include/asm/spinlock_types.h   |  6 +++---
 arch/parisc/lib/bitops.c                   |  2 +-
 arch/powerpc/include/asm/spinlock_types.h  |  2 +-
 arch/powerpc/kernel/rtas.c                 |  2 +-
 arch/s390/include/asm/spinlock_types.h     |  2 +-
 arch/sh/include/asm/spinlock_types.h       |  2 +-
 arch/sparc/include/asm/spinlock_types.h    |  2 +-
 arch/x86/include/asm/spinlock_types.h      |  2 +-
 arch/x86/kernel/dumpstack.c                |  2 +-
 arch/x86/kernel/tsc_sync.c                 |  2 +-
 include/linux/spinlock_types.h             |  4 ++--
 include/linux/spinlock_types_up.h          |  4 ++--
 kernel/lockdep.c                           |  2 +-
 kernel/trace/ring_buffer.c                 |  2 +-
 kernel/trace/trace.c                       | 10 +++++-----
 kernel/trace/trace_clock.c                 |  2 +-
 kernel/trace/trace_sched_wakeup.c          |  2 +-
 kernel/trace/trace_stack.c                 |  2 +-
 lib/spinlock_debug.c                       |  2 +-
 25 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index bb94a51e53d2..08975ee0a100 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 5e9d3eadd167..9622e126a8de 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h
index 03b377abf5c0..c8a3928a58c5 100644
--- a/arch/blackfin/include/asm/spinlock_types.h
+++ b/arch/blackfin/include/asm/spinlock_types.h
@@ -17,7 +17,7 @@ typedef struct {
 	volatile unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 447ccc6ca7a8..6a11b65fa66d 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int read_counter	: 31;
diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h
index 17d15bd6322d..5873a8701107 100644
--- a/arch/m32r/include/asm/spinlock_types.h
+++ b/arch/m32r/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile int slock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 1 }
 
 typedef struct {
 	volatile int lock;
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index 2e1060892d3b..b4c5efaadb9c 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -14,7 +14,7 @@ typedef struct {
 	unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 735caafb81f5..396d2746ca57 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -4,10 +4,10 @@
 typedef struct {
 #ifdef CONFIG_PA20
 	volatile unsigned int slock;
-# define __RAW_SPIN_LOCK_UNLOCKED { 1 }
+# define __ARCH_SPIN_LOCK_UNLOCKED { 1 }
 #else
 	volatile unsigned int lock[4];
-# define __RAW_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
+# define __ARCH_SPIN_LOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
 #endif
 } arch_spinlock_t;
 
@@ -16,6 +16,6 @@ typedef struct {
 	volatile int counter;
 } raw_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ __RAW_SPIN_LOCK_UNLOCKED, 0 }
+#define __RAW_RW_LOCK_UNLOCKED		{ __ARCH_SPIN_LOCK_UNLOCKED, 0 }
 
 #endif
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
index fdd7f583de54..353963d42059 100644
--- a/arch/parisc/lib/bitops.c
+++ b/arch/parisc/lib/bitops.c
@@ -13,7 +13,7 @@
 
 #ifdef CONFIG_SMP
 arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
-	[0 ... (ATOMIC_HASH_SIZE-1)]  = __RAW_SPIN_LOCK_UNLOCKED
+	[0 ... (ATOMIC_HASH_SIZE-1)]  = __ARCH_SPIN_LOCK_UNLOCKED
 };
 #endif
 
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 4312e5baaf88..f5f39d82711f 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int slock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile signed int lock;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 579069c12152..57dfa414cfb8 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,7 +42,7 @@
 #include <asm/mmu.h>
 
 struct rtas_t rtas = {
-	.lock = __RAW_SPIN_LOCK_UNLOCKED
+	.lock = __ARCH_SPIN_LOCK_UNLOCKED
 };
 EXPORT_SYMBOL(rtas);
 
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index a93638eee3f7..e25c0370f6cd 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int owner_cpu;
 } __attribute__ ((aligned (4))) arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index 37712c32ba99..a3be2db960ed 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned int lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED		{ 1 }
+#define __ARCH_SPIN_LOCK_UNLOCKED		{ 1 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
index 41d9a8fec13d..c145e63a5d66 100644
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	volatile unsigned char lock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 2ae7637ed524..696f8364a4f3 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct arch_spinlock {
 	unsigned int slock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	unsigned int lock;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 0862d9d89c92..5b75afac8a38 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -188,7 +188,7 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
-static arch_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9f908b9d1abe..f1714697a09a 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count;
  * we want to have the fastest, inlined, non-debug version
  * of a critical section, to be able to prove TSC time-warps:
  */
-static __cpuinitdata arch_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index d4af2d7a86ea..7dadce303ebf 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -43,14 +43,14 @@ typedef struct {
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 # define __SPIN_LOCK_UNLOCKED(lockname)					\
-	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
+	(spinlock_t)	{	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
 				.magic = SPINLOCK_MAGIC,		\
 				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1,			\
 				SPIN_DEP_MAP_INIT(lockname) }
 #else
 # define __SPIN_LOCK_UNLOCKED(lockname) \
-	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
+	(spinlock_t)	{	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
 				SPIN_DEP_MAP_INIT(lockname) }
 #endif
 
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 34d36691c4ec..10db021f4875 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -18,13 +18,13 @@ typedef struct {
 	volatile unsigned int slock;
 } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED { 1 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { 1 }
 
 #else
 
 typedef struct { } arch_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED { }
+#define __ARCH_SPIN_LOCK_UNLOCKED { }
 
 #endif
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 7cc50c62af59..2389e3f85cf6 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,7 +73,7 @@ module_param(lock_stat, int, 0644);
  * to use a raw spinlock - we really dont want the spinlock
  * code to recurse back into the lockdep code...
  */
-static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 static int graph_lock(void)
 {
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5ac8ee0a9e35..fb7a0fa508b9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
-	cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7d56cecc2c6e..63bc1cc38219 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -501,7 +501,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
  * CONFIG_TRACER_MAX_TRACE.
  */
 static arch_spinlock_t ftrace_max_lock =
-	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 unsigned long __read_mostly	tracing_max_latency;
@@ -802,7 +802,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
 static int cmdline_idx;
-static arch_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 /* temporary disable recording */
 static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -1252,7 +1252,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	static arch_spinlock_t trace_buf_lock =
-		(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	static u32 trace_buf[TRACE_BUF_SIZE];
 
 	struct ftrace_event_call *call = &event_bprint;
@@ -1334,7 +1334,7 @@ int trace_array_printk(struct trace_array *tr,
 int trace_array_vprintk(struct trace_array *tr,
 			unsigned long ip, const char *fmt, va_list args)
 {
-	static arch_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+	static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 	static char trace_buf[TRACE_BUF_SIZE];
 
 	struct ftrace_event_call *call = &event_print;
@@ -4308,7 +4308,7 @@ trace_printk_seq(struct trace_seq *s)
 static void __ftrace_dump(bool disable_tracing)
 {
 	static arch_spinlock_t ftrace_dump_lock =
-		(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
 	unsigned int old_userobj;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 206ec3d4b3c2..433e2eda2d01 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -74,7 +74,7 @@ static struct {
 	arch_spinlock_t lock;
 } trace_clock_struct ____cacheline_aligned_in_smp =
 	{
-		.lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
+		.lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
 	};
 
 u64 notrace trace_clock_global(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4cf7e83ec235..e347853564e9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -29,7 +29,7 @@ static unsigned			wakeup_prio = -1;
 static int			wakeup_rt;
 
 static arch_spinlock_t wakeup_lock =
-	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 static void __wakeup_reset(struct trace_array *tr);
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 9a82d568fdec..728c35221483 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -28,7 +28,7 @@ static struct stack_trace max_stack_trace = {
 
 static unsigned long max_stack_size;
 static arch_spinlock_t max_stack_lock =
-	(arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 static int stack_trace_disabled __read_mostly;
 static DEFINE_PER_CPU(int, trace_active);
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 2acd501b3826..f73004137141 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -23,7 +23,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->raw_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
-- 
cgit v1.2.3


From 0199c4e68d1f02894bdefe4b5d9e9ee4aedd8d62 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 2 Dec 2009 20:01:25 +0100
Subject: locking: Convert __raw_spin* functions to arch_spin*

Name space cleanup. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/spinlock.h          | 18 ++++++------
 arch/arm/include/asm/spinlock.h            | 20 ++++++-------
 arch/blackfin/include/asm/spinlock.h       | 20 ++++++-------
 arch/cris/include/arch-v32/arch/spinlock.h | 46 +++++++++++++++---------------
 arch/ia64/include/asm/bitops.h             |  2 +-
 arch/ia64/include/asm/spinlock.h           | 26 ++++++++---------
 arch/m32r/include/asm/spinlock.h           | 28 +++++++++---------
 arch/mips/include/asm/spinlock.h           | 36 +++++++++++------------
 arch/parisc/include/asm/atomic.h           |  4 +--
 arch/parisc/include/asm/spinlock.h         | 44 ++++++++++++++--------------
 arch/powerpc/include/asm/spinlock.h        | 32 ++++++++++-----------
 arch/powerpc/kernel/rtas.c                 | 12 ++++----
 arch/powerpc/lib/locks.c                   |  4 +--
 arch/powerpc/platforms/pasemi/setup.c      |  8 +++---
 arch/s390/include/asm/spinlock.h           | 34 +++++++++++-----------
 arch/s390/lib/spinlock.c                   | 22 +++++++-------
 arch/sh/include/asm/spinlock.h             | 26 ++++++++---------
 arch/sparc/include/asm/spinlock_32.h       | 20 ++++++-------
 arch/sparc/include/asm/spinlock_64.h       | 18 ++++++------
 arch/x86/include/asm/paravirt.h            | 14 ++++-----
 arch/x86/include/asm/spinlock.h            | 26 ++++++++---------
 arch/x86/kernel/dumpstack.c                |  6 ++--
 arch/x86/kernel/paravirt-spinlocks.c       |  2 +-
 arch/x86/kernel/tsc_sync.c                 |  8 +++---
 include/asm-generic/bitops/atomic.h        |  4 +--
 include/linux/spinlock.h                   | 22 +++++++-------
 include/linux/spinlock_up.h                | 26 ++++++++---------
 kernel/lockdep.c                           | 18 ++++++------
 kernel/mutex-debug.h                       |  4 +--
 kernel/spinlock.c                          |  4 +--
 kernel/trace/ring_buffer.c                 | 12 ++++----
 kernel/trace/trace.c                       | 32 ++++++++++-----------
 kernel/trace/trace_clock.c                 |  4 +--
 kernel/trace/trace_sched_wakeup.c          | 12 ++++----
 kernel/trace/trace_selftest.c              |  4 +--
 kernel/trace/trace_stack.c                 | 12 ++++----
 lib/spinlock_debug.c                       |  8 +++---
 37 files changed, 319 insertions(+), 319 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index bdb26a1940b4..4dac79f504c3 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -12,18 +12,18 @@
  * We make no fairness assumptions. They have a cost.
  */
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-#define __raw_spin_is_locked(x)	((x)->lock != 0)
-#define __raw_spin_unlock_wait(x) \
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+#define arch_spin_is_locked(x)	((x)->lock != 0)
+#define arch_spin_unlock_wait(x) \
 		do { cpu_relax(); } while ((x)->lock)
 
-static inline void __raw_spin_unlock(arch_spinlock_t * lock)
+static inline void arch_spin_unlock(arch_spinlock_t * lock)
 {
 	mb();
 	lock->lock = 0;
 }
 
-static inline void __raw_spin_lock(arch_spinlock_t * lock)
+static inline void arch_spin_lock(arch_spinlock_t * lock)
 {
 	long tmp;
 
@@ -43,7 +43,7 @@ static inline void __raw_spin_lock(arch_spinlock_t * lock)
 	: "m"(lock->lock) : "memory");
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return !test_and_set_bit(0, &lock->lock);
 }
@@ -169,8 +169,8 @@ static inline void __raw_write_unlock(raw_rwlock_t * lock)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* _ALPHA_SPINLOCK_H */
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4e7712ee9394..de62eb098f68 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -17,13 +17,13 @@
  * Locked value: 1
  */
 
-#define __raw_spin_is_locked(x)		((x)->lock != 0)
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+#define arch_spin_is_locked(x)		((x)->lock != 0)
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -43,7 +43,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	smp_mb();
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -63,7 +63,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	}
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	smp_mb();
 
@@ -220,8 +220,8 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index fc16b4c5309b..62d49540e02b 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -24,31 +24,31 @@ asmlinkage void __raw_write_lock_asm(volatile int *ptr);
 asmlinkage int __raw_write_trylock_asm(volatile int *ptr);
 asmlinkage void __raw_write_unlock_asm(volatile int *ptr);
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __raw_spin_is_locked_asm(&lock->lock);
 }
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__raw_spin_lock_asm(&lock->lock);
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __raw_spin_trylock_asm(&lock->lock);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__raw_spin_unlock_asm(&lock->lock);
 }
 
-static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
+	while (arch_spin_is_locked(lock))
 		cpu_relax();
 }
 
@@ -92,9 +92,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 	__raw_write_unlock_asm(&rw->lock);
 }
 
-#define _raw_spin_relax(lock)  	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)  	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif
 
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index e253457765f2..a2e8a394d555 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -9,12 +9,12 @@ extern void cris_spin_unlock(void *l, int val);
 extern void cris_spin_lock(void *l);
 extern int cris_spin_trylock(void *l);
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *x)
+static inline int arch_spin_is_locked(arch_spinlock_t *x)
 {
 	return *(volatile signed char *)(&(x)->slock) <= 0;
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ volatile ("move.d %1,%0" \
 			  : "=m" (lock->slock) \
@@ -22,26 +22,26 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 			  : "memory");
 }
 
-static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
+	while (arch_spin_is_locked(lock))
 		cpu_relax();
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return cris_spin_trylock((void *)&lock->slock);
 }
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	cris_spin_lock((void *)&lock->slock);
 }
 
 static inline void
-__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
-	__raw_spin_lock(lock);
+	arch_spin_lock(lock);
 }
 
 /*
@@ -68,64 +68,64 @@ static inline int __raw_write_can_lock(raw_rwlock_t *x)
 
 static  inline void __raw_read_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	while (rw->lock == 0);
 	rw->lock--;
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 }
 
 static  inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
 	rw->lock = 0;
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 }
 
 static  inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	rw->lock++;
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 }
 
 static  inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
 	rw->lock = RW_LOCK_BIAS;
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 }
 
 static  inline int __raw_read_trylock(raw_rwlock_t *rw)
 {
 	int ret = 0;
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	if (rw->lock != 0) {
 		rw->lock--;
 		ret = 1;
 	}
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 	return ret;
 }
 
 static  inline int __raw_write_trylock(raw_rwlock_t *rw)
 {
 	int ret = 0;
-	__raw_spin_lock(&rw->slock);
+	arch_spin_lock(&rw->slock);
 	if (rw->lock == RW_LOCK_BIAS) {
 		rw->lock = 0;
 		ret = 1;
 	}
-	__raw_spin_unlock(&rw->slock);
+	arch_spin_unlock(&rw->slock);
 	return 1;
 }
 
 #define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
 #define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* __ASM_ARCH_SPINLOCK_H */
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 57a2787bc9fb..6ebc229a1c51 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -127,7 +127,7 @@ clear_bit_unlock (int nr, volatile void *addr)
  * @addr: Address to start counting from
  *
  * Similarly to clear_bit_unlock, the implementation uses a store
- * with release semantics. See also __raw_spin_unlock().
+ * with release semantics. See also arch_spin_unlock().
  */
 static __inline__ void
 __clear_bit_unlock(int nr, void *addr)
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 9fbdf7e61087..b06165f6352f 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -17,7 +17,7 @@
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
-#define __raw_spin_lock_init(x)			((x)->lock = 0)
+#define arch_spin_lock_init(x)			((x)->lock = 0)
 
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
@@ -103,39 +103,39 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
 }
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
 }
 
-static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_contended(lock);
 }
-#define __raw_spin_is_contended	__raw_spin_is_contended
+#define arch_spin_is_contended	arch_spin_is_contended
 
-static __always_inline void __raw_spin_lock(arch_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__ticket_spin_lock(lock);
 }
 
-static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __ticket_spin_trylock(lock);
 }
 
-static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock,
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
-	__raw_spin_lock(lock);
+	arch_spin_lock(lock);
 }
 
-static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock_wait(lock);
 }
@@ -285,8 +285,8 @@ static inline int __raw_read_trylock(raw_rwlock_t *x)
 	return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word;
 }
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /*  _ASM_IA64_SPINLOCK_H */
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 0c0164225bc0..8acac950a43c 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -24,19 +24,19 @@
  * We make no fairness assumptions. They have a cost.
  */
 
-#define __raw_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-#define __raw_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (__raw_spin_is_locked(x))
+#define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+#define arch_spin_unlock_wait(x) \
+		do { cpu_relax(); } while (arch_spin_is_locked(x))
 
 /**
- * __raw_spin_trylock - Try spin lock and return a result
+ * arch_spin_trylock - Try spin lock and return a result
  * @lock: Pointer to the lock variable
  *
- * __raw_spin_trylock() tries to get the lock and returns a result.
+ * arch_spin_trylock() tries to get the lock and returns a result.
  * On the m32r, the result value is 1 (= Success) or 0 (= Failure).
  */
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	int oldval;
 	unsigned long tmp1, tmp2;
@@ -50,7 +50,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	 * }
 	 */
 	__asm__ __volatile__ (
-		"# __raw_spin_trylock		\n\t"
+		"# arch_spin_trylock		\n\t"
 		"ldi	%1, #0;			\n\t"
 		"mvfc	%2, psw;		\n\t"
 		"clrpsw	#0x40 -> nop;		\n\t"
@@ -69,7 +69,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	return (oldval > 0);
 }
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp0, tmp1;
 
@@ -84,7 +84,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	 * }
 	 */
 	__asm__ __volatile__ (
-		"# __raw_spin_lock		\n\t"
+		"# arch_spin_lock		\n\t"
 		".fillinsn			\n"
 		"1:				\n\t"
 		"mvfc	%1, psw;		\n\t"
@@ -111,7 +111,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	mb();
 	lock->slock = 1;
@@ -319,8 +319,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif	/* _ASM_M32R_SPINLOCK_H */
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 0f16d0673b4a..95edebaaf22a 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -34,33 +34,33 @@
  * becomes equal to the the initial value of the tail.
  */
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	unsigned int counters = ACCESS_ONCE(lock->lock);
 
 	return ((counters >> 14) ^ counters) & 0x1fff;
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-#define __raw_spin_unlock_wait(x) \
-	while (__raw_spin_is_locked(x)) { cpu_relax(); }
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+#define arch_spin_unlock_wait(x) \
+	while (arch_spin_is_locked(x)) { cpu_relax(); }
 
-static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	unsigned int counters = ACCESS_ONCE(lock->lock);
 
 	return (((counters >> 14) - counters) & 0x1fff) > 1;
 }
-#define __raw_spin_is_contended	__raw_spin_is_contended
+#define arch_spin_is_contended	arch_spin_is_contended
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	int my_ticket;
 	int tmp;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__ (
-		"	.set push		# __raw_spin_lock	\n"
+		"	.set push		# arch_spin_lock	\n"
 		"	.set noreorder					\n"
 		"							\n"
 		"1:	ll	%[ticket], %[ticket_ptr]		\n"
@@ -94,7 +94,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 		  [my_ticket] "=&r" (my_ticket));
 	} else {
 		__asm__ __volatile__ (
-		"	.set push		# __raw_spin_lock	\n"
+		"	.set push		# arch_spin_lock	\n"
 		"	.set noreorder					\n"
 		"							\n"
 		"	ll	%[ticket], %[ticket_ptr]		\n"
@@ -134,7 +134,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	smp_llsc_mb();
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	int tmp;
 
@@ -142,7 +142,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__ (
-		"				# __raw_spin_unlock	\n"
+		"				# arch_spin_unlock	\n"
 		"1:	ll	%[ticket], %[ticket_ptr]		\n"
 		"	addiu	%[ticket], %[ticket], 1			\n"
 		"	ori	%[ticket], %[ticket], 0x2000		\n"
@@ -153,7 +153,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 		  [ticket] "=&r" (tmp));
 	} else {
 		__asm__ __volatile__ (
-		"	.set push		# __raw_spin_unlock	\n"
+		"	.set push		# arch_spin_unlock	\n"
 		"	.set noreorder					\n"
 		"							\n"
 		"	ll	%[ticket], %[ticket_ptr]		\n"
@@ -174,13 +174,13 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 	}
 }
 
-static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp, tmp2, tmp3;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__ (
-		"	.set push		# __raw_spin_trylock	\n"
+		"	.set push		# arch_spin_trylock	\n"
 		"	.set noreorder					\n"
 		"							\n"
 		"1:	ll	%[ticket], %[ticket_ptr]		\n"
@@ -204,7 +204,7 @@ static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock)
 		  [now_serving] "=&r" (tmp3));
 	} else {
 		__asm__ __volatile__ (
-		"	.set push		# __raw_spin_trylock	\n"
+		"	.set push		# arch_spin_trylock	\n"
 		"	.set noreorder					\n"
 		"							\n"
 		"	ll	%[ticket], %[ticket_ptr]		\n"
@@ -483,8 +483,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* _ASM_SPINLOCK_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 3a4ea778d4b6..716634d1f546 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -34,12 +34,12 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 #define _atomic_spin_lock_irqsave(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);		\
 	local_irq_save(f);			\
-	__raw_spin_lock(s);			\
+	arch_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);			\
-	__raw_spin_unlock(s);				\
+	arch_spin_unlock(s);				\
 	local_irq_restore(f);				\
 } while(0)
 
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 69e8dca26744..235e7e386e2a 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -5,17 +5,17 @@
 #include <asm/processor.h>
 #include <asm/spinlock_types.h>
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *x)
+static inline int arch_spin_is_locked(arch_spinlock_t *x)
 {
 	volatile unsigned int *a = __ldcw_align(x);
 	return *a == 0;
 }
 
-#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
-#define __raw_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (__raw_spin_is_locked(x))
+#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
+#define arch_spin_unlock_wait(x) \
+		do { cpu_relax(); } while (arch_spin_is_locked(x))
 
-static inline void __raw_spin_lock_flags(arch_spinlock_t *x,
+static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
 {
 	volatile unsigned int *a;
@@ -33,7 +33,7 @@ static inline void __raw_spin_lock_flags(arch_spinlock_t *x,
 	mb();
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *x)
+static inline void arch_spin_unlock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
 	mb();
@@ -42,7 +42,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *x)
 	mb();
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *x)
+static inline int arch_spin_trylock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
 	int ret;
@@ -73,9 +73,9 @@ static  __inline__ void __raw_read_lock(raw_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
+	arch_spin_lock_flags(&rw->lock, flags);
 	rw->counter++;
-	__raw_spin_unlock(&rw->lock);
+	arch_spin_unlock(&rw->lock);
 	local_irq_restore(flags);
 }
 
@@ -85,9 +85,9 @@ static  __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
+	arch_spin_lock_flags(&rw->lock, flags);
 	rw->counter--;
-	__raw_spin_unlock(&rw->lock);
+	arch_spin_unlock(&rw->lock);
 	local_irq_restore(flags);
 }
 
@@ -98,9 +98,9 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
 	unsigned long flags;
  retry:
 	local_irq_save(flags);
-	if (__raw_spin_trylock(&rw->lock)) {
+	if (arch_spin_trylock(&rw->lock)) {
 		rw->counter++;
-		__raw_spin_unlock(&rw->lock);
+		arch_spin_unlock(&rw->lock);
 		local_irq_restore(flags);
 		return 1;
 	}
@@ -111,7 +111,7 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
 		return 0;
 
 	/* Wait until we have a realistic chance at the lock */
-	while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0)
+	while (arch_spin_is_locked(&rw->lock) && rw->counter >= 0)
 		cpu_relax();
 
 	goto retry;
@@ -124,10 +124,10 @@ static __inline__ void __raw_write_lock(raw_rwlock_t *rw)
 	unsigned long flags;
 retry:
 	local_irq_save(flags);
-	__raw_spin_lock_flags(&rw->lock, flags);
+	arch_spin_lock_flags(&rw->lock, flags);
 
 	if (rw->counter != 0) {
-		__raw_spin_unlock(&rw->lock);
+		arch_spin_unlock(&rw->lock);
 		local_irq_restore(flags);
 
 		while (rw->counter != 0)
@@ -144,7 +144,7 @@ retry:
 static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
 {
 	rw->counter = 0;
-	__raw_spin_unlock(&rw->lock);
+	arch_spin_unlock(&rw->lock);
 }
 
 /* Note that we have to ensure interrupts are disabled in case we're
@@ -155,13 +155,13 @@ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
 	int result = 0;
 
 	local_irq_save(flags);
-	if (__raw_spin_trylock(&rw->lock)) {
+	if (arch_spin_trylock(&rw->lock)) {
 		if (rw->counter == 0) {
 			rw->counter = -1;
 			result = 1;
 		} else {
 			/* Read-locked.  Oh well. */
-			__raw_spin_unlock(&rw->lock);
+			arch_spin_unlock(&rw->lock);
 		}
 	}
 	local_irq_restore(flags);
@@ -190,8 +190,8 @@ static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index c0d44c92ff0e..cdcaf6b97087 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,7 +28,7 @@
 #include <asm/asm-compat.h>
 #include <asm/synch.h>
 
-#define __raw_spin_is_locked(x)		((x)->slock != 0)
+#define arch_spin_is_locked(x)		((x)->slock != 0)
 
 #ifdef CONFIG_PPC64
 /* use 0x800000yy when locked, where yy == CPU number */
@@ -54,7 +54,7 @@
  * This returns the old value in the lock, so we succeeded
  * in getting the lock if the return value is 0.
  */
-static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock)
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp, token;
 
@@ -73,10 +73,10 @@ static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock)
 	return tmp;
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
-	return arch_spin_trylock(lock) == 0;
+	return __arch_spin_trylock(lock) == 0;
 }
 
 /*
@@ -104,11 +104,11 @@ extern void __rw_yield(raw_rwlock_t *lock);
 #define SHARED_PROCESSOR	0
 #endif
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
 	while (1) {
-		if (likely(arch_spin_trylock(lock) == 0))
+		if (likely(__arch_spin_trylock(lock) == 0))
 			break;
 		do {
 			HMT_low();
@@ -120,13 +120,13 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 }
 
 static inline
-void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	unsigned long flags_dis;
 
 	CLEAR_IO_SYNC;
 	while (1) {
-		if (likely(arch_spin_trylock(lock) == 0))
+		if (likely(__arch_spin_trylock(lock) == 0))
 			break;
 		local_save_flags(flags_dis);
 		local_irq_restore(flags);
@@ -140,19 +140,19 @@ void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 	}
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	SYNC_IO;
-	__asm__ __volatile__("# __raw_spin_unlock\n\t"
+	__asm__ __volatile__("# arch_spin_unlock\n\t"
 				LWSYNC_ON_SMP: : :"memory");
 	lock->slock = 0;
 }
 
 #ifdef CONFIG_PPC64
-extern void __raw_spin_unlock_wait(arch_spinlock_t *lock);
+extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #else
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 #endif
 
 /*
@@ -290,9 +290,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	__spin_yield(lock)
-#define _raw_read_relax(lock)	__rw_yield(lock)
-#define _raw_write_relax(lock)	__rw_yield(lock)
+#define arch_spin_relax(lock)	__spin_yield(lock)
+#define arch_read_relax(lock)	__rw_yield(lock)
+#define arch_write_relax(lock)	__rw_yield(lock)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 57dfa414cfb8..fd0d29493fd6 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -80,13 +80,13 @@ static unsigned long lock_rtas(void)
 
 	local_irq_save(flags);
 	preempt_disable();
-	__raw_spin_lock_flags(&rtas.lock, flags);
+	arch_spin_lock_flags(&rtas.lock, flags);
 	return flags;
 }
 
 static void unlock_rtas(unsigned long flags)
 {
-	__raw_spin_unlock(&rtas.lock);
+	arch_spin_unlock(&rtas.lock);
 	local_irq_restore(flags);
 	preempt_enable();
 }
@@ -987,10 +987,10 @@ void __cpuinit rtas_give_timebase(void)
 
 	local_irq_save(flags);
 	hard_irq_disable();
-	__raw_spin_lock(&timebase_lock);
+	arch_spin_lock(&timebase_lock);
 	rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
 	timebase = get_tb();
-	__raw_spin_unlock(&timebase_lock);
+	arch_spin_unlock(&timebase_lock);
 
 	while (timebase)
 		barrier();
@@ -1002,8 +1002,8 @@ void __cpuinit rtas_take_timebase(void)
 {
 	while (!timebase)
 		barrier();
-	__raw_spin_lock(&timebase_lock);
+	arch_spin_lock(&timebase_lock);
 	set_tb(timebase >> 32, timebase & 0xffffffff);
 	timebase = 0;
-	__raw_spin_unlock(&timebase_lock);
+	arch_spin_unlock(&timebase_lock);
 }
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index b06294cde499..ee395e392115 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -82,7 +82,7 @@ void __rw_yield(raw_rwlock_t *rw)
 }
 #endif
 
-void __raw_spin_unlock_wait(arch_spinlock_t *lock)
+void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (lock->slock) {
 		HMT_low();
@@ -92,4 +92,4 @@ void __raw_spin_unlock_wait(arch_spinlock_t *lock)
 	HMT_medium();
 }
 
-EXPORT_SYMBOL(__raw_spin_unlock_wait);
+EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index be36fece41d7..242f8095c2df 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -80,11 +80,11 @@ static void __devinit pas_give_timebase(void)
 
 	local_irq_save(flags);
 	hard_irq_disable();
-	__raw_spin_lock(&timebase_lock);
+	arch_spin_lock(&timebase_lock);
 	mtspr(SPRN_TBCTL, TBCTL_FREEZE);
 	isync();
 	timebase = get_tb();
-	__raw_spin_unlock(&timebase_lock);
+	arch_spin_unlock(&timebase_lock);
 
 	while (timebase)
 		barrier();
@@ -97,10 +97,10 @@ static void __devinit pas_take_timebase(void)
 	while (!timebase)
 		smp_rmb();
 
-	__raw_spin_lock(&timebase_lock);
+	arch_spin_lock(&timebase_lock);
 	set_tb(timebase >> 32, timebase & 0xffffffff);
 	timebase = 0;
-	__raw_spin_unlock(&timebase_lock);
+	arch_spin_unlock(&timebase_lock);
 }
 
 struct smp_ops_t pas_smp_ops = {
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 6121fa4b83d9..a94c146657a9 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -52,27 +52,27 @@ _raw_compare_and_swap(volatile unsigned int *lock,
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-#define __raw_spin_is_locked(x) ((x)->owner_cpu != 0)
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) \
-		 _raw_spin_relax(lock); } while (0)
+#define arch_spin_is_locked(x) ((x)->owner_cpu != 0)
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) \
+		 arch_spin_relax(lock); } while (0)
 
-extern void _raw_spin_lock_wait(arch_spinlock_t *);
-extern void _raw_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
-extern int _raw_spin_trylock_retry(arch_spinlock_t *);
-extern void _raw_spin_relax(arch_spinlock_t *lock);
+extern void arch_spin_lock_wait(arch_spinlock_t *);
+extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
+extern int arch_spin_trylock_retry(arch_spinlock_t *);
+extern void arch_spin_relax(arch_spinlock_t *lock);
 
-static inline void __raw_spin_lock(arch_spinlock_t *lp)
+static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
 	int old;
 
 	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
 	if (likely(old == 0))
 		return;
-	_raw_spin_lock_wait(lp);
+	arch_spin_lock_wait(lp);
 }
 
-static inline void __raw_spin_lock_flags(arch_spinlock_t *lp,
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
 					 unsigned long flags)
 {
 	int old;
@@ -80,20 +80,20 @@ static inline void __raw_spin_lock_flags(arch_spinlock_t *lp,
 	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
 	if (likely(old == 0))
 		return;
-	_raw_spin_lock_wait_flags(lp, flags);
+	arch_spin_lock_wait_flags(lp, flags);
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lp)
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
 {
 	int old;
 
 	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
 	if (likely(old == 0))
 		return 1;
-	return _raw_spin_trylock_retry(lp);
+	return arch_spin_trylock_retry(lp);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lp)
+static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	_raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0);
 }
@@ -188,7 +188,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 	return _raw_write_trylock_retry(rw);
 }
 
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index d4cbf71a6077..f4596452f072 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -39,7 +39,7 @@ static inline void _raw_yield_cpu(int cpu)
 		_raw_yield();
 }
 
-void _raw_spin_lock_wait(arch_spinlock_t *lp)
+void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
 	int count = spin_retry;
 	unsigned int cpu = ~smp_processor_id();
@@ -51,15 +51,15 @@ void _raw_spin_lock_wait(arch_spinlock_t *lp)
 				_raw_yield_cpu(~owner);
 			count = spin_retry;
 		}
-		if (__raw_spin_is_locked(lp))
+		if (arch_spin_is_locked(lp))
 			continue;
 		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
 			return;
 	}
 }
-EXPORT_SYMBOL(_raw_spin_lock_wait);
+EXPORT_SYMBOL(arch_spin_lock_wait);
 
-void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
+void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
 	int count = spin_retry;
 	unsigned int cpu = ~smp_processor_id();
@@ -72,7 +72,7 @@ void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 				_raw_yield_cpu(~owner);
 			count = spin_retry;
 		}
-		if (__raw_spin_is_locked(lp))
+		if (arch_spin_is_locked(lp))
 			continue;
 		local_irq_disable();
 		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
@@ -80,30 +80,30 @@ void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 		local_irq_restore(flags);
 	}
 }
-EXPORT_SYMBOL(_raw_spin_lock_wait_flags);
+EXPORT_SYMBOL(arch_spin_lock_wait_flags);
 
-int _raw_spin_trylock_retry(arch_spinlock_t *lp)
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
 	unsigned int cpu = ~smp_processor_id();
 	int count;
 
 	for (count = spin_retry; count > 0; count--) {
-		if (__raw_spin_is_locked(lp))
+		if (arch_spin_is_locked(lp))
 			continue;
 		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
 			return 1;
 	}
 	return 0;
 }
-EXPORT_SYMBOL(_raw_spin_trylock_retry);
+EXPORT_SYMBOL(arch_spin_trylock_retry);
 
-void _raw_spin_relax(arch_spinlock_t *lock)
+void arch_spin_relax(arch_spinlock_t *lock)
 {
 	unsigned int cpu = lock->owner_cpu;
 	if (cpu != 0)
 		_raw_yield_cpu(~cpu);
 }
-EXPORT_SYMBOL(_raw_spin_relax);
+EXPORT_SYMBOL(arch_spin_relax);
 
 void _raw_read_lock_wait(raw_rwlock_t *rw)
 {
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 5a05b3fcefbe..da1c6491ed4b 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -23,10 +23,10 @@
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
-#define __raw_spin_is_locked(x)		((x)->lock <= 0)
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-#define __raw_spin_unlock_wait(x) \
-	do { while (__raw_spin_is_locked(x)) cpu_relax(); } while (0)
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+#define arch_spin_unlock_wait(x) \
+	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
@@ -34,14 +34,14 @@
  *
  * We make no fairness assumptions.  They have a cost.
  */
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 	unsigned long oldval;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%2, %0	! __raw_spin_lock	\n\t"
+		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
 		"mov		%0, %1				\n\t"
 		"mov		#0, %0				\n\t"
 		"movco.l	%0, @%2				\n\t"
@@ -54,12 +54,12 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
 	__asm__ __volatile__ (
-		"mov		#1, %0 ! __raw_spin_unlock	\n\t"
+		"mov		#1, %0 ! arch_spin_unlock	\n\t"
 		"mov.l		%0, @%1				\n\t"
 		: "=&z" (tmp)
 		: "r" (&lock->lock)
@@ -67,13 +67,13 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 	);
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp, oldval;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%2, %0	! __raw_spin_trylock	\n\t"
+		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
 		"mov		%0, %1				\n\t"
 		"mov		#0, %0				\n\t"
 		"movco.l	%0, @%2				\n\t"
@@ -219,8 +219,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* __ASM_SH_SPINLOCK_H */
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index b2d8a67f727e..9b0f2f53c81c 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -10,12 +10,12 @@
 
 #include <asm/psr.h>
 
-#define __raw_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
+#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
 	"\n1:\n\t"
@@ -35,7 +35,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	: "g2", "memory", "cc");
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned int result;
 	__asm__ __volatile__("ldstub [%1], %0"
@@ -45,7 +45,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	return (result == 0);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
 }
@@ -176,13 +176,13 @@ static inline int arch_read_trylock(raw_rwlock_t *rw)
 
 #define __raw_write_unlock(rw)	do { (rw)->lock = 0; } while(0)
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define __raw_read_lock_flags(rw, flags)   __raw_read_lock(rw)
 #define __raw_write_lock_flags(rw, flags)  __raw_write_lock(rw)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #define __raw_read_can_lock(rw) (!((rw)->lock & 0xff))
 #define __raw_write_can_lock(rw) (!(rw)->lock)
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 38e16c40efc4..7cf58a2fcda4 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -21,13 +21,13 @@
  * the spinner sections must be pre-V9 branches.
  */
 
-#define __raw_spin_is_locked(lp)	((lp)->lock != 0)
+#define arch_spin_is_locked(lp)	((lp)->lock != 0)
 
-#define __raw_spin_unlock_wait(lp)	\
+#define arch_spin_unlock_wait(lp)	\
 	do {	rmb();			\
 	} while((lp)->lock)
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
 
@@ -46,7 +46,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock)
 	: "memory");
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long result;
 
@@ -59,7 +59,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	return (result == 0UL);
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stb		%%g0, [%0]"
@@ -68,7 +68,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 	: "memory");
 }
 
-static inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	unsigned long tmp1, tmp2;
 
@@ -222,9 +222,9 @@ static int inline arch_write_trylock(raw_rwlock_t *lock)
 #define __raw_read_can_lock(rw)		(!((rw)->lock & 0x80000000UL))
 #define __raw_write_can_lock(rw)	(!(rw)->lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 #endif /* !(__ASSEMBLY__) */
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5655f75f10b7..dd59a85a918f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int __raw_spin_is_locked(struct arch_spinlock *lock)
+static inline int arch_spin_is_locked(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
 }
 
-static inline int __raw_spin_is_contended(struct arch_spinlock *lock)
+static inline int arch_spin_is_contended(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
 }
-#define __raw_spin_is_contended	__raw_spin_is_contended
+#define arch_spin_is_contended	arch_spin_is_contended
 
-static __always_inline void __raw_spin_lock(struct arch_spinlock *lock)
+static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
 {
 	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(struct arch_spinlock *lock,
+static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
 						  unsigned long flags)
 {
 	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
 }
 
-static __always_inline int __raw_spin_trylock(struct arch_spinlock *lock)
+static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
 }
 
-static __always_inline void __raw_spin_unlock(struct arch_spinlock *lock)
+static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
 {
 	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
 }
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 204b524fcf57..ab9055fd57d9 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -174,43 +174,43 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
 
-static inline int __raw_spin_is_locked(arch_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
 }
 
-static inline int __raw_spin_is_contended(arch_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_contended(lock);
 }
-#define __raw_spin_is_contended	__raw_spin_is_contended
+#define arch_spin_is_contended	arch_spin_is_contended
 
-static __always_inline void __raw_spin_lock(arch_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__ticket_spin_lock(lock);
 }
 
-static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __ticket_spin_trylock(lock);
 }
 
-static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
 
-static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock,
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
-	__raw_spin_lock(lock);
+	arch_spin_lock(lock);
 }
 
 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
 
-static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
+	while (arch_spin_is_locked(lock))
 		cpu_relax();
 }
 
@@ -298,9 +298,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
 #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
 
-#define _raw_spin_relax(lock)	cpu_relax()
-#define _raw_read_relax(lock)	cpu_relax()
-#define _raw_write_relax(lock)	cpu_relax()
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
 
 /* The {read|write|spin}_lock() on x86 are full memory barriers. */
 static inline void smp_mb__after_lock(void) { }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 5b75afac8a38..0a0aa1cec8f1 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -207,11 +207,11 @@ unsigned __kprobes long oops_begin(void)
 	/* racy, but better than risking deadlock. */
 	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
-	if (!__raw_spin_trylock(&die_lock)) {
+	if (!arch_spin_trylock(&die_lock)) {
 		if (cpu == die_owner)
 			/* nested oops. should stop eventually */;
 		else
-			__raw_spin_lock(&die_lock);
+			arch_spin_lock(&die_lock);
 	}
 	die_nest_count++;
 	die_owner = cpu;
@@ -231,7 +231,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	die_nest_count--;
 	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
-		__raw_spin_unlock(&die_lock);
+		arch_spin_unlock(&die_lock);
 	raw_local_irq_restore(flags);
 	oops_exit();
 
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index a0f39e090684..676b8c77a976 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -10,7 +10,7 @@
 static inline void
 default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
-	__raw_spin_lock(lock);
+	arch_spin_lock(lock);
 }
 
 struct pv_lock_ops pv_lock_ops = {
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index f1714697a09a..0aa5fed8b9e6 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void)
 		 * previous TSC that was measured (possibly on
 		 * another CPU) and update the previous TSC timestamp.
 		 */
-		__raw_spin_lock(&sync_lock);
+		arch_spin_lock(&sync_lock);
 		prev = last_tsc;
 		rdtsc_barrier();
 		now = get_cycles();
 		rdtsc_barrier();
 		last_tsc = now;
-		__raw_spin_unlock(&sync_lock);
+		arch_spin_unlock(&sync_lock);
 
 		/*
 		 * Be nice every now and then (and also check whether
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void)
 		 * we saw a time-warp of the TSC going backwards:
 		 */
 		if (unlikely(prev > now)) {
-			__raw_spin_lock(&sync_lock);
+			arch_spin_lock(&sync_lock);
 			max_warp = max(max_warp, prev - now);
 			nr_warps++;
-			__raw_spin_unlock(&sync_lock);
+			arch_spin_unlock(&sync_lock);
 		}
 	}
 	WARN(!(now-start),
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index dcf0afad4a7f..ecc44a8e2b44 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -22,12 +22,12 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 #define _atomic_spin_lock_irqsave(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);	\
 	local_irq_save(f);			\
-	__raw_spin_lock(s);			\
+	arch_spin_lock(s);			\
 } while(0)
 
 #define _atomic_spin_unlock_irqrestore(l,f) do {	\
 	arch_spinlock_t *s = ATOMIC_HASH(l);		\
-	__raw_spin_unlock(s);				\
+	arch_spin_unlock(s);				\
 	local_irq_restore(f);				\
 } while(0)
 
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 5ef7a4c060b5..de3a022489c6 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -14,7 +14,7 @@
  *  linux/spinlock_types.h:
  *                        defines the generic type and initializers
  *
- *  asm/spinlock.h:       contains the __raw_spin_*()/etc. lowlevel
+ *  asm/spinlock.h:       contains the arch_spin_*()/etc. lowlevel
  *                        implementations, mostly inline assembly code
  *
  *   (also included on UP-debug builds:)
@@ -34,7 +34,7 @@
  *                        defines the generic type and initializers
  *
  *  linux/spinlock_up.h:
- *                        contains the __raw_spin_*()/etc. version of UP
+ *                        contains the arch_spin_*()/etc. version of UP
  *                        builds. (which are NOPs on non-debug, non-preempt
  *                        builds)
  *
@@ -103,17 +103,17 @@ do {								\
 	do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
-#define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
+#define spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
 
 #ifdef CONFIG_GENERIC_LOCKBREAK
 #define spin_is_contended(lock) ((lock)->break_lock)
 #else
 
-#ifdef __raw_spin_is_contended
-#define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
+#ifdef arch_spin_is_contended
+#define spin_is_contended(lock)	arch_spin_is_contended(&(lock)->raw_lock)
 #else
 #define spin_is_contended(lock)	(((void)(lock), 0))
-#endif /*__raw_spin_is_contended*/
+#endif /*arch_spin_is_contended*/
 #endif
 
 /* The lock does not imply full memory barrier. */
@@ -125,7 +125,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
  */
-#define spin_unlock_wait(lock)	__raw_spin_unlock_wait(&(lock)->raw_lock)
+#define spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void _raw_spin_lock(spinlock_t *lock);
@@ -133,11 +133,11 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
  extern int _raw_spin_trylock(spinlock_t *lock);
  extern void _raw_spin_unlock(spinlock_t *lock);
 #else
-# define _raw_spin_lock(lock)		__raw_spin_lock(&(lock)->raw_lock)
+# define _raw_spin_lock(lock)		arch_spin_lock(&(lock)->raw_lock)
 # define _raw_spin_lock_flags(lock, flags) \
-		__raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
-# define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
+		arch_spin_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_spin_trylock(lock)	arch_spin_trylock(&(lock)->raw_lock)
+# define _raw_spin_unlock(lock)		arch_spin_unlock(&(lock)->raw_lock)
 #endif
 
 /*
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 8ee2ac1bf636..1d3bcc3cf7c6 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -18,21 +18,21 @@
  */
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-#define __raw_spin_is_locked(x)		((x)->slock == 0)
+#define arch_spin_is_locked(x)		((x)->slock == 0)
 
-static inline void __raw_spin_lock(arch_spinlock_t *lock)
+static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	lock->slock = 0;
 }
 
 static inline void
-__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	local_irq_save(flags);
 	lock->slock = 0;
 }
 
-static inline int __raw_spin_trylock(arch_spinlock_t *lock)
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	char oldval = lock->slock;
 
@@ -41,7 +41,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock)
 	return oldval > 0;
 }
 
-static inline void __raw_spin_unlock(arch_spinlock_t *lock)
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	lock->slock = 1;
 }
@@ -57,20 +57,20 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock)
 #define __raw_write_unlock(lock)	do { (void)(lock); } while (0)
 
 #else /* DEBUG_SPINLOCK */
-#define __raw_spin_is_locked(lock)	((void)(lock), 0)
+#define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
-# define __raw_spin_lock(lock)		do { (void)(lock); } while (0)
-# define __raw_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
-# define __raw_spin_unlock(lock)	do { (void)(lock); } while (0)
-# define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
+# define arch_spin_lock(lock)		do { (void)(lock); } while (0)
+# define arch_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
+# define arch_spin_unlock(lock)	do { (void)(lock); } while (0)
+# define arch_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
-#define __raw_spin_is_contended(lock)	(((void)(lock), 0))
+#define arch_spin_is_contended(lock)	(((void)(lock), 0))
 
 #define __raw_read_can_lock(lock)	(((void)(lock), 1))
 #define __raw_write_can_lock(lock)	(((void)(lock), 1))
 
-#define __raw_spin_unlock_wait(lock) \
-		do { cpu_relax(); } while (__raw_spin_is_locked(lock))
+#define arch_spin_unlock_wait(lock) \
+		do { cpu_relax(); } while (arch_spin_is_locked(lock))
 
 #endif /* __LINUX_SPINLOCK_UP_H */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2389e3f85cf6..5feaddcdbe49 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -77,7 +77,7 @@ static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED
 
 static int graph_lock(void)
 {
-	__raw_spin_lock(&lockdep_lock);
+	arch_spin_lock(&lockdep_lock);
 	/*
 	 * Make sure that if another CPU detected a bug while
 	 * walking the graph we dont change it (while the other
@@ -85,7 +85,7 @@ static int graph_lock(void)
 	 * dropped already)
 	 */
 	if (!debug_locks) {
-		__raw_spin_unlock(&lockdep_lock);
+		arch_spin_unlock(&lockdep_lock);
 		return 0;
 	}
 	/* prevent any recursions within lockdep from causing deadlocks */
@@ -95,11 +95,11 @@ static int graph_lock(void)
 
 static inline int graph_unlock(void)
 {
-	if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
+	if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
 		return DEBUG_LOCKS_WARN_ON(1);
 
 	current->lockdep_recursion--;
-	__raw_spin_unlock(&lockdep_lock);
+	arch_spin_unlock(&lockdep_lock);
 	return 0;
 }
 
@@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void)
 {
 	int ret = debug_locks_off();
 
-	__raw_spin_unlock(&lockdep_lock);
+	arch_spin_unlock(&lockdep_lock);
 
 	return ret;
 }
@@ -1170,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
 	this.class = class;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&lockdep_lock);
+	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_forward_deps(&this);
-	__raw_spin_unlock(&lockdep_lock);
+	arch_spin_unlock(&lockdep_lock);
 	local_irq_restore(flags);
 
 	return ret;
@@ -1197,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
 	this.class = class;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&lockdep_lock);
+	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_backward_deps(&this);
-	__raw_spin_unlock(&lockdep_lock);
+	arch_spin_unlock(&lockdep_lock);
 	local_irq_restore(flags);
 
 	return ret;
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 6b2d735846a5..7bebbd15b342 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
 							\
 		DEBUG_LOCKS_WARN_ON(in_interrupt());	\
 		local_irq_save(flags);			\
-		__raw_spin_lock(&(lock)->raw_lock);	\
+		arch_spin_lock(&(lock)->raw_lock);	\
 		DEBUG_LOCKS_WARN_ON(l->magic != l);	\
 	} while (0)
 
 #define spin_unlock_mutex(lock, flags)			\
 	do {						\
-		__raw_spin_unlock(&(lock)->raw_lock);	\
+		arch_spin_unlock(&(lock)->raw_lock);	\
 		local_irq_restore(flags);		\
 		preempt_check_resched();		\
 	} while (0)
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index e6e136318437..fbb5f8b78357 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -53,7 +53,7 @@ void __lockfunc __##op##_lock(locktype##_t *lock)			\
 		if (!(lock)->break_lock)				\
 			(lock)->break_lock = 1;				\
 		while (!op##_can_lock(lock) && (lock)->break_lock)	\
-			_raw_##op##_relax(&lock->raw_lock);		\
+			arch_##op##_relax(&lock->raw_lock);		\
 	}								\
 	(lock)->break_lock = 0;						\
 }									\
@@ -73,7 +73,7 @@ unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock)	\
 		if (!(lock)->break_lock)				\
 			(lock)->break_lock = 1;				\
 		while (!op##_can_lock(lock) && (lock)->break_lock)	\
-			_raw_##op##_relax(&lock->raw_lock);		\
+			arch_##op##_relax(&lock->raw_lock);		\
 	}								\
 	(lock)->break_lock = 0;						\
 	return flags;							\
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fb7a0fa508b9..f58c9ad15830 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2834,7 +2834,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	int ret;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 
  again:
 	/*
@@ -2923,7 +2923,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	goto again;
 
  out:
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
 	return reader;
@@ -3286,9 +3286,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	synchronize_sched();
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return iter;
@@ -3408,11 +3408,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
 		goto out;
 
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 
 	rb_reset_cpu(cpu_buffer);
 
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 63bc1cc38219..bb6b5e7fa2a2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -555,13 +555,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	__raw_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&ftrace_max_lock);
 
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;
 
 	__update_max_tr(tr, tsk, cpu);
-	__raw_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&ftrace_max_lock);
 }
 
 /**
@@ -581,7 +581,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	__raw_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&ftrace_max_lock);
 
 	ftrace_disable_cpu();
 
@@ -603,7 +603,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
 
 	__update_max_tr(tr, tsk, cpu);
-	__raw_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&ftrace_max_lock);
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
@@ -915,7 +915,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
 	 * nor do we want to disable interrupts,
 	 * so if we miss here, then better luck next time.
 	 */
-	if (!__raw_spin_trylock(&trace_cmdline_lock))
+	if (!arch_spin_trylock(&trace_cmdline_lock))
 		return;
 
 	idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +940,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
 
 	memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
 
-	__raw_spin_unlock(&trace_cmdline_lock);
+	arch_spin_unlock(&trace_cmdline_lock);
 }
 
 void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +958,14 @@ void trace_find_cmdline(int pid, char comm[])
 	}
 
 	preempt_disable();
-	__raw_spin_lock(&trace_cmdline_lock);
+	arch_spin_lock(&trace_cmdline_lock);
 	map = map_pid_to_cmdline[pid];
 	if (map != NO_CMDLINE_MAP)
 		strcpy(comm, saved_cmdlines[map]);
 	else
 		strcpy(comm, "<...>");
 
-	__raw_spin_unlock(&trace_cmdline_lock);
+	arch_spin_unlock(&trace_cmdline_lock);
 	preempt_enable();
 }
 
@@ -1283,7 +1283,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
 	/* Lockdep uses trace_printk for lock tracing */
 	local_irq_save(flags);
-	__raw_spin_lock(&trace_buf_lock);
+	arch_spin_lock(&trace_buf_lock);
 	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1304,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 		ring_buffer_unlock_commit(buffer, event);
 
 out_unlock:
-	__raw_spin_unlock(&trace_buf_lock);
+	arch_spin_unlock(&trace_buf_lock);
 	local_irq_restore(flags);
 
 out:
@@ -1360,7 +1360,7 @@ int trace_array_vprintk(struct trace_array *tr,
 
 	pause_graph_tracing();
 	raw_local_irq_save(irq_flags);
-	__raw_spin_lock(&trace_buf_lock);
+	arch_spin_lock(&trace_buf_lock);
 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	size = sizeof(*entry) + len + 1;
@@ -1378,7 +1378,7 @@ int trace_array_vprintk(struct trace_array *tr,
 		ring_buffer_unlock_commit(buffer, event);
 
  out_unlock:
-	__raw_spin_unlock(&trace_buf_lock);
+	arch_spin_unlock(&trace_buf_lock);
 	raw_local_irq_restore(irq_flags);
 	unpause_graph_tracing();
  out:
@@ -2279,7 +2279,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	mutex_lock(&tracing_cpumask_update_lock);
 
 	local_irq_disable();
-	__raw_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&ftrace_max_lock);
 	for_each_tracing_cpu(cpu) {
 		/*
 		 * Increase/decrease the disabled counter if we are
@@ -2294,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 			atomic_dec(&global_trace.data[cpu]->disabled);
 		}
 	}
-	__raw_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&ftrace_max_lock);
 	local_irq_enable();
 
 	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -4318,7 +4318,7 @@ static void __ftrace_dump(bool disable_tracing)
 
 	/* only one dump */
 	local_irq_save(flags);
-	__raw_spin_lock(&ftrace_dump_lock);
+	arch_spin_lock(&ftrace_dump_lock);
 	if (dump_ran)
 		goto out;
 
@@ -4393,7 +4393,7 @@ static void __ftrace_dump(bool disable_tracing)
 	}
 
  out:
-	__raw_spin_unlock(&ftrace_dump_lock);
+	arch_spin_unlock(&ftrace_dump_lock);
 	local_irq_restore(flags);
 }
 
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 433e2eda2d01..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
 	if (unlikely(in_nmi()))
 		goto out;
 
-	__raw_spin_lock(&trace_clock_struct.lock);
+	arch_spin_lock(&trace_clock_struct.lock);
 
 	/*
 	 * TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
 
 	trace_clock_struct.prev_time = now;
 
-	__raw_spin_unlock(&trace_clock_struct.lock);
+	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
 	raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e347853564e9..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 		goto out;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&wakeup_lock);
+	arch_spin_lock(&wakeup_lock);
 
 	/* We could race with grabbing wakeup_lock */
 	if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 
 out_unlock:
 	__wakeup_reset(wakeup_trace);
-	__raw_spin_unlock(&wakeup_lock);
+	arch_spin_unlock(&wakeup_lock);
 	local_irq_restore(flags);
 out:
 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
 	tracing_reset_online_cpus(tr);
 
 	local_irq_save(flags);
-	__raw_spin_lock(&wakeup_lock);
+	arch_spin_lock(&wakeup_lock);
 	__wakeup_reset(tr);
-	__raw_spin_unlock(&wakeup_lock);
+	arch_spin_unlock(&wakeup_lock);
 	local_irq_restore(flags);
 }
 
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 		goto out;
 
 	/* interrupts should be off from try_to_wake_up */
-	__raw_spin_lock(&wakeup_lock);
+	arch_spin_lock(&wakeup_lock);
 
 	/* check for races. */
 	if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 	trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
-	__raw_spin_unlock(&wakeup_lock);
+	arch_spin_unlock(&wakeup_lock);
 out:
 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 
 	/* Don't allow flipping of max traces now */
 	local_irq_save(flags);
-	__raw_spin_lock(&ftrace_max_lock);
+	arch_spin_lock(&ftrace_max_lock);
 
 	cnt = ring_buffer_entries(tr->buffer);
 
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 			break;
 	}
 	tracing_on();
-	__raw_spin_unlock(&ftrace_max_lock);
+	arch_spin_unlock(&ftrace_max_lock);
 	local_irq_restore(flags);
 
 	if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 728c35221483..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -54,7 +54,7 @@ static inline void check_stack(void)
 		return;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&max_stack_lock);
+	arch_spin_lock(&max_stack_lock);
 
 	/* a race could have already updated it */
 	if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
 	}
 
  out:
-	__raw_spin_unlock(&max_stack_lock);
+	arch_spin_unlock(&max_stack_lock);
 	local_irq_restore(flags);
 }
 
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 		return ret;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&max_stack_lock);
+	arch_spin_lock(&max_stack_lock);
 	*ptr = val;
-	__raw_spin_unlock(&max_stack_lock);
+	arch_spin_unlock(&max_stack_lock);
 	local_irq_restore(flags);
 
 	return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	local_irq_disable();
-	__raw_spin_lock(&max_stack_lock);
+	arch_spin_lock(&max_stack_lock);
 
 	if (*pos == 0)
 		return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 
 static void t_stop(struct seq_file *m, void *p)
 {
-	__raw_spin_unlock(&max_stack_lock);
+	arch_spin_unlock(&max_stack_lock);
 	local_irq_enable();
 }
 
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index f73004137141..1304fe094546 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -106,7 +106,7 @@ static void __spin_lock_debug(spinlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_spin_trylock(&lock->raw_lock))
+			if (arch_spin_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -128,14 +128,14 @@ static void __spin_lock_debug(spinlock_t *lock)
 void _raw_spin_lock(spinlock_t *lock)
 {
 	debug_spin_lock_before(lock);
-	if (unlikely(!__raw_spin_trylock(&lock->raw_lock)))
+	if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
 		__spin_lock_debug(lock);
 	debug_spin_lock_after(lock);
 }
 
 int _raw_spin_trylock(spinlock_t *lock)
 {
-	int ret = __raw_spin_trylock(&lock->raw_lock);
+	int ret = arch_spin_trylock(&lock->raw_lock);
 
 	if (ret)
 		debug_spin_lock_after(lock);
@@ -151,7 +151,7 @@ int _raw_spin_trylock(spinlock_t *lock)
 void _raw_spin_unlock(spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
-	__raw_spin_unlock(&lock->raw_lock);
+	arch_spin_unlock(&lock->raw_lock);
 }
 
 static void rwlock_bug(rwlock_t *lock, const char *msg)
-- 
cgit v1.2.3


From fb3a6bbc912b12347614e5742c7c61416cdb0ca0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 20:01:19 +0100
Subject: locking: Convert raw_rwlock to arch_rwlock

Not strictly necessary for -rt as -rt does not have non sleeping
rwlocks, but it's odd to not have a consistent naming convention.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/spinlock.h          | 16 ++++++++--------
 arch/alpha/include/asm/spinlock_types.h    |  4 ++--
 arch/arm/include/asm/spinlock.h            | 12 ++++++------
 arch/arm/include/asm/spinlock_types.h      |  4 ++--
 arch/blackfin/include/asm/spinlock.h       | 16 ++++++++--------
 arch/blackfin/include/asm/spinlock_types.h |  4 ++--
 arch/cris/include/arch-v32/arch/spinlock.h | 16 ++++++++--------
 arch/ia64/include/asm/spinlock.h           | 16 ++++++++--------
 arch/ia64/include/asm/spinlock_types.h     |  4 ++--
 arch/m32r/include/asm/spinlock.h           | 12 ++++++------
 arch/m32r/include/asm/spinlock_types.h     |  4 ++--
 arch/mips/include/asm/spinlock.h           | 12 ++++++------
 arch/mips/include/asm/spinlock_types.h     |  4 ++--
 arch/parisc/include/asm/spinlock.h         | 16 ++++++++--------
 arch/parisc/include/asm/spinlock_types.h   |  4 ++--
 arch/powerpc/include/asm/spinlock.h        | 18 +++++++++---------
 arch/powerpc/include/asm/spinlock_types.h  |  4 ++--
 arch/powerpc/lib/locks.c                   |  2 +-
 arch/s390/include/asm/spinlock.h           | 30 +++++++++++++++---------------
 arch/s390/include/asm/spinlock_types.h     |  4 ++--
 arch/s390/lib/spinlock.c                   | 12 ++++++------
 arch/sh/include/asm/spinlock.h             | 12 ++++++------
 arch/sh/include/asm/spinlock_types.h       |  4 ++--
 arch/sparc/include/asm/spinlock_32.h       | 20 ++++++++++----------
 arch/sparc/include/asm/spinlock_64.h       | 12 ++++++------
 arch/sparc/include/asm/spinlock_types.h    |  4 ++--
 arch/x86/include/asm/spinlock.h            | 16 ++++++++--------
 arch/x86/include/asm/spinlock_types.h      |  4 ++--
 include/linux/rwlock_types.h               |  6 +++---
 include/linux/spinlock.h                   |  4 ++--
 include/linux/spinlock_types_up.h          |  4 ++--
 lib/spinlock_debug.c                       |  2 +-
 32 files changed, 151 insertions(+), 151 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 4dac79f504c3..e8b2970f037b 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -50,17 +50,17 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 /***********************************************************/
 
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+static inline int __raw_read_can_lock(arch_rwlock_t *lock)
 {
 	return (lock->lock & 1) == 0;
 }
 
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+static inline int __raw_write_can_lock(arch_rwlock_t *lock)
 {
 	return lock->lock == 0;
 }
 
-static inline void __raw_read_lock(raw_rwlock_t *lock)
+static inline void __raw_read_lock(arch_rwlock_t *lock)
 {
 	long regx;
 
@@ -80,7 +80,7 @@ static inline void __raw_read_lock(raw_rwlock_t *lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *lock)
+static inline void __raw_write_lock(arch_rwlock_t *lock)
 {
 	long regx;
 
@@ -100,7 +100,7 @@ static inline void __raw_write_lock(raw_rwlock_t *lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t * lock)
+static inline int __raw_read_trylock(arch_rwlock_t * lock)
 {
 	long regx;
 	int success;
@@ -122,7 +122,7 @@ static inline int __raw_read_trylock(raw_rwlock_t * lock)
 	return success;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t * lock)
+static inline int __raw_write_trylock(arch_rwlock_t * lock)
 {
 	long regx;
 	int success;
@@ -144,7 +144,7 @@ static inline int __raw_write_trylock(raw_rwlock_t * lock)
 	return success;
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t * lock)
+static inline void __raw_read_unlock(arch_rwlock_t * lock)
 {
 	long regx;
 	__asm__ __volatile__(
@@ -160,7 +160,7 @@ static inline void __raw_read_unlock(raw_rwlock_t * lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t * lock)
+static inline void __raw_write_unlock(arch_rwlock_t * lock)
 {
 	mb();
 	lock->lock = 0;
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index 08975ee0a100..54c2afce0a1d 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index de62eb098f68..a8671d8bc7d4 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -86,7 +86,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  * just write zero since the lock is exclusively held.
  */
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -106,7 +106,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	smp_mb();
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -126,7 +126,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	smp_mb();
 
@@ -156,7 +156,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
  * currently active.  However, we know we won't have any write
  * locks.
  */
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
@@ -176,7 +176,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 	smp_mb();
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
@@ -198,7 +198,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	: "cc");
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2 = 1;
 
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 9622e126a8de..d14d197ae04a 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 62d49540e02b..7e1c56b0a571 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -52,42 +52,42 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		cpu_relax();
 }
 
-static inline int __raw_read_can_lock(raw_rwlock_t *rw)
+static inline int __raw_read_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) > 0;
 }
 
-static inline int __raw_write_can_lock(raw_rwlock_t *rw)
+static inline int __raw_write_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) == RW_LOCK_BIAS;
 }
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	__raw_read_lock_asm(&rw->lock);
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	return __raw_read_trylock_asm(&rw->lock);
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	__raw_read_unlock_asm(&rw->lock);
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	__raw_write_lock_asm(&rw->lock);
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	return __raw_write_trylock_asm(&rw->lock);
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	__raw_write_unlock_asm(&rw->lock);
 }
diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h
index c8a3928a58c5..1a33608c958b 100644
--- a/arch/blackfin/include/asm/spinlock_types.h
+++ b/arch/blackfin/include/asm/spinlock_types.h
@@ -21,8 +21,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index a2e8a394d555..1d7d3a8046cb 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -56,17 +56,17 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
  *
  */
 
-static inline int __raw_read_can_lock(raw_rwlock_t *x)
+static inline int __raw_read_can_lock(arch_rwlock_t *x)
 {
 	return (int)(x)->lock > 0;
 }
 
-static inline int __raw_write_can_lock(raw_rwlock_t *x)
+static inline int __raw_write_can_lock(arch_rwlock_t *x)
 {
 	return (x)->lock == RW_LOCK_BIAS;
 }
 
-static  inline void __raw_read_lock(raw_rwlock_t *rw)
+static  inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock == 0);
@@ -74,7 +74,7 @@ static  inline void __raw_read_lock(raw_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_write_lock(raw_rwlock_t *rw)
+static  inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
@@ -82,14 +82,14 @@ static  inline void __raw_write_lock(raw_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_read_unlock(raw_rwlock_t *rw)
+static  inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	rw->lock++;
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_write_unlock(raw_rwlock_t *rw)
+static  inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
@@ -97,7 +97,7 @@ static  inline void __raw_write_unlock(raw_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline int __raw_read_trylock(raw_rwlock_t *rw)
+static  inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
 	arch_spin_lock(&rw->slock);
@@ -109,7 +109,7 @@ static  inline int __raw_read_trylock(raw_rwlock_t *rw)
 	return ret;
 }
 
-static  inline int __raw_write_trylock(raw_rwlock_t *rw)
+static  inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
 	arch_spin_lock(&rw->slock);
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index b06165f6352f..6715b6a8ebc3 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -146,7 +146,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-__raw_read_lock_flags(raw_rwlock_t *lock, unsigned long flags)
+__raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 {
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1,%2\n"
@@ -177,7 +177,7 @@ __raw_read_lock_flags(raw_rwlock_t *lock, unsigned long flags)
 
 #define __raw_read_lock(rw)								\
 do {											\
-	raw_rwlock_t *__read_lock_ptr = (rw);						\
+	arch_rwlock_t *__read_lock_ptr = (rw);						\
 											\
 	while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {		\
 		ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);			\
@@ -190,14 +190,14 @@ do {											\
 
 #define __raw_read_unlock(rw)					\
 do {								\
-	raw_rwlock_t *__read_lock_ptr = (rw);			\
+	arch_rwlock_t *__read_lock_ptr = (rw);			\
 	ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);	\
 } while (0)
 
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-__raw_write_lock_flags(raw_rwlock_t *lock, unsigned long flags)
+__raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 {
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1, %2\n"
@@ -235,7 +235,7 @@ __raw_write_lock_flags(raw_rwlock_t *lock, unsigned long flags)
 	(result == 0);								\
 })
 
-static inline void __raw_write_unlock(raw_rwlock_t *x)
+static inline void __raw_write_unlock(arch_rwlock_t *x)
 {
 	u8 *y = (u8 *)x;
 	barrier();
@@ -265,7 +265,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *x)
 	(ia64_val == 0);						\
 })
 
-static inline void __raw_write_unlock(raw_rwlock_t *x)
+static inline void __raw_write_unlock(arch_rwlock_t *x)
 {
 	barrier();
 	x->write_lock = 0;
@@ -273,10 +273,10 @@ static inline void __raw_write_unlock(raw_rwlock_t *x)
 
 #endif /* !ASM_SUPPORTED */
 
-static inline int __raw_read_trylock(raw_rwlock_t *x)
+static inline int __raw_read_trylock(arch_rwlock_t *x)
 {
 	union {
-		raw_rwlock_t lock;
+		arch_rwlock_t lock;
 		__u32 word;
 	} old, new;
 	old.lock = new.lock = *x;
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 6a11b65fa66d..e2b42a52a6d3 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -14,8 +14,8 @@ typedef struct {
 typedef struct {
 	volatile unsigned int read_counter	: 31;
 	volatile unsigned int write_lock	:  1;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0, 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0, 0 }
 
 #endif
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 8acac950a43c..1c76af8c8e1b 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -148,7 +148,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  */
 #define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1;
 
@@ -199,7 +199,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1, tmp2;
 
@@ -252,7 +252,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1;
 
@@ -274,7 +274,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1, tmp2;
 
@@ -298,7 +298,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 	);
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
+static inline int __raw_read_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t*)lock;
 	if (atomic_dec_return(count) >= 0)
@@ -307,7 +307,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
 	return 0;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
+static inline int __raw_write_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h
index 5873a8701107..92e27672661f 100644
--- a/arch/m32r/include/asm/spinlock_types.h
+++ b/arch/m32r/include/asm/spinlock_types.h
@@ -13,11 +13,11 @@ typedef struct {
 
 typedef struct {
 	volatile int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
 #define RW_LOCK_BIAS			0x01000000
 #define RW_LOCK_BIAS_STR		"0x01000000"
 
-#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif /* _ASM_M32R_SPINLOCK_TYPES_H */
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 95edebaaf22a..7bf27c8a3364 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -256,7 +256,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  */
 #define __raw_write_can_lock(rw)	(!(rw)->lock)
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
@@ -301,7 +301,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 /* Note the use of sub, not subu which will make the kernel die with an
    overflow exception if we ever try to unlock an rwlock that is already
    unlocked or is being held by a writer.  */
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
@@ -335,7 +335,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
@@ -377,7 +377,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	smp_llsc_mb();
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	smp_mb();
 
@@ -389,7 +389,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 	: "memory");
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 	int ret;
@@ -433,7 +433,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
 	return ret;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 	int ret;
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index b4c5efaadb9c..ee197c2f9c98 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -18,8 +18,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 235e7e386e2a..1ff3a0a94a43 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -69,7 +69,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_lock(raw_rwlock_t *rw)
+static  __inline__ void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -81,7 +81,7 @@ static  __inline__ void __raw_read_lock(raw_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
+static  __inline__ void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -93,7 +93,7 @@ static  __inline__ void __raw_read_unlock(raw_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
+static __inline__ int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
  retry:
@@ -119,7 +119,7 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ void __raw_write_lock(raw_rwlock_t *rw)
+static __inline__ void __raw_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 retry:
@@ -141,7 +141,7 @@ retry:
 	local_irq_restore(flags);
 }
 
-static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
+static __inline__ void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	rw->counter = 0;
 	arch_spin_unlock(&rw->lock);
@@ -149,7 +149,7 @@ static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
+static __inline__ int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	int result = 0;
@@ -173,7 +173,7 @@ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw)
+static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw)
 {
 	return rw->counter >= 0;
 }
@@ -182,7 +182,7 @@ static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw)
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw)
+static __inline__ int __raw_write_can_lock(arch_rwlock_t *rw)
 {
 	return !rw->counter;
 }
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 396d2746ca57..8c373aa28a86 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -14,8 +14,8 @@ typedef struct {
 typedef struct {
 	arch_spinlock_t lock;
 	volatile int counter;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ __ARCH_SPIN_LOCK_UNLOCKED, 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ __ARCH_SPIN_LOCK_UNLOCKED, 0 }
 
 #endif
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index cdcaf6b97087..2fad2c07c593 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -97,7 +97,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (get_lppaca()->shared_proc)
 extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(raw_rwlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
 #else /* SPLPAR || ISERIES */
 #define __spin_yield(x)	barrier()
 #define __rw_yield(x)	barrier()
@@ -181,7 +181,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
  * This returns the old value in the lock + 1,
  * so we got a read lock if the return value is > 0.
  */
-static inline long arch_read_trylock(raw_rwlock_t *rw)
+static inline long arch_read_trylock(arch_rwlock_t *rw)
 {
 	long tmp;
 
@@ -205,7 +205,7 @@ static inline long arch_read_trylock(raw_rwlock_t *rw)
  * This returns the old value in the lock,
  * so we got the write lock if the return value is 0.
  */
-static inline long arch_write_trylock(raw_rwlock_t *rw)
+static inline long arch_write_trylock(arch_rwlock_t *rw)
 {
 	long tmp, token;
 
@@ -225,7 +225,7 @@ static inline long arch_write_trylock(raw_rwlock_t *rw)
 	return tmp;
 }
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	while (1) {
 		if (likely(arch_read_trylock(rw) > 0))
@@ -239,7 +239,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	while (1) {
 		if (likely(arch_write_trylock(rw) == 0))
@@ -253,17 +253,17 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	}
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	return arch_read_trylock(rw) > 0;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	return arch_write_trylock(rw) == 0;
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	long tmp;
 
@@ -280,7 +280,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	: "cr0", "xer", "memory");
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	__asm__ __volatile__("# write_unlock\n\t"
 				LWSYNC_ON_SMP: : :"memory");
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index f5f39d82711f..2351adc4fdc4 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct {
 
 typedef struct {
 	volatile signed int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index ee395e392115..58e14fba11b1 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -55,7 +55,7 @@ void __spin_yield(arch_spinlock_t *lock)
  * This turns out to be the same for read and write locks, since
  * we only know the holder if it is write-locked.
  */
-void __rw_yield(raw_rwlock_t *rw)
+void __rw_yield(arch_rwlock_t *rw)
 {
 	int lock_value;
 	unsigned int holder_cpu, yield_count;
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index a94c146657a9..7f98f0e48acb 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -121,14 +121,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
  */
 #define __raw_write_can_lock(x) ((x)->lock == 0)
 
-extern void _raw_read_lock_wait(raw_rwlock_t *lp);
-extern void _raw_read_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags);
-extern int _raw_read_trylock_retry(raw_rwlock_t *lp);
-extern void _raw_write_lock_wait(raw_rwlock_t *lp);
-extern void _raw_write_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags);
-extern int _raw_write_trylock_retry(raw_rwlock_t *lp);
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
+extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
+extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
+
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -136,7 +136,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		_raw_read_lock_wait(rw);
 }
 
-static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags)
+static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -144,7 +144,7 @@ static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags)
 		_raw_read_lock_wait_flags(rw, flags);
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int old, cmp;
 
@@ -155,24 +155,24 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	} while (cmp != old);
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
 		_raw_write_lock_wait(rw);
 }
 
-static inline void __raw_write_lock_flags(raw_rwlock_t *rw, unsigned long flags)
+static inline void __raw_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
 		_raw_write_lock_wait_flags(rw, flags);
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -181,7 +181,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
 	return _raw_read_trylock_retry(rw);
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0))
 		return 1;
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index e25c0370f6cd..9c76656a0af0 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index f4596452f072..09fee9a1aa15 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -105,7 +105,7 @@ void arch_spin_relax(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_relax);
 
-void _raw_read_lock_wait(raw_rwlock_t *rw)
+void _raw_read_lock_wait(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	int count = spin_retry;
@@ -124,7 +124,7 @@ void _raw_read_lock_wait(raw_rwlock_t *rw)
 }
 EXPORT_SYMBOL(_raw_read_lock_wait);
 
-void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
+void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	unsigned int old;
 	int count = spin_retry;
@@ -145,7 +145,7 @@ void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
 }
 EXPORT_SYMBOL(_raw_read_lock_wait_flags);
 
-int _raw_read_trylock_retry(raw_rwlock_t *rw)
+int _raw_read_trylock_retry(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	int count = spin_retry;
@@ -161,7 +161,7 @@ int _raw_read_trylock_retry(raw_rwlock_t *rw)
 }
 EXPORT_SYMBOL(_raw_read_trylock_retry);
 
-void _raw_write_lock_wait(raw_rwlock_t *rw)
+void _raw_write_lock_wait(arch_rwlock_t *rw)
 {
 	int count = spin_retry;
 
@@ -178,7 +178,7 @@ void _raw_write_lock_wait(raw_rwlock_t *rw)
 }
 EXPORT_SYMBOL(_raw_write_lock_wait);
 
-void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
+void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	int count = spin_retry;
 
@@ -197,7 +197,7 @@ void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
 }
 EXPORT_SYMBOL(_raw_write_lock_wait_flags);
 
-int _raw_write_trylock_retry(raw_rwlock_t *rw)
+int _raw_write_trylock_retry(arch_rwlock_t *rw)
 {
 	int count = spin_retry;
 
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index da1c6491ed4b..7f3626aac869 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -108,7 +108,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  */
 #define __raw_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -126,7 +126,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -142,7 +142,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -160,7 +160,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	__asm__ __volatile__ (
 		"mov.l		%1, @%0 ! __raw_write_unlock	\n\t"
@@ -170,7 +170,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 	);
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, oldval;
 
@@ -193,7 +193,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
 	return (oldval > 0);
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, oldval;
 
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index a3be2db960ed..9b7560db06ca 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -13,9 +13,9 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
 #define RW_LOCK_BIAS			0x01000000
-#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 9b0f2f53c81c..06d37e588fde 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -65,7 +65,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  * Sort of like atomic_t's on Sparc, but even more clever.
  *
  *	------------------------------------
- *	| 24-bit counter           | wlock |  raw_rwlock_t
+ *	| 24-bit counter           | wlock |  arch_rwlock_t
  *	------------------------------------
  *	 31                       8 7     0
  *
@@ -76,9 +76,9 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  *
  * Unfortunately this scheme limits us to ~16,000,000 cpus.
  */
-static inline void arch_read_lock(raw_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	register raw_rwlock_t *lp asm("g1");
+	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
 	__asm__ __volatile__(
 	"mov	%%o7, %%g4\n\t"
@@ -96,9 +96,9 @@ do {	unsigned long flags; \
 	local_irq_restore(flags); \
 } while(0)
 
-static inline void arch_read_unlock(raw_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	register raw_rwlock_t *lp asm("g1");
+	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
 	__asm__ __volatile__(
 	"mov	%%o7, %%g4\n\t"
@@ -116,9 +116,9 @@ do {	unsigned long flags; \
 	local_irq_restore(flags); \
 } while(0)
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
-	register raw_rwlock_t *lp asm("g1");
+	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
 	__asm__ __volatile__(
 	"mov	%%o7, %%g4\n\t"
@@ -130,7 +130,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 	*(volatile __u32 *)&lp->lock = ~0U;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *rw)
+static inline int __raw_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int val;
 
@@ -150,9 +150,9 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 	return (val == 0);
 }
 
-static inline int arch_read_trylock(raw_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	register raw_rwlock_t *lp asm("g1");
+	register arch_rwlock_t *lp asm("g1");
 	register int res asm("o0");
 	lp = rw;
 	__asm__ __volatile__(
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 7cf58a2fcda4..2b22d7f2c2fb 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -92,7 +92,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
 
 /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
 
-static void inline arch_read_lock(raw_rwlock_t *lock)
+static void inline arch_read_lock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -115,7 +115,7 @@ static void inline arch_read_lock(raw_rwlock_t *lock)
 	: "memory");
 }
 
-static int inline arch_read_trylock(raw_rwlock_t *lock)
+static int inline arch_read_trylock(arch_rwlock_t *lock)
 {
 	int tmp1, tmp2;
 
@@ -136,7 +136,7 @@ static int inline arch_read_trylock(raw_rwlock_t *lock)
 	return tmp1;
 }
 
-static void inline arch_read_unlock(raw_rwlock_t *lock)
+static void inline arch_read_unlock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -152,7 +152,7 @@ static void inline arch_read_unlock(raw_rwlock_t *lock)
 	: "memory");
 }
 
-static void inline arch_write_lock(raw_rwlock_t *lock)
+static void inline arch_write_lock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2;
 
@@ -177,7 +177,7 @@ static void inline arch_write_lock(raw_rwlock_t *lock)
 	: "memory");
 }
 
-static void inline arch_write_unlock(raw_rwlock_t *lock)
+static void inline arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stw		%%g0, [%0]"
@@ -186,7 +186,7 @@ static void inline arch_write_unlock(raw_rwlock_t *lock)
 	: "memory");
 }
 
-static int inline arch_write_trylock(raw_rwlock_t *lock)
+static int inline arch_write_trylock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2, result;
 
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
index c145e63a5d66..9c454fdeaad8 100644
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct {
 
 typedef struct {
 	volatile unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ 0 }
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
 
 #endif
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index ab9055fd57d9..99cb86e843a0 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -232,7 +232,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+static inline int __raw_read_can_lock(arch_rwlock_t *lock)
 {
 	return (int)(lock)->lock > 0;
 }
@@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(raw_rwlock_t *lock)
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+static inline int __raw_write_can_lock(arch_rwlock_t *lock)
 {
 	return (lock)->lock == RW_LOCK_BIAS;
 }
 
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void __raw_read_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
 		     "jns 1f\n"
@@ -255,7 +255,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		     ::LOCK_PTR_REG (rw) : "memory");
 }
 
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void __raw_write_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
 		     "jz 1f\n"
@@ -264,7 +264,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
+static inline int __raw_read_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -274,7 +274,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
 	return 0;
 }
 
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
+static inline int __raw_write_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -284,12 +284,12 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
 	return 0;
 }
 
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void __raw_read_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
 }
 
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void __raw_write_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "addl %1, %0"
 		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 696f8364a4f3..dcb48b2edc11 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -13,8 +13,8 @@ typedef struct arch_spinlock {
 
 typedef struct {
 	unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index f8c935206a41..bd31808c7d8e 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -9,7 +9,7 @@
  * Released under the General Public License (GPL).
  */
 typedef struct {
-	raw_rwlock_t raw_lock;
+	arch_rwlock_t raw_lock;
 #ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
@@ -32,14 +32,14 @@ typedef struct {
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define __RW_LOCK_UNLOCKED(lockname)					\
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+	(rwlock_t)	{	.raw_lock = __ARCH_RW_LOCK_UNLOCKED,	\
 				.magic = RWLOCK_MAGIC,			\
 				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1,			\
 				RW_DEP_MAP_INIT(lockname) }
 #else
 #define __RW_LOCK_UNLOCKED(lockname) \
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+	(rwlock_t)	{	.raw_lock = __ARCH_RW_LOCK_UNLOCKED,	\
 				RW_DEP_MAP_INIT(lockname) }
 #endif
 
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index de3a022489c6..53bc2213b414 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -8,7 +8,7 @@
  *
  * on SMP builds:
  *
- *  asm/spinlock_types.h: contains the arch_spinlock_t/raw_rwlock_t and the
+ *  asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the
  *                        initializers
  *
  *  linux/spinlock_types.h:
@@ -75,7 +75,7 @@
 #define __lockfunc __attribute__((section(".spinlock.text")))
 
 /*
- * Pull the arch_spinlock_t and raw_rwlock_t definitions:
+ * Pull the arch_spinlock_t and arch_rwlock_t definitions:
  */
 #include <linux/spinlock_types.h>
 
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 10db021f4875..c09b6407ae1b 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -30,8 +30,8 @@ typedef struct { } arch_spinlock_t;
 
 typedef struct {
 	/* no debug version on UP */
-} raw_rwlock_t;
+} arch_rwlock_t;
 
-#define __RAW_RW_LOCK_UNLOCKED { }
+#define __ARCH_RW_LOCK_UNLOCKED { }
 
 #endif /* __LINUX_SPINLOCK_TYPES_UP_H */
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 1304fe094546..3f72f10d9cb0 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -41,7 +41,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
+	lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
 	lock->magic = RWLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
-- 
cgit v1.2.3


From e5931943d02bf751b1ec849c0d2ade23d76a8d41 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 20:08:46 +0100
Subject: locking: Convert raw_rwlock functions to arch_rwlock

Name space cleanup for rwlock functions. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/spinlock.h          | 20 +++++++-------
 arch/arm/include/asm/spinlock.h            | 20 +++++++-------
 arch/blackfin/include/asm/spinlock.h       | 40 ++++++++++++++--------------
 arch/cris/include/arch-v32/arch/spinlock.h | 16 ++++++------
 arch/ia64/include/asm/spinlock.h           | 32 +++++++++++------------
 arch/m32r/include/asm/spinlock.h           | 20 +++++++-------
 arch/mips/include/asm/spinlock.h           | 42 +++++++++++++++---------------
 arch/parisc/include/asm/spinlock.h         | 20 +++++++-------
 arch/powerpc/include/asm/spinlock.h        | 32 +++++++++++------------
 arch/s390/include/asm/spinlock.h           | 20 +++++++-------
 arch/s390/lib/spinlock.c                   | 12 ++++-----
 arch/sh/include/asm/spinlock.h             | 32 +++++++++++------------
 arch/sparc/include/asm/spinlock_32.h       | 32 +++++++++++------------
 arch/sparc/include/asm/spinlock_64.h       | 22 ++++++++--------
 arch/x86/include/asm/spinlock.h            | 20 +++++++-------
 include/linux/rwlock.h                     | 20 +++++++-------
 include/linux/spinlock_up.h                | 16 ++++++------
 lib/spinlock_debug.c                       | 16 ++++++------
 18 files changed, 216 insertions(+), 216 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index e8b2970f037b..d0faca1e992d 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -50,17 +50,17 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 /***********************************************************/
 
-static inline int __raw_read_can_lock(arch_rwlock_t *lock)
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
 	return (lock->lock & 1) == 0;
 }
 
-static inline int __raw_write_can_lock(arch_rwlock_t *lock)
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
 	return lock->lock == 0;
 }
 
-static inline void __raw_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
 {
 	long regx;
 
@@ -80,7 +80,7 @@ static inline void __raw_read_lock(arch_rwlock_t *lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
 {
 	long regx;
 
@@ -100,7 +100,7 @@ static inline void __raw_write_lock(arch_rwlock_t *lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t * lock)
+static inline int arch_read_trylock(arch_rwlock_t * lock)
 {
 	long regx;
 	int success;
@@ -122,7 +122,7 @@ static inline int __raw_read_trylock(arch_rwlock_t * lock)
 	return success;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t * lock)
+static inline int arch_write_trylock(arch_rwlock_t * lock)
 {
 	long regx;
 	int success;
@@ -144,7 +144,7 @@ static inline int __raw_write_trylock(arch_rwlock_t * lock)
 	return success;
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t * lock)
+static inline void arch_read_unlock(arch_rwlock_t * lock)
 {
 	long regx;
 	__asm__ __volatile__(
@@ -160,14 +160,14 @@ static inline void __raw_read_unlock(arch_rwlock_t * lock)
 	: "m" (*lock) : "memory");
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t * lock)
+static inline void arch_write_unlock(arch_rwlock_t * lock)
 {
 	mb();
 	lock->lock = 0;
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index a8671d8bc7d4..c91c64cab922 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -86,7 +86,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  * just write zero since the lock is exclusively held.
  */
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -106,7 +106,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	smp_mb();
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
@@ -126,7 +126,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	smp_mb();
 
@@ -142,7 +142,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw)
 }
 
 /* write_can_lock - would write_trylock() succeed? */
-#define __raw_write_can_lock(x)		((x)->lock == 0)
+#define arch_write_can_lock(x)		((x)->lock == 0)
 
 /*
  * Read locks are a bit more hairy:
@@ -156,7 +156,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw)
  * currently active.  However, we know we won't have any write
  * locks.
  */
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
@@ -176,7 +176,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 	smp_mb();
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
@@ -198,7 +198,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	: "cc");
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2 = 1;
 
@@ -215,10 +215,10 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw)
 }
 
 /* read_can_lock - would read_trylock() succeed? */
-#define __raw_read_can_lock(x)		((x)->lock < 0x80000000)
+#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 7e1c56b0a571..1942ccfedbe0 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -17,12 +17,12 @@ asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr);
 asmlinkage void __raw_spin_lock_asm(volatile int *ptr);
 asmlinkage int __raw_spin_trylock_asm(volatile int *ptr);
 asmlinkage void __raw_spin_unlock_asm(volatile int *ptr);
-asmlinkage void __raw_read_lock_asm(volatile int *ptr);
-asmlinkage int __raw_read_trylock_asm(volatile int *ptr);
-asmlinkage void __raw_read_unlock_asm(volatile int *ptr);
-asmlinkage void __raw_write_lock_asm(volatile int *ptr);
-asmlinkage int __raw_write_trylock_asm(volatile int *ptr);
-asmlinkage void __raw_write_unlock_asm(volatile int *ptr);
+asmlinkage void arch_read_lock_asm(volatile int *ptr);
+asmlinkage int arch_read_trylock_asm(volatile int *ptr);
+asmlinkage void arch_read_unlock_asm(volatile int *ptr);
+asmlinkage void arch_write_lock_asm(volatile int *ptr);
+asmlinkage int arch_write_trylock_asm(volatile int *ptr);
+asmlinkage void arch_write_unlock_asm(volatile int *ptr);
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
@@ -52,44 +52,44 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		cpu_relax();
 }
 
-static inline int __raw_read_can_lock(arch_rwlock_t *rw)
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) > 0;
 }
 
-static inline int __raw_write_can_lock(arch_rwlock_t *rw)
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) == RW_LOCK_BIAS;
 }
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	__raw_read_lock_asm(&rw->lock);
+	arch_read_lock_asm(&rw->lock);
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	return __raw_read_trylock_asm(&rw->lock);
+	return arch_read_trylock_asm(&rw->lock);
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	__raw_read_unlock_asm(&rw->lock);
+	arch_read_unlock_asm(&rw->lock);
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	__raw_write_lock_asm(&rw->lock);
+	arch_write_lock_asm(&rw->lock);
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
-	return __raw_write_trylock_asm(&rw->lock);
+	return arch_write_trylock_asm(&rw->lock);
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	__raw_write_unlock_asm(&rw->lock);
+	arch_write_unlock_asm(&rw->lock);
 }
 
 #define arch_spin_relax(lock)  	cpu_relax()
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index 1d7d3a8046cb..f171a6600fbc 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -56,17 +56,17 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
  *
  */
 
-static inline int __raw_read_can_lock(arch_rwlock_t *x)
+static inline int arch_read_can_lock(arch_rwlock_t *x)
 {
 	return (int)(x)->lock > 0;
 }
 
-static inline int __raw_write_can_lock(arch_rwlock_t *x)
+static inline int arch_write_can_lock(arch_rwlock_t *x)
 {
 	return (x)->lock == RW_LOCK_BIAS;
 }
 
-static  inline void __raw_read_lock(arch_rwlock_t *rw)
+static  inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock == 0);
@@ -74,7 +74,7 @@ static  inline void __raw_read_lock(arch_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_write_lock(arch_rwlock_t *rw)
+static  inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
@@ -82,14 +82,14 @@ static  inline void __raw_write_lock(arch_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_read_unlock(arch_rwlock_t *rw)
+static  inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	rw->lock++;
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline void __raw_write_unlock(arch_rwlock_t *rw)
+static  inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	arch_spin_lock(&rw->slock);
 	while (rw->lock != RW_LOCK_BIAS);
@@ -97,7 +97,7 @@ static  inline void __raw_write_unlock(arch_rwlock_t *rw)
 	arch_spin_unlock(&rw->slock);
 }
 
-static  inline int __raw_read_trylock(arch_rwlock_t *rw)
+static  inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
 	arch_spin_lock(&rw->slock);
@@ -109,7 +109,7 @@ static  inline int __raw_read_trylock(arch_rwlock_t *rw)
 	return ret;
 }
 
-static  inline int __raw_write_trylock(arch_rwlock_t *rw)
+static  inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	int ret = 0;
 	arch_spin_lock(&rw->slock);
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 6715b6a8ebc3..1a91c9121d17 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -140,13 +140,13 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 	__ticket_spin_unlock_wait(lock);
 }
 
-#define __raw_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
-#define __raw_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
+#define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
+#define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
 
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-__raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 {
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1,%2\n"
@@ -169,13 +169,13 @@ __raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "p6", "p7", "r2", "memory");
 }
 
-#define __raw_read_lock(lock) __raw_read_lock_flags(lock, 0)
+#define arch_read_lock(lock) arch_read_lock_flags(lock, 0)
 
 #else /* !ASM_SUPPORTED */
 
-#define __raw_read_lock_flags(rw, flags) __raw_read_lock(rw)
+#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
 
-#define __raw_read_lock(rw)								\
+#define arch_read_lock(rw)								\
 do {											\
 	arch_rwlock_t *__read_lock_ptr = (rw);						\
 											\
@@ -188,7 +188,7 @@ do {											\
 
 #endif /* !ASM_SUPPORTED */
 
-#define __raw_read_unlock(rw)					\
+#define arch_read_unlock(rw)					\
 do {								\
 	arch_rwlock_t *__read_lock_ptr = (rw);			\
 	ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);	\
@@ -197,7 +197,7 @@ do {								\
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-__raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 {
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1, %2\n"
@@ -221,9 +221,9 @@ __raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
 }
 
-#define __raw_write_lock(rw) __raw_write_lock_flags(rw, 0)
+#define arch_write_lock(rw) arch_write_lock_flags(rw, 0)
 
-#define __raw_write_trylock(rw)							\
+#define arch_write_trylock(rw)							\
 ({										\
 	register long result;							\
 										\
@@ -235,7 +235,7 @@ __raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 	(result == 0);								\
 })
 
-static inline void __raw_write_unlock(arch_rwlock_t *x)
+static inline void arch_write_unlock(arch_rwlock_t *x)
 {
 	u8 *y = (u8 *)x;
 	barrier();
@@ -244,9 +244,9 @@ static inline void __raw_write_unlock(arch_rwlock_t *x)
 
 #else /* !ASM_SUPPORTED */
 
-#define __raw_write_lock_flags(l, flags) __raw_write_lock(l)
+#define arch_write_lock_flags(l, flags) arch_write_lock(l)
 
-#define __raw_write_lock(l)								\
+#define arch_write_lock(l)								\
 ({											\
 	__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);			\
 	__u32 *ia64_write_lock_ptr = (__u32 *) (l);					\
@@ -257,7 +257,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x)
 	} while (ia64_val);								\
 })
 
-#define __raw_write_trylock(rw)						\
+#define arch_write_trylock(rw)						\
 ({									\
 	__u64 ia64_val;							\
 	__u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1);			\
@@ -265,7 +265,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x)
 	(ia64_val == 0);						\
 })
 
-static inline void __raw_write_unlock(arch_rwlock_t *x)
+static inline void arch_write_unlock(arch_rwlock_t *x)
 {
 	barrier();
 	x->write_lock = 0;
@@ -273,7 +273,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x)
 
 #endif /* !ASM_SUPPORTED */
 
-static inline int __raw_read_trylock(arch_rwlock_t *x)
+static inline int arch_read_trylock(arch_rwlock_t *x)
 {
 	union {
 		arch_rwlock_t lock;
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 1c76af8c8e1b..179a06489b10 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -140,15 +140,15 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
+#define arch_read_can_lock(x) ((int)(x)->lock > 0)
 
 /**
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1;
 
@@ -199,7 +199,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1, tmp2;
 
@@ -252,7 +252,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1;
 
@@ -274,7 +274,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp0, tmp1, tmp2;
 
@@ -298,7 +298,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw)
 	);
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t*)lock;
 	if (atomic_dec_return(count) >= 0)
@@ -307,7 +307,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *lock)
 	return 0;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
@@ -316,8 +316,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 7bf27c8a3364..21ef9efbde43 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -248,21 +248,21 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_read_can_lock(rw)	((rw)->lock >= 0)
+#define arch_read_can_lock(rw)	((rw)->lock >= 0)
 
 /*
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_write_can_lock(rw)	(!(rw)->lock)
+#define arch_write_can_lock(rw)	(!(rw)->lock)
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_read_lock	\n"
+		"	.set	noreorder	# arch_read_lock	\n"
 		"1:	ll	%1, %2					\n"
 		"	bltz	%1, 1b					\n"
 		"	 addu	%1, 1					\n"
@@ -275,7 +275,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_read_lock	\n"
+		"	.set	noreorder	# arch_read_lock	\n"
 		"1:	ll	%1, %2					\n"
 		"	bltz	%1, 2f					\n"
 		"	 addu	%1, 1					\n"
@@ -301,7 +301,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 /* Note the use of sub, not subu which will make the kernel die with an
    overflow exception if we ever try to unlock an rwlock that is already
    unlocked or is being held by a writer.  */
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
@@ -309,7 +309,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"1:	ll	%1, %2		# __raw_read_unlock	\n"
+		"1:	ll	%1, %2		# arch_read_unlock	\n"
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
@@ -318,7 +318,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_read_unlock	\n"
+		"	.set	noreorder	# arch_read_unlock	\n"
 		"1:	ll	%1, %2					\n"
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
@@ -335,13 +335,13 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_write_lock	\n"
+		"	.set	noreorder	# arch_write_lock	\n"
 		"1:	ll	%1, %2					\n"
 		"	bnez	%1, 1b					\n"
 		"	 lui	%1, 0x8000				\n"
@@ -354,7 +354,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_write_lock	\n"
+		"	.set	noreorder	# arch_write_lock	\n"
 		"1:	ll	%1, %2					\n"
 		"	bnez	%1, 2f					\n"
 		"	 lui	%1, 0x8000				\n"
@@ -377,26 +377,26 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	smp_llsc_mb();
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	smp_mb();
 
 	__asm__ __volatile__(
-	"				# __raw_write_unlock	\n"
+	"				# arch_write_unlock	\n"
 	"	sw	$0, %0					\n"
 	: "=m" (rw->lock)
 	: "m" (rw->lock)
 	: "memory");
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 	int ret;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_read_trylock	\n"
+		"	.set	noreorder	# arch_read_trylock	\n"
 		"	li	%2, 0					\n"
 		"1:	ll	%1, %3					\n"
 		"	bltz	%1, 2f					\n"
@@ -413,7 +413,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_read_trylock	\n"
+		"	.set	noreorder	# arch_read_trylock	\n"
 		"	li	%2, 0					\n"
 		"1:	ll	%1, %3					\n"
 		"	bltz	%1, 2f					\n"
@@ -433,14 +433,14 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw)
 	return ret;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 	int ret;
 
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_write_trylock	\n"
+		"	.set	noreorder	# arch_write_trylock	\n"
 		"	li	%2, 0					\n"
 		"1:	ll	%1, %3					\n"
 		"	bnez	%1, 2f					\n"
@@ -457,7 +457,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
-		"	.set	noreorder	# __raw_write_trylock	\n"
+		"	.set	noreorder	# arch_write_trylock	\n"
 		"	li	%2, 0					\n"
 		"1:	ll	%1, %3					\n"
 		"	bnez	%1, 2f					\n"
@@ -480,8 +480,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw)
 	return ret;
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 1ff3a0a94a43..74036f436a3b 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -69,7 +69,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_lock(arch_rwlock_t *rw)
+static  __inline__ void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -81,7 +81,7 @@ static  __inline__ void __raw_read_lock(arch_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static  __inline__ void __raw_read_unlock(arch_rwlock_t *rw)
+static  __inline__ void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -93,7 +93,7 @@ static  __inline__ void __raw_read_unlock(arch_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to grab the same read lock */
-static __inline__ int __raw_read_trylock(arch_rwlock_t *rw)
+static __inline__ int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
  retry:
@@ -119,7 +119,7 @@ static __inline__ int __raw_read_trylock(arch_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ void __raw_write_lock(arch_rwlock_t *rw)
+static __inline__ void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 retry:
@@ -141,7 +141,7 @@ retry:
 	local_irq_restore(flags);
 }
 
-static __inline__ void __raw_write_unlock(arch_rwlock_t *rw)
+static __inline__ void arch_write_unlock(arch_rwlock_t *rw)
 {
 	rw->counter = 0;
 	arch_spin_unlock(&rw->lock);
@@ -149,7 +149,7 @@ static __inline__ void __raw_write_unlock(arch_rwlock_t *rw)
 
 /* Note that we have to ensure interrupts are disabled in case we're
  * interrupted by some other code that wants to read_trylock() this lock */
-static __inline__ int __raw_write_trylock(arch_rwlock_t *rw)
+static __inline__ int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long flags;
 	int result = 0;
@@ -173,7 +173,7 @@ static __inline__ int __raw_write_trylock(arch_rwlock_t *rw)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw)
+static __inline__ int arch_read_can_lock(arch_rwlock_t *rw)
 {
 	return rw->counter >= 0;
 }
@@ -182,13 +182,13 @@ static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw)
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static __inline__ int __raw_write_can_lock(arch_rwlock_t *rw)
+static __inline__ int arch_write_can_lock(arch_rwlock_t *rw)
 {
 	return !rw->counter;
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 2fad2c07c593..764094cff681 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -166,8 +166,8 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
  * read-locks.
  */
 
-#define __raw_read_can_lock(rw)		((rw)->lock >= 0)
-#define __raw_write_can_lock(rw)	(!(rw)->lock)
+#define arch_read_can_lock(rw)		((rw)->lock >= 0)
+#define arch_write_can_lock(rw)	(!(rw)->lock)
 
 #ifdef CONFIG_PPC64
 #define __DO_SIGN_EXTEND	"extsw	%0,%0\n"
@@ -181,7 +181,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
  * This returns the old value in the lock + 1,
  * so we got a read lock if the return value is > 0.
  */
-static inline long arch_read_trylock(arch_rwlock_t *rw)
+static inline long __arch_read_trylock(arch_rwlock_t *rw)
 {
 	long tmp;
 
@@ -205,7 +205,7 @@ static inline long arch_read_trylock(arch_rwlock_t *rw)
  * This returns the old value in the lock,
  * so we got the write lock if the return value is 0.
  */
-static inline long arch_write_trylock(arch_rwlock_t *rw)
+static inline long __arch_write_trylock(arch_rwlock_t *rw)
 {
 	long tmp, token;
 
@@ -225,10 +225,10 @@ static inline long arch_write_trylock(arch_rwlock_t *rw)
 	return tmp;
 }
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	while (1) {
-		if (likely(arch_read_trylock(rw) > 0))
+		if (likely(__arch_read_trylock(rw) > 0))
 			break;
 		do {
 			HMT_low();
@@ -239,10 +239,10 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 	}
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	while (1) {
-		if (likely(arch_write_trylock(rw) == 0))
+		if (likely(__arch_write_trylock(rw) == 0))
 			break;
 		do {
 			HMT_low();
@@ -253,17 +253,17 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	}
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	return arch_read_trylock(rw) > 0;
+	return __arch_read_trylock(rw) > 0;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
-	return arch_write_trylock(rw) == 0;
+	return __arch_write_trylock(rw) == 0;
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	long tmp;
 
@@ -280,15 +280,15 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	: "cr0", "xer", "memory");
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	__asm__ __volatile__("# write_unlock\n\t"
 				LWSYNC_ON_SMP: : :"memory");
 	rw->lock = 0;
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	__spin_yield(lock)
 #define arch_read_relax(lock)	__rw_yield(lock)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 7f98f0e48acb..a587907d77f3 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -113,13 +113,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_read_can_lock(x) ((int)(x)->lock >= 0)
+#define arch_read_can_lock(x) ((int)(x)->lock >= 0)
 
 /**
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_write_can_lock(x) ((x)->lock == 0)
+#define arch_write_can_lock(x) ((x)->lock == 0)
 
 extern void _raw_read_lock_wait(arch_rwlock_t *lp);
 extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
@@ -128,7 +128,7 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp);
 extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
 extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -136,7 +136,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 		_raw_read_lock_wait(rw);
 }
 
-static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
+static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -144,7 +144,7 @@ static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 		_raw_read_lock_wait_flags(rw, flags);
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int old, cmp;
 
@@ -155,24 +155,24 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	} while (cmp != old);
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
 		_raw_write_lock_wait(rw);
 }
 
-static inline void __raw_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
+static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
 		_raw_write_lock_wait_flags(rw, flags);
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
@@ -181,7 +181,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw)
 	return _raw_read_trylock_retry(rw);
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0))
 		return 1;
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 09fee9a1aa15..10754a375668 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -115,7 +115,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
 			_raw_yield();
 			count = spin_retry;
 		}
-		if (!__raw_read_can_lock(rw))
+		if (!arch_read_can_lock(rw))
 			continue;
 		old = rw->lock & 0x7fffffffU;
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
@@ -135,7 +135,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 			_raw_yield();
 			count = spin_retry;
 		}
-		if (!__raw_read_can_lock(rw))
+		if (!arch_read_can_lock(rw))
 			continue;
 		old = rw->lock & 0x7fffffffU;
 		local_irq_disable();
@@ -151,7 +151,7 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw)
 	int count = spin_retry;
 
 	while (count-- > 0) {
-		if (!__raw_read_can_lock(rw))
+		if (!arch_read_can_lock(rw))
 			continue;
 		old = rw->lock & 0x7fffffffU;
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
@@ -170,7 +170,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
 			_raw_yield();
 			count = spin_retry;
 		}
-		if (!__raw_write_can_lock(rw))
+		if (!arch_write_can_lock(rw))
 			continue;
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
 			return;
@@ -188,7 +188,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 			_raw_yield();
 			count = spin_retry;
 		}
-		if (!__raw_write_can_lock(rw))
+		if (!arch_write_can_lock(rw))
 			continue;
 		local_irq_disable();
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
@@ -202,7 +202,7 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw)
 	int count = spin_retry;
 
 	while (count-- > 0) {
-		if (!__raw_write_can_lock(rw))
+		if (!arch_write_can_lock(rw))
 			continue;
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
 			return 1;
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 7f3626aac869..bdc0f3b6c56a 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -100,21 +100,21 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_read_can_lock(x)	((x)->lock > 0)
+#define arch_read_can_lock(x)	((x)->lock > 0)
 
 /**
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-#define __raw_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%1, %0	! __raw_read_lock	\n\t"
+		"movli.l	@%1, %0	! arch_read_lock	\n\t"
 		"cmp/pl		%0				\n\t"
 		"bf		1b				\n\t"
 		"add		#-1, %0				\n\t"
@@ -126,13 +126,13 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%1, %0	! __raw_read_unlock	\n\t"
+		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
 		"add		#1, %0				\n\t"
 		"movco.l	%0, @%1				\n\t"
 		"bf		1b				\n\t"
@@ -142,13 +142,13 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%1, %0	! __raw_write_lock	\n\t"
+		"movli.l	@%1, %0	! arch_write_lock	\n\t"
 		"cmp/hs		%2, %0				\n\t"
 		"bf		1b				\n\t"
 		"sub		%2, %0				\n\t"
@@ -160,23 +160,23 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	);
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	__asm__ __volatile__ (
-		"mov.l		%1, @%0 ! __raw_write_unlock	\n\t"
+		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
 		:
 		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
 		: "t", "memory"
 	);
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *rw)
+static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, oldval;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%2, %0	! __raw_read_trylock	\n\t"
+		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
 		"mov		%0, %1				\n\t"
 		"cmp/pl		%0				\n\t"
 		"bf		2f				\n\t"
@@ -193,13 +193,13 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw)
 	return (oldval > 0);
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, oldval;
 
 	__asm__ __volatile__ (
 		"1:						\n\t"
-		"movli.l	@%2, %0	! __raw_write_trylock	\n\t"
+		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
 		"mov		%0, %1				\n\t"
 		"cmp/hs		%3, %0				\n\t"
 		"bf		2f				\n\t"
@@ -216,8 +216,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw)
 	return (oldval > (RW_LOCK_BIAS - 1));
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 06d37e588fde..7f9b9dba38a6 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -76,7 +76,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
  *
  * Unfortunately this scheme limits us to ~16,000,000 cpus.
  */
-static inline void arch_read_lock(arch_rwlock_t *rw)
+static inline void __arch_read_lock(arch_rwlock_t *rw)
 {
 	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
@@ -89,14 +89,14 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 	: "g2", "g4", "memory", "cc");
 }
 
-#define __raw_read_lock(lock) \
+#define arch_read_lock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	arch_read_lock(lock); \
+	__arch_read_lock(lock); \
 	local_irq_restore(flags); \
 } while(0)
 
-static inline void arch_read_unlock(arch_rwlock_t *rw)
+static inline void __arch_read_unlock(arch_rwlock_t *rw)
 {
 	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
@@ -109,14 +109,14 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 	: "g2", "g4", "memory", "cc");
 }
 
-#define __raw_read_unlock(lock) \
+#define arch_read_unlock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	arch_read_unlock(lock); \
+	__arch_read_unlock(lock); \
 	local_irq_restore(flags); \
 } while(0)
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	register arch_rwlock_t *lp asm("g1");
 	lp = rw;
@@ -130,7 +130,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 	*(volatile __u32 *)&lp->lock = ~0U;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *rw)
+static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int val;
 
@@ -150,7 +150,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw)
 	return (val == 0);
 }
 
-static inline int arch_read_trylock(arch_rwlock_t *rw)
+static inline int __arch_read_trylock(arch_rwlock_t *rw)
 {
 	register arch_rwlock_t *lp asm("g1");
 	register int res asm("o0");
@@ -165,27 +165,27 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	return res;
 }
 
-#define __raw_read_trylock(lock) \
+#define arch_read_trylock(lock) \
 ({	unsigned long flags; \
 	int res; \
 	local_irq_save(flags); \
-	res = arch_read_trylock(lock); \
+	res = __arch_read_trylock(lock); \
 	local_irq_restore(flags); \
 	res; \
 })
 
-#define __raw_write_unlock(rw)	do { (rw)->lock = 0; } while(0)
+#define arch_write_unlock(rw)	do { (rw)->lock = 0; } while(0)
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define __raw_read_lock_flags(rw, flags)   __raw_read_lock(rw)
-#define __raw_write_lock_flags(rw, flags)  __raw_write_lock(rw)
+#define arch_read_lock_flags(rw, flags)   arch_read_lock(rw)
+#define arch_write_lock_flags(rw, flags)  arch_write_lock(rw)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
 
-#define __raw_read_can_lock(rw) (!((rw)->lock & 0xff))
-#define __raw_write_can_lock(rw) (!(rw)->lock)
+#define arch_read_can_lock(rw) (!((rw)->lock & 0xff))
+#define arch_write_can_lock(rw) (!(rw)->lock)
 
 #endif /* !(__ASSEMBLY__) */
 
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 2b22d7f2c2fb..073936a8b275 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -210,17 +210,17 @@ static int inline arch_write_trylock(arch_rwlock_t *lock)
 	return result;
 }
 
-#define __raw_read_lock(p)	arch_read_lock(p)
-#define __raw_read_lock_flags(p, f) arch_read_lock(p)
-#define __raw_read_trylock(p)	arch_read_trylock(p)
-#define __raw_read_unlock(p)	arch_read_unlock(p)
-#define __raw_write_lock(p)	arch_write_lock(p)
-#define __raw_write_lock_flags(p, f) arch_write_lock(p)
-#define __raw_write_unlock(p)	arch_write_unlock(p)
-#define __raw_write_trylock(p)	arch_write_trylock(p)
-
-#define __raw_read_can_lock(rw)		(!((rw)->lock & 0x80000000UL))
-#define __raw_write_can_lock(rw)	(!(rw)->lock)
+#define arch_read_lock(p)	arch_read_lock(p)
+#define arch_read_lock_flags(p, f) arch_read_lock(p)
+#define arch_read_trylock(p)	arch_read_trylock(p)
+#define arch_read_unlock(p)	arch_read_unlock(p)
+#define arch_write_lock(p)	arch_write_lock(p)
+#define arch_write_lock_flags(p, f) arch_write_lock(p)
+#define arch_write_unlock(p)	arch_write_unlock(p)
+#define arch_write_trylock(p)	arch_write_trylock(p)
+
+#define arch_read_can_lock(rw)		(!((rw)->lock & 0x80000000UL))
+#define arch_write_can_lock(rw)	(!(rw)->lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 99cb86e843a0..3089f70c0c52 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -232,7 +232,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
  * read_can_lock - would read_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_read_can_lock(arch_rwlock_t *lock)
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
 	return (int)(lock)->lock > 0;
 }
@@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(arch_rwlock_t *lock)
  * write_can_lock - would write_trylock() succeed?
  * @lock: the rwlock in question.
  */
-static inline int __raw_write_can_lock(arch_rwlock_t *lock)
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
 	return (lock)->lock == RW_LOCK_BIAS;
 }
 
-static inline void __raw_read_lock(arch_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
 		     "jns 1f\n"
@@ -255,7 +255,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw)
 		     ::LOCK_PTR_REG (rw) : "memory");
 }
 
-static inline void __raw_write_lock(arch_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
 		     "jz 1f\n"
@@ -264,7 +264,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw)
 		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
 }
 
-static inline int __raw_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -274,7 +274,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *lock)
 	return 0;
 }
 
-static inline int __raw_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 
@@ -284,19 +284,19 @@ static inline int __raw_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }
 
-static inline void __raw_read_unlock(arch_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
 }
 
-static inline void __raw_write_unlock(arch_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	asm volatile(LOCK_PREFIX "addl %1, %0"
 		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 73785b0bd6b9..5725b034defe 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -38,20 +38,20 @@ do {								\
  extern int _raw_write_trylock(rwlock_t *lock);
  extern void _raw_write_unlock(rwlock_t *lock);
 #else
-# define _raw_read_lock(rwlock)		__raw_read_lock(&(rwlock)->raw_lock)
+# define _raw_read_lock(rwlock)		arch_read_lock(&(rwlock)->raw_lock)
 # define _raw_read_lock_flags(lock, flags) \
-		__raw_read_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_read_trylock(rwlock)	__raw_read_trylock(&(rwlock)->raw_lock)
-# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
-# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
+		arch_read_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
+# define _raw_read_unlock(rwlock)	arch_read_unlock(&(rwlock)->raw_lock)
+# define _raw_write_lock(rwlock)	arch_write_lock(&(rwlock)->raw_lock)
 # define _raw_write_lock_flags(lock, flags) \
-		__raw_write_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_write_trylock(rwlock)	__raw_write_trylock(&(rwlock)->raw_lock)
-# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
+		arch_write_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
+# define _raw_write_unlock(rwlock)	arch_write_unlock(&(rwlock)->raw_lock)
 #endif
 
-#define read_can_lock(rwlock)		__raw_read_can_lock(&(rwlock)->raw_lock)
-#define write_can_lock(rwlock)		__raw_write_can_lock(&(rwlock)->raw_lock)
+#define read_can_lock(rwlock)		arch_read_can_lock(&(rwlock)->raw_lock)
+#define write_can_lock(rwlock)		arch_write_can_lock(&(rwlock)->raw_lock)
 
 /*
  * Define the various rw_lock methods.  Note we define these
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 1d3bcc3cf7c6..b14f6a91e19f 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -49,12 +49,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 /*
  * Read-write spinlocks. No debug version.
  */
-#define __raw_read_lock(lock)		do { (void)(lock); } while (0)
-#define __raw_write_lock(lock)		do { (void)(lock); } while (0)
-#define __raw_read_trylock(lock)	({ (void)(lock); 1; })
-#define __raw_write_trylock(lock)	({ (void)(lock); 1; })
-#define __raw_read_unlock(lock)		do { (void)(lock); } while (0)
-#define __raw_write_unlock(lock)	do { (void)(lock); } while (0)
+#define arch_read_lock(lock)		do { (void)(lock); } while (0)
+#define arch_write_lock(lock)		do { (void)(lock); } while (0)
+#define arch_read_trylock(lock)	({ (void)(lock); 1; })
+#define arch_write_trylock(lock)	({ (void)(lock); 1; })
+#define arch_read_unlock(lock)		do { (void)(lock); } while (0)
+#define arch_write_unlock(lock)	do { (void)(lock); } while (0)
 
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
@@ -67,8 +67,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 #define arch_spin_is_contended(lock)	(((void)(lock), 0))
 
-#define __raw_read_can_lock(lock)	(((void)(lock), 1))
-#define __raw_write_can_lock(lock)	(((void)(lock), 1))
+#define arch_read_can_lock(lock)	(((void)(lock), 1))
+#define arch_write_can_lock(lock)	(((void)(lock), 1))
 
 #define arch_spin_unlock_wait(lock) \
 		do { cpu_relax(); } while (arch_spin_is_locked(lock))
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 3f72f10d9cb0..0cea0bf6114e 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -176,7 +176,7 @@ static void __read_lock_debug(rwlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_read_trylock(&lock->raw_lock))
+			if (arch_read_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -196,12 +196,12 @@ static void __read_lock_debug(rwlock_t *lock)
 void _raw_read_lock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	__raw_read_lock(&lock->raw_lock);
+	arch_read_lock(&lock->raw_lock);
 }
 
 int _raw_read_trylock(rwlock_t *lock)
 {
-	int ret = __raw_read_trylock(&lock->raw_lock);
+	int ret = arch_read_trylock(&lock->raw_lock);
 
 #ifndef CONFIG_SMP
 	/*
@@ -215,7 +215,7 @@ int _raw_read_trylock(rwlock_t *lock)
 void _raw_read_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	__raw_read_unlock(&lock->raw_lock);
+	arch_read_unlock(&lock->raw_lock);
 }
 
 static inline void debug_write_lock_before(rwlock_t *lock)
@@ -251,7 +251,7 @@ static void __write_lock_debug(rwlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_write_trylock(&lock->raw_lock))
+			if (arch_write_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -271,13 +271,13 @@ static void __write_lock_debug(rwlock_t *lock)
 void _raw_write_lock(rwlock_t *lock)
 {
 	debug_write_lock_before(lock);
-	__raw_write_lock(&lock->raw_lock);
+	arch_write_lock(&lock->raw_lock);
 	debug_write_lock_after(lock);
 }
 
 int _raw_write_trylock(rwlock_t *lock)
 {
-	int ret = __raw_write_trylock(&lock->raw_lock);
+	int ret = arch_write_trylock(&lock->raw_lock);
 
 	if (ret)
 		debug_write_lock_after(lock);
@@ -293,5 +293,5 @@ int _raw_write_trylock(rwlock_t *lock)
 void _raw_write_unlock(rwlock_t *lock)
 {
 	debug_write_unlock(lock);
-	__raw_write_unlock(&lock->raw_lock);
+	arch_write_unlock(&lock->raw_lock);
 }
-- 
cgit v1.2.3


From c2f21ce2e31286a0a32f8da0a7856e9ca1122ef3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 2 Dec 2009 20:02:59 +0100
Subject: locking: Implement new raw_spinlock

Now that the raw_spin name space is freed up, we can implement
raw_spinlock and the related functions which are used to annotate the
locks which are not converted to sleeping spinlocks in preempt-rt.

A side effect is that only such locks can be used with the low level
lock fsunctions which circumvent lockdep.

For !rt spin_* functions are mapped to the raw_spin* implementations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/spinlock.h         | 258 +++++++++++++++++++++++++++++----------
 include/linux/spinlock_api_smp.h |  51 ++++----
 include/linux/spinlock_api_up.h  |   2 +-
 include/linux/spinlock_types.h   |  49 ++++++--
 kernel/mutex-debug.h             |  12 +-
 kernel/sched.c                   |   2 +-
 kernel/spinlock.c                |  34 +++---
 lib/spinlock_debug.c             |  24 ++--
 8 files changed, 297 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 53bc2213b414..ef5a55d96b9b 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -80,7 +80,7 @@
 #include <linux/spinlock_types.h>
 
 /*
- * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
+ * Pull the arch_spin*() functions/declarations (UP-nondebug doesnt need them):
  */
 #ifdef CONFIG_SMP
 # include <asm/spinlock.h>
@@ -89,30 +89,30 @@
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-  extern void __spin_lock_init(spinlock_t *lock, const char *name,
-			       struct lock_class_key *key);
-# define spin_lock_init(lock)					\
+  extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
+				   struct lock_class_key *key);
+# define raw_spin_lock_init(lock)				\
 do {								\
 	static struct lock_class_key __key;			\
 								\
-	__spin_lock_init((lock), #lock, &__key);		\
+	__raw_spin_lock_init((lock), #lock, &__key);		\
 } while (0)
 
 #else
-# define spin_lock_init(lock)					\
-	do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0)
+# define raw_spin_lock_init(lock)				\
+	do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
-#define spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
+#define raw_spin_is_locked(lock)	arch_spin_is_locked(&(lock)->raw_lock)
 
 #ifdef CONFIG_GENERIC_LOCKBREAK
-#define spin_is_contended(lock) ((lock)->break_lock)
+#define raw_spin_is_contended(lock) ((lock)->break_lock)
 #else
 
 #ifdef arch_spin_is_contended
-#define spin_is_contended(lock)	arch_spin_is_contended(&(lock)->raw_lock)
+#define raw_spin_is_contended(lock)	arch_spin_is_contended(&(lock)->raw_lock)
 #else
-#define spin_is_contended(lock)	(((void)(lock), 0))
+#define raw_spin_is_contended(lock)	(((void)(lock), 0))
 #endif /*arch_spin_is_contended*/
 #endif
 
@@ -122,22 +122,37 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 #endif
 
 /**
- * spin_unlock_wait - wait until the spinlock gets unlocked
+ * raw_spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
  */
-#define spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
+#define raw_spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
- extern void _raw_spin_lock(spinlock_t *lock);
+ extern void _raw_spin_lock(raw_spinlock_t *lock);
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
- extern int _raw_spin_trylock(spinlock_t *lock);
- extern void _raw_spin_unlock(spinlock_t *lock);
+ extern int _raw_spin_trylock(raw_spinlock_t *lock);
+ extern void _raw_spin_unlock(raw_spinlock_t *lock);
 #else
-# define _raw_spin_lock(lock)		arch_spin_lock(&(lock)->raw_lock)
-# define _raw_spin_lock_flags(lock, flags) \
-		arch_spin_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_spin_trylock(lock)	arch_spin_trylock(&(lock)->raw_lock)
-# define _raw_spin_unlock(lock)		arch_spin_unlock(&(lock)->raw_lock)
+static inline void _raw_spin_lock(raw_spinlock_t *lock)
+{
+	arch_spin_lock(&lock->raw_lock);
+}
+
+static inline void
+_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags)
+{
+	arch_spin_lock_flags(&lock->raw_lock, *flags);
+}
+
+static inline int _raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return arch_spin_trylock(&(lock)->raw_lock);
+}
+
+static inline void _raw_spin_unlock(raw_spinlock_t *lock)
+{
+	arch_spin_unlock(&lock->raw_lock);
+}
 #endif
 
 /*
@@ -146,38 +161,38 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
  * various methods are defined as nops in the case they are not
  * required.
  */
-#define spin_trylock(lock)		__cond_lock(lock, _spin_trylock(lock))
+#define raw_spin_trylock(lock)		__cond_lock(lock, _spin_trylock(lock))
 
-#define spin_lock(lock)			_spin_lock(lock)
+#define raw_spin_lock(lock)		_spin_lock(lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
-# define spin_lock_nest_lock(lock, nest_lock)				\
+# define raw_spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
+# define raw_spin_lock_nest_lock(lock, nest_lock)			\
 	 do {								\
 		 typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\
 		 _spin_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
 	 } while (0)
 #else
-# define spin_lock_nested(lock, subclass) _spin_lock(lock)
-# define spin_lock_nest_lock(lock, nest_lock) _spin_lock(lock)
+# define raw_spin_lock_nested(lock, subclass)		_spin_lock(lock)
+# define raw_spin_lock_nest_lock(lock, nest_lock)	_spin_lock(lock)
 #endif
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define spin_lock_irqsave(lock, flags)			\
+#define raw_spin_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		flags = _spin_lock_irqsave(lock);	\
 	} while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+#define raw_spin_lock_irqsave_nested(lock, flags, subclass)		\
 	do {								\
 		typecheck(unsigned long, flags);			\
 		flags = _spin_lock_irqsave_nested(lock, subclass);	\
 	} while (0)
 #else
-#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+#define raw_spin_lock_irqsave_nested(lock, flags, subclass)		\
 	do {								\
 		typecheck(unsigned long, flags);			\
 		flags = _spin_lock_irqsave(lock);			\
@@ -186,45 +201,178 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 
 #else
 
-#define spin_lock_irqsave(lock, flags)			\
+#define raw_spin_lock_irqsave(lock, flags)		\
 	do {						\
 		typecheck(unsigned long, flags);	\
 		_spin_lock_irqsave(lock, flags);	\
 	} while (0)
 
-#define spin_lock_irqsave_nested(lock, flags, subclass)	\
-	spin_lock_irqsave(lock, flags)
+#define raw_spin_lock_irqsave_nested(lock, flags, subclass)	\
+	raw_spin_lock_irqsave(lock, flags)
 
 #endif
 
-#define spin_lock_irq(lock)		_spin_lock_irq(lock)
-#define spin_lock_bh(lock)		_spin_lock_bh(lock)
-#define spin_unlock(lock)		_spin_unlock(lock)
-#define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
+#define raw_spin_lock_irq(lock)	_spin_lock_irq(lock)
+#define raw_spin_lock_bh(lock)		_spin_lock_bh(lock)
+#define raw_spin_unlock(lock)		_spin_unlock(lock)
+#define raw_spin_unlock_irq(lock)	_spin_unlock_irq(lock)
 
-#define spin_unlock_irqrestore(lock, flags)		\
-	do {						\
-		typecheck(unsigned long, flags);	\
+#define raw_spin_unlock_irqrestore(lock, flags)		\
+	do {							\
+		typecheck(unsigned long, flags);		\
 		_spin_unlock_irqrestore(lock, flags);	\
 	} while (0)
-#define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
+#define raw_spin_unlock_bh(lock)	_spin_unlock_bh(lock)
 
-#define spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
+#define raw_spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
 
-#define spin_trylock_irq(lock) \
+#define raw_spin_trylock_irq(lock) \
 ({ \
 	local_irq_disable(); \
-	spin_trylock(lock) ? \
+	raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_enable(); 0;  }); \
 })
 
-#define spin_trylock_irqsave(lock, flags) \
+#define raw_spin_trylock_irqsave(lock, flags) \
 ({ \
 	local_irq_save(flags); \
-	spin_trylock(lock) ? \
+	raw_spin_trylock(lock) ? \
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
 
+/**
+ * raw_spin_can_lock - would raw_spin_trylock() succeed?
+ * @lock: the spinlock in question.
+ */
+#define raw_spin_can_lock(lock)	(!raw_spin_is_locked(lock))
+
+/* Include rwlock functions */
+#include <linux/rwlock.h>
+
+/*
+ * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+# include <linux/spinlock_api_smp.h>
+#else
+# include <linux/spinlock_api_up.h>
+#endif
+
+/*
+ * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
+ */
+
+static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
+{
+	return &lock->rlock;
+}
+
+#define spin_lock_init(_lock)				\
+do {							\
+	spinlock_check(_lock);				\
+	raw_spin_lock_init(&(_lock)->rlock);		\
+} while (0)
+
+static inline void spin_lock(spinlock_t *lock)
+{
+	raw_spin_lock(&lock->rlock);
+}
+
+static inline void spin_lock_bh(spinlock_t *lock)
+{
+	raw_spin_lock_bh(&lock->rlock);
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+	return raw_spin_trylock(&lock->rlock);
+}
+
+#define spin_lock_nested(lock, subclass)			\
+do {								\
+	raw_spin_lock_nested(spinlock_check(lock), subclass);	\
+} while (0)
+
+#define spin_lock_nest_lock(lock, nest_lock)				\
+do {									\
+	raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock);	\
+} while (0)
+
+static inline void spin_lock_irq(spinlock_t *lock)
+{
+	raw_spin_lock_irq(&lock->rlock);
+}
+
+#define spin_lock_irqsave(lock, flags)				\
+do {								\
+	raw_spin_lock_irqsave(spinlock_check(lock), flags);	\
+} while (0)
+
+#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+do {									\
+	raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
+} while (0)
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+	raw_spin_unlock(&lock->rlock);
+}
+
+static inline void spin_unlock_bh(spinlock_t *lock)
+{
+	raw_spin_unlock_bh(&lock->rlock);
+}
+
+static inline void spin_unlock_irq(spinlock_t *lock)
+{
+	raw_spin_unlock_irq(&lock->rlock);
+}
+
+static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&lock->rlock, flags);
+}
+
+static inline int spin_trylock_bh(spinlock_t *lock)
+{
+	return raw_spin_trylock_bh(&lock->rlock);
+}
+
+static inline int spin_trylock_irq(spinlock_t *lock)
+{
+	return raw_spin_trylock_irq(&lock->rlock);
+}
+
+#define spin_trylock_irqsave(lock, flags)			\
+({								\
+	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
+})
+
+static inline void spin_unlock_wait(spinlock_t *lock)
+{
+	raw_spin_unlock_wait(&lock->rlock);
+}
+
+static inline int spin_is_locked(spinlock_t *lock)
+{
+	return raw_spin_is_locked(&lock->rlock);
+}
+
+static inline int spin_is_contended(spinlock_t *lock)
+{
+	return raw_spin_is_contended(&lock->rlock);
+}
+
+static inline int spin_can_lock(spinlock_t *lock)
+{
+	return raw_spin_can_lock(&lock->rlock);
+}
+
+static inline void assert_spin_locked(spinlock_t *lock)
+{
+	assert_raw_spin_locked(&lock->rlock);
+}
+
 /*
  * Pull the atomic_t declaration:
  * (asm-mips/atomic.h needs above definitions)
@@ -242,22 +390,4 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
 #define atomic_dec_and_lock(atomic, lock) \
 		__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
 
-/**
- * spin_can_lock - would spin_trylock() succeed?
- * @lock: the spinlock in question.
- */
-#define spin_can_lock(lock)	(!spin_is_locked(lock))
-
-/* Include rwlock functions */
-#include <linux/rwlock.h>
-
-/*
- * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-# include <linux/spinlock_api_smp.h>
-#else
-# include <linux/spinlock_api_up.h>
-#endif
-
 #endif /* __LINUX_SPINLOCK_H */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index a2b2c9df91de..eabe5068d138 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -17,26 +17,29 @@
 
 int in_lock_functions(unsigned long addr);
 
-#define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
+#define assert_raw_spin_locked(x)	BUG_ON(!raw_spin_is_locked(x))
 
-void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
-void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+void __lockfunc _spin_lock(raw_spinlock_t *lock)	__acquires(lock);
+void __lockfunc _spin_lock_nested(raw_spinlock_t *lock, int subclass)
 							__acquires(lock);
-void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map)
+void __lockfunc
+_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
 							__acquires(lock);
-void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(lock);
-void __lockfunc _spin_lock_irq(spinlock_t *lock)	__acquires(lock);
+void __lockfunc _spin_lock_bh(raw_spinlock_t *lock)	__acquires(lock);
+void __lockfunc _spin_lock_irq(raw_spinlock_t *lock)	__acquires(lock);
 
-unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
+unsigned long __lockfunc _spin_lock_irqsave(raw_spinlock_t *lock)
 							__acquires(lock);
-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+unsigned long __lockfunc
+_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass)
 							__acquires(lock);
-int __lockfunc _spin_trylock(spinlock_t *lock);
-int __lockfunc _spin_trylock_bh(spinlock_t *lock);
-void __lockfunc _spin_unlock(spinlock_t *lock)		__releases(lock);
-void __lockfunc _spin_unlock_bh(spinlock_t *lock)	__releases(lock);
-void __lockfunc _spin_unlock_irq(spinlock_t *lock)	__releases(lock);
-void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+int __lockfunc _spin_trylock(raw_spinlock_t *lock);
+int __lockfunc _spin_trylock_bh(raw_spinlock_t *lock);
+void __lockfunc _spin_unlock(raw_spinlock_t *lock)	__releases(lock);
+void __lockfunc _spin_unlock_bh(raw_spinlock_t *lock)	__releases(lock);
+void __lockfunc _spin_unlock_irq(raw_spinlock_t *lock)	__releases(lock);
+void __lockfunc
+_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
 							__releases(lock);
 
 #ifdef CONFIG_INLINE_SPIN_LOCK
@@ -79,7 +82,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
 #endif
 
-static inline int __spin_trylock(spinlock_t *lock)
+static inline int __spin_trylock(raw_spinlock_t *lock)
 {
 	preempt_disable();
 	if (_raw_spin_trylock(lock)) {
@@ -97,7 +100,7 @@ static inline int __spin_trylock(spinlock_t *lock)
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-static inline unsigned long __spin_lock_irqsave(spinlock_t *lock)
+static inline unsigned long __spin_lock_irqsave(raw_spinlock_t *lock)
 {
 	unsigned long flags;
 
@@ -117,7 +120,7 @@ static inline unsigned long __spin_lock_irqsave(spinlock_t *lock)
 	return flags;
 }
 
-static inline void __spin_lock_irq(spinlock_t *lock)
+static inline void __spin_lock_irq(raw_spinlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
@@ -125,7 +128,7 @@ static inline void __spin_lock_irq(spinlock_t *lock)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 }
 
-static inline void __spin_lock_bh(spinlock_t *lock)
+static inline void __spin_lock_bh(raw_spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
@@ -133,7 +136,7 @@ static inline void __spin_lock_bh(spinlock_t *lock)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 }
 
-static inline void __spin_lock(spinlock_t *lock)
+static inline void __spin_lock(raw_spinlock_t *lock)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
@@ -142,14 +145,14 @@ static inline void __spin_lock(spinlock_t *lock)
 
 #endif /* CONFIG_PREEMPT */
 
-static inline void __spin_unlock(spinlock_t *lock)
+static inline void __spin_unlock(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	preempt_enable();
 }
 
-static inline void __spin_unlock_irqrestore(spinlock_t *lock,
+static inline void __spin_unlock_irqrestore(raw_spinlock_t *lock,
 					    unsigned long flags)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
@@ -158,7 +161,7 @@ static inline void __spin_unlock_irqrestore(spinlock_t *lock,
 	preempt_enable();
 }
 
-static inline void __spin_unlock_irq(spinlock_t *lock)
+static inline void __spin_unlock_irq(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
@@ -166,7 +169,7 @@ static inline void __spin_unlock_irq(spinlock_t *lock)
 	preempt_enable();
 }
 
-static inline void __spin_unlock_bh(spinlock_t *lock)
+static inline void __spin_unlock_bh(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
@@ -174,7 +177,7 @@ static inline void __spin_unlock_bh(spinlock_t *lock)
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 
-static inline int __spin_trylock_bh(spinlock_t *lock)
+static inline int __spin_trylock_bh(raw_spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index 04e1d3164576..3a9e27adecf9 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -16,7 +16,7 @@
 
 #define in_lock_functions(ADDR)		0
 
-#define assert_spin_locked(lock)	do { (void)(lock); } while (0)
+#define assert_raw_spin_locked(lock)	do { (void)(lock); } while (0)
 
 /*
  * In the UP-nondebug case there's no real locking going on, so the
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 7dadce303ebf..851b7783720d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -17,7 +17,7 @@
 
 #include <linux/lockdep.h>
 
-typedef struct {
+typedef struct raw_spinlock {
 	arch_spinlock_t raw_lock;
 #ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
@@ -29,7 +29,7 @@ typedef struct {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif
-} spinlock_t;
+} raw_spinlock_t;
 
 #define SPINLOCK_MAGIC		0xdead4ead
 
@@ -42,18 +42,45 @@ typedef struct {
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-# define __SPIN_LOCK_UNLOCKED(lockname)					\
-	(spinlock_t)	{	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
-				.magic = SPINLOCK_MAGIC,		\
-				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1,			\
-				SPIN_DEP_MAP_INIT(lockname) }
+# define SPIN_DEBUG_INIT(lockname)		\
+	.magic = SPINLOCK_MAGIC,		\
+	.owner_cpu = -1,			\
+	.owner = SPINLOCK_OWNER_INIT,
 #else
-# define __SPIN_LOCK_UNLOCKED(lockname) \
-	(spinlock_t)	{	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
-				SPIN_DEP_MAP_INIT(lockname) }
+# define SPIN_DEBUG_INIT(lockname)
 #endif
 
+#define __RAW_SPIN_LOCK_INITIALIZER(lockname)	\
+	{					\
+	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
+	SPIN_DEBUG_INIT(lockname)		\
+	SPIN_DEP_MAP_INIT(lockname) }
+
+#define __RAW_SPIN_LOCK_UNLOCKED(lockname)	\
+	(raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
+
+#define DEFINE_RAW_SPINLOCK(x)	raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
+
+typedef struct spinlock {
+	union {
+		struct raw_spinlock rlock;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
+		struct {
+			u8 __padding[LOCK_PADSIZE];
+			struct lockdep_map dep_map;
+		};
+#endif
+	};
+} spinlock_t;
+
+#define __SPIN_LOCK_INITIALIZER(lockname) \
+	{ { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
+
+#define __SPIN_LOCK_UNLOCKED(lockname) \
+	(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
+
 /*
  * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
  * deprecated.
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 7bebbd15b342..57d527a16f9d 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
 							\
 		DEBUG_LOCKS_WARN_ON(in_interrupt());	\
 		local_irq_save(flags);			\
-		arch_spin_lock(&(lock)->raw_lock);	\
+		arch_spin_lock(&(lock)->rlock.raw_lock);\
 		DEBUG_LOCKS_WARN_ON(l->magic != l);	\
 	} while (0)
 
-#define spin_unlock_mutex(lock, flags)			\
-	do {						\
-		arch_spin_unlock(&(lock)->raw_lock);	\
-		local_irq_restore(flags);		\
-		preempt_check_resched();		\
+#define spin_unlock_mutex(lock, flags)				\
+	do {							\
+		arch_spin_unlock(&(lock)->rlock.raw_lock);	\
+		local_irq_restore(flags);			\
+		preempt_check_resched();			\
 	} while (0)
diff --git a/kernel/sched.c b/kernel/sched.c
index fd05861b2111..e6acf2d7b753 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -884,7 +884,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
-	rq->lock.owner = current;
+	rq->lock.rlock.owner = current;
 #endif
 	/*
 	 * If we are tracking spinlock dependencies then we have to
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index fbb5f8b78357..54eb7dd3c608 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -32,6 +32,8 @@
  * include/linux/spinlock_api_smp.h
  */
 #else
+#define raw_read_can_lock(l)	read_can_lock(l)
+#define raw_write_can_lock(l)	write_can_lock(l)
 /*
  * We build the __lock_function inlines here. They are too large for
  * inlining all over the place, but here is only one user per function
@@ -52,7 +54,7 @@ void __lockfunc __##op##_lock(locktype##_t *lock)			\
 									\
 		if (!(lock)->break_lock)				\
 			(lock)->break_lock = 1;				\
-		while (!op##_can_lock(lock) && (lock)->break_lock)	\
+		while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
 			arch_##op##_relax(&lock->raw_lock);		\
 	}								\
 	(lock)->break_lock = 0;						\
@@ -72,7 +74,7 @@ unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock)	\
 									\
 		if (!(lock)->break_lock)				\
 			(lock)->break_lock = 1;				\
-		while (!op##_can_lock(lock) && (lock)->break_lock)	\
+		while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
 			arch_##op##_relax(&lock->raw_lock);		\
 	}								\
 	(lock)->break_lock = 0;						\
@@ -107,14 +109,14 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock)			\
  *         __[spin|read|write]_lock_irqsave()
  *         __[spin|read|write]_lock_bh()
  */
-BUILD_LOCK_OPS(spin, spinlock);
+BUILD_LOCK_OPS(spin, raw_spinlock);
 BUILD_LOCK_OPS(read, rwlock);
 BUILD_LOCK_OPS(write, rwlock);
 
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _spin_trylock(spinlock_t *lock)
+int __lockfunc _spin_trylock(raw_spinlock_t *lock)
 {
 	return __spin_trylock(lock);
 }
@@ -122,7 +124,7 @@ EXPORT_SYMBOL(_spin_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _spin_trylock_bh(spinlock_t *lock)
+int __lockfunc _spin_trylock_bh(raw_spinlock_t *lock)
 {
 	return __spin_trylock_bh(lock);
 }
@@ -130,7 +132,7 @@ EXPORT_SYMBOL(_spin_trylock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _spin_lock(spinlock_t *lock)
+void __lockfunc _spin_lock(raw_spinlock_t *lock)
 {
 	__spin_lock(lock);
 }
@@ -138,7 +140,7 @@ EXPORT_SYMBOL(_spin_lock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
+unsigned long __lockfunc _spin_lock_irqsave(raw_spinlock_t *lock)
 {
 	return __spin_lock_irqsave(lock);
 }
@@ -146,7 +148,7 @@ EXPORT_SYMBOL(_spin_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _spin_lock_irq(spinlock_t *lock)
+void __lockfunc _spin_lock_irq(raw_spinlock_t *lock)
 {
 	__spin_lock_irq(lock);
 }
@@ -154,7 +156,7 @@ EXPORT_SYMBOL(_spin_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _spin_lock_bh(spinlock_t *lock)
+void __lockfunc _spin_lock_bh(raw_spinlock_t *lock)
 {
 	__spin_lock_bh(lock);
 }
@@ -162,7 +164,7 @@ EXPORT_SYMBOL(_spin_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK
-void __lockfunc _spin_unlock(spinlock_t *lock)
+void __lockfunc _spin_unlock(raw_spinlock_t *lock)
 {
 	__spin_unlock(lock);
 }
@@ -170,7 +172,7 @@ EXPORT_SYMBOL(_spin_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+void __lockfunc _spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
 {
 	__spin_unlock_irqrestore(lock, flags);
 }
@@ -178,7 +180,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _spin_unlock_irq(spinlock_t *lock)
+void __lockfunc _spin_unlock_irq(raw_spinlock_t *lock)
 {
 	__spin_unlock_irq(lock);
 }
@@ -186,7 +188,7 @@ EXPORT_SYMBOL(_spin_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _spin_unlock_bh(spinlock_t *lock)
+void __lockfunc _spin_unlock_bh(raw_spinlock_t *lock)
 {
 	__spin_unlock_bh(lock);
 }
@@ -339,7 +341,7 @@ EXPORT_SYMBOL(_write_unlock_bh);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+void __lockfunc _spin_lock_nested(raw_spinlock_t *lock, int subclass)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
@@ -347,7 +349,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
 }
 EXPORT_SYMBOL(_spin_lock_nested);
 
-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock,
+unsigned long __lockfunc _spin_lock_irqsave_nested(raw_spinlock_t *lock,
 						   int subclass)
 {
 	unsigned long flags;
@@ -361,7 +363,7 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock,
 }
 EXPORT_SYMBOL(_spin_lock_irqsave_nested);
 
-void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
+void __lockfunc _spin_lock_nest_lock(raw_spinlock_t *lock,
 				     struct lockdep_map *nest_lock)
 {
 	preempt_disable();
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 0cea0bf6114e..e705848cc33c 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -13,8 +13,8 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 
-void __spin_lock_init(spinlock_t *lock, const char *name,
-		      struct lock_class_key *key)
+void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
+			  struct lock_class_key *key)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	/*
@@ -29,7 +29,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	lock->owner_cpu = -1;
 }
 
-EXPORT_SYMBOL(__spin_lock_init);
+EXPORT_SYMBOL(__raw_spin_lock_init);
 
 void __rwlock_init(rwlock_t *lock, const char *name,
 		   struct lock_class_key *key)
@@ -49,7 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
 
 EXPORT_SYMBOL(__rwlock_init);
 
-static void spin_bug(spinlock_t *lock, const char *msg)
+static void spin_bug(raw_spinlock_t *lock, const char *msg)
 {
 	struct task_struct *owner = NULL;
 
@@ -73,7 +73,7 @@ static void spin_bug(spinlock_t *lock, const char *msg)
 #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
 
 static inline void
-debug_spin_lock_before(spinlock_t *lock)
+debug_spin_lock_before(raw_spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
 	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -81,16 +81,16 @@ debug_spin_lock_before(spinlock_t *lock)
 							lock, "cpu recursion");
 }
 
-static inline void debug_spin_lock_after(spinlock_t *lock)
+static inline void debug_spin_lock_after(raw_spinlock_t *lock)
 {
 	lock->owner_cpu = raw_smp_processor_id();
 	lock->owner = current;
 }
 
-static inline void debug_spin_unlock(spinlock_t *lock)
+static inline void debug_spin_unlock(raw_spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked");
+	SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
 	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
 	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
@@ -98,7 +98,7 @@ static inline void debug_spin_unlock(spinlock_t *lock)
 	lock->owner_cpu = -1;
 }
 
-static void __spin_lock_debug(spinlock_t *lock)
+static void __spin_lock_debug(raw_spinlock_t *lock)
 {
 	u64 i;
 	u64 loops = loops_per_jiffy * HZ;
@@ -125,7 +125,7 @@ static void __spin_lock_debug(spinlock_t *lock)
 	}
 }
 
-void _raw_spin_lock(spinlock_t *lock)
+void _raw_spin_lock(raw_spinlock_t *lock)
 {
 	debug_spin_lock_before(lock);
 	if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
@@ -133,7 +133,7 @@ void _raw_spin_lock(spinlock_t *lock)
 	debug_spin_lock_after(lock);
 }
 
-int _raw_spin_trylock(spinlock_t *lock)
+int _raw_spin_trylock(raw_spinlock_t *lock)
 {
 	int ret = arch_spin_trylock(&lock->raw_lock);
 
@@ -148,7 +148,7 @@ int _raw_spin_trylock(spinlock_t *lock)
 	return ret;
 }
 
-void _raw_spin_unlock(spinlock_t *lock)
+void _raw_spin_unlock(raw_spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
 	arch_spin_unlock(&lock->raw_lock);
-- 
cgit v1.2.3


From 9828ea9d75c38fe3dce05d00566eed61c85732e6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 20:55:53 +0100
Subject: locking: Further name space cleanups

The name space hierarchy for the internal lock functions is now a bit
backwards. raw_spin* functions map to _spin* which use __spin*, while
we would like to have _raw_spin* and __raw_spin*.

_raw_spin* is already used by lock debugging, so rename those funtions
to do_raw_spin* to free up the _raw_spin* name space.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rwlock.h           | 32 ++++++++++++++++----------------
 include/linux/rwlock_api_smp.h   | 40 ++++++++++++++++++++--------------------
 include/linux/spinlock.h         | 16 ++++++++--------
 include/linux/spinlock_api_smp.h | 24 ++++++++++++------------
 kernel/sched.c                   |  2 +-
 kernel/spinlock.c                | 12 ++++++------
 lib/kernel_lock.c                | 18 +++++++++---------
 lib/spinlock_debug.c             | 18 +++++++++---------
 8 files changed, 81 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 5725b034defe..bd799bc6d086 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -29,25 +29,25 @@ do {								\
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
- extern void _raw_read_lock(rwlock_t *lock);
-#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
- extern int _raw_read_trylock(rwlock_t *lock);
- extern void _raw_read_unlock(rwlock_t *lock);
- extern void _raw_write_lock(rwlock_t *lock);
-#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
- extern int _raw_write_trylock(rwlock_t *lock);
- extern void _raw_write_unlock(rwlock_t *lock);
+ extern void do_raw_read_lock(rwlock_t *lock);
+#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
+ extern int do_raw_read_trylock(rwlock_t *lock);
+ extern void do_raw_read_unlock(rwlock_t *lock);
+ extern void do_raw_write_lock(rwlock_t *lock);
+#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
+ extern int do_raw_write_trylock(rwlock_t *lock);
+ extern void do_raw_write_unlock(rwlock_t *lock);
 #else
-# define _raw_read_lock(rwlock)		arch_read_lock(&(rwlock)->raw_lock)
-# define _raw_read_lock_flags(lock, flags) \
+# define do_raw_read_lock(rwlock)		arch_read_lock(&(rwlock)->raw_lock)
+# define do_raw_read_lock_flags(lock, flags) \
 		arch_read_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
-# define _raw_read_unlock(rwlock)	arch_read_unlock(&(rwlock)->raw_lock)
-# define _raw_write_lock(rwlock)	arch_write_lock(&(rwlock)->raw_lock)
-# define _raw_write_lock_flags(lock, flags) \
+# define do_raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
+# define do_raw_read_unlock(rwlock)	arch_read_unlock(&(rwlock)->raw_lock)
+# define do_raw_write_lock(rwlock)	arch_write_lock(&(rwlock)->raw_lock)
+# define do_raw_write_lock_flags(lock, flags) \
 		arch_write_lock_flags(&(lock)->raw_lock, *(flags))
-# define _raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
-# define _raw_write_unlock(rwlock)	arch_write_unlock(&(rwlock)->raw_lock)
+# define do_raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
+# define do_raw_write_unlock(rwlock)	arch_write_unlock(&(rwlock)->raw_lock)
 #endif
 
 #define read_can_lock(rwlock)		arch_read_can_lock(&(rwlock)->raw_lock)
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index 090f876f828d..b3ba5ae6a8c4 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -113,7 +113,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 static inline int __read_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_read_trylock(lock)) {
+	if (do_raw_read_trylock(lock)) {
 		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
 	}
@@ -124,7 +124,7 @@ static inline int __read_trylock(rwlock_t *lock)
 static inline int __write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_write_trylock(lock)) {
+	if (do_raw_write_trylock(lock)) {
 		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
 	}
@@ -143,7 +143,7 @@ static inline void __read_lock(rwlock_t *lock)
 {
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
 static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
@@ -153,8 +153,8 @@ static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
 	local_irq_save(flags);
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock,
-			     _raw_read_lock_flags, &flags);
+	LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock,
+			     do_raw_read_lock_flags, &flags);
 	return flags;
 }
 
@@ -163,7 +163,7 @@ static inline void __read_lock_irq(rwlock_t *lock)
 	local_irq_disable();
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
 static inline void __read_lock_bh(rwlock_t *lock)
@@ -171,7 +171,7 @@ static inline void __read_lock_bh(rwlock_t *lock)
 	local_bh_disable();
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock);
+	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
 static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
@@ -181,8 +181,8 @@ static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
 	local_irq_save(flags);
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock,
-			     _raw_write_lock_flags, &flags);
+	LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock,
+			     do_raw_write_lock_flags, &flags);
 	return flags;
 }
 
@@ -191,7 +191,7 @@ static inline void __write_lock_irq(rwlock_t *lock)
 	local_irq_disable();
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
 static inline void __write_lock_bh(rwlock_t *lock)
@@ -199,14 +199,14 @@ static inline void __write_lock_bh(rwlock_t *lock)
 	local_bh_disable();
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
 static inline void __write_lock(rwlock_t *lock)
 {
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
 #endif /* CONFIG_PREEMPT */
@@ -214,21 +214,21 @@ static inline void __write_lock(rwlock_t *lock)
 static inline void __write_unlock(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
+	do_raw_write_unlock(lock);
 	preempt_enable();
 }
 
 static inline void __read_unlock(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
+	do_raw_read_unlock(lock);
 	preempt_enable();
 }
 
 static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
+	do_raw_read_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
 }
@@ -236,7 +236,7 @@ static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 static inline void __read_unlock_irq(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
+	do_raw_read_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
 }
@@ -244,7 +244,7 @@ static inline void __read_unlock_irq(rwlock_t *lock)
 static inline void __read_unlock_bh(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_read_unlock(lock);
+	do_raw_read_unlock(lock);
 	preempt_enable_no_resched();
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
@@ -253,7 +253,7 @@ static inline void __write_unlock_irqrestore(rwlock_t *lock,
 					     unsigned long flags)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
+	do_raw_write_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
 }
@@ -261,7 +261,7 @@ static inline void __write_unlock_irqrestore(rwlock_t *lock,
 static inline void __write_unlock_irq(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
+	do_raw_write_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
 }
@@ -269,7 +269,7 @@ static inline void __write_unlock_irq(rwlock_t *lock)
 static inline void __write_unlock_bh(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_write_unlock(lock);
+	do_raw_write_unlock(lock);
 	preempt_enable_no_resched();
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ef5a55d96b9b..0cbc58acf689 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -128,28 +128,28 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 #define raw_spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
- extern void _raw_spin_lock(raw_spinlock_t *lock);
-#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
- extern int _raw_spin_trylock(raw_spinlock_t *lock);
- extern void _raw_spin_unlock(raw_spinlock_t *lock);
+ extern void do_raw_spin_lock(raw_spinlock_t *lock);
+#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
+ extern int do_raw_spin_trylock(raw_spinlock_t *lock);
+ extern void do_raw_spin_unlock(raw_spinlock_t *lock);
 #else
-static inline void _raw_spin_lock(raw_spinlock_t *lock)
+static inline void do_raw_spin_lock(raw_spinlock_t *lock)
 {
 	arch_spin_lock(&lock->raw_lock);
 }
 
 static inline void
-_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags)
+do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags)
 {
 	arch_spin_lock_flags(&lock->raw_lock, *flags);
 }
 
-static inline int _raw_spin_trylock(raw_spinlock_t *lock)
+static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
 {
 	return arch_spin_trylock(&(lock)->raw_lock);
 }
 
-static inline void _raw_spin_unlock(raw_spinlock_t *lock)
+static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
 {
 	arch_spin_unlock(&lock->raw_lock);
 }
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index eabe5068d138..1be1fc57fc4b 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -85,7 +85,7 @@ _spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
 static inline int __spin_trylock(raw_spinlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_spin_trylock(lock)) {
+	if (do_raw_spin_trylock(lock)) {
 		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
 	}
@@ -109,13 +109,13 @@ static inline unsigned long __spin_lock_irqsave(raw_spinlock_t *lock)
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	/*
 	 * On lockdep we dont want the hand-coded irq-enable of
-	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * do_raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
 #ifdef CONFIG_LOCKDEP
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 #else
-	_raw_spin_lock_flags(lock, &flags);
+	do_raw_spin_lock_flags(lock, &flags);
 #endif
 	return flags;
 }
@@ -125,7 +125,7 @@ static inline void __spin_lock_irq(raw_spinlock_t *lock)
 	local_irq_disable();
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 
 static inline void __spin_lock_bh(raw_spinlock_t *lock)
@@ -133,14 +133,14 @@ static inline void __spin_lock_bh(raw_spinlock_t *lock)
 	local_bh_disable();
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 
 static inline void __spin_lock(raw_spinlock_t *lock)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 
 #endif /* CONFIG_PREEMPT */
@@ -148,7 +148,7 @@ static inline void __spin_lock(raw_spinlock_t *lock)
 static inline void __spin_unlock(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
+	do_raw_spin_unlock(lock);
 	preempt_enable();
 }
 
@@ -156,7 +156,7 @@ static inline void __spin_unlock_irqrestore(raw_spinlock_t *lock,
 					    unsigned long flags)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
+	do_raw_spin_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
 }
@@ -164,7 +164,7 @@ static inline void __spin_unlock_irqrestore(raw_spinlock_t *lock,
 static inline void __spin_unlock_irq(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
+	do_raw_spin_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
 }
@@ -172,7 +172,7 @@ static inline void __spin_unlock_irq(raw_spinlock_t *lock)
 static inline void __spin_unlock_bh(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
-	_raw_spin_unlock(lock);
+	do_raw_spin_unlock(lock);
 	preempt_enable_no_resched();
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
@@ -181,7 +181,7 @@ static inline int __spin_trylock_bh(raw_spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
-	if (_raw_spin_trylock(lock)) {
+	if (do_raw_spin_trylock(lock)) {
 		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index e6acf2d7b753..91c65dd91435 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6684,7 +6684,7 @@ SYSCALL_DEFINE0(sched_yield)
 	 */
 	__release(rq->lock);
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
-	_raw_spin_unlock(&rq->lock);
+	do_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();
 
 	schedule();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 54eb7dd3c608..795240b81224 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -48,7 +48,7 @@ void __lockfunc __##op##_lock(locktype##_t *lock)			\
 {									\
 	for (;;) {							\
 		preempt_disable();					\
-		if (likely(_raw_##op##_trylock(lock)))			\
+		if (likely(do_raw_##op##_trylock(lock)))		\
 			break;						\
 		preempt_enable();					\
 									\
@@ -67,7 +67,7 @@ unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock)	\
 	for (;;) {							\
 		preempt_disable();					\
 		local_irq_save(flags);					\
-		if (likely(_raw_##op##_trylock(lock)))			\
+		if (likely(do_raw_##op##_trylock(lock)))		\
 			break;						\
 		local_irq_restore(flags);				\
 		preempt_enable();					\
@@ -345,7 +345,7 @@ void __lockfunc _spin_lock_nested(raw_spinlock_t *lock, int subclass)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 EXPORT_SYMBOL(_spin_lock_nested);
 
@@ -357,8 +357,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(raw_spinlock_t *lock,
 	local_irq_save(flags);
 	preempt_disable();
 	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock,
-				_raw_spin_lock_flags, &flags);
+	LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
+				do_raw_spin_lock_flags, &flags);
 	return flags;
 }
 EXPORT_SYMBOL(_spin_lock_irqsave_nested);
@@ -368,7 +368,7 @@ void __lockfunc _spin_lock_nest_lock(raw_spinlock_t *lock,
 {
 	preempt_disable();
 	spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
-	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 EXPORT_SYMBOL(_spin_lock_nest_lock);
 
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5526b46aba94..fdd23cdb53f3 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -36,12 +36,12 @@ static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
  * If it successfully gets the lock, it should increment
  * the preemption count like any spinlock does.
  *
- * (This works on UP too - _raw_spin_trylock will never
+ * (This works on UP too - do_raw_spin_trylock will never
  * return false in that case)
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!_raw_spin_trylock(&kernel_flag)) {
+	while (!do_raw_spin_trylock(&kernel_flag)) {
 		if (need_resched())
 			return -EAGAIN;
 		cpu_relax();
@@ -52,27 +52,27 @@ int __lockfunc __reacquire_kernel_lock(void)
 
 void __lockfunc __release_kernel_lock(void)
 {
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
 }
 
 /*
  * These are the BKL spinlocks - we try to be polite about preemption.
  * If SMP is not on (ie UP preemption), this all goes away because the
- * _raw_spin_trylock() will always succeed.
+ * do_raw_spin_trylock() will always succeed.
  */
 #ifdef CONFIG_PREEMPT
 static inline void __lock_kernel(void)
 {
 	preempt_disable();
-	if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
+	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
 		/*
 		 * If preemption was disabled even before this
 		 * was called, there's nothing we can be polite
 		 * about - just spin.
 		 */
 		if (preempt_count() > 1) {
-			_raw_spin_lock(&kernel_flag);
+			do_raw_spin_lock(&kernel_flag);
 			return;
 		}
 
@@ -85,7 +85,7 @@ static inline void __lock_kernel(void)
 			while (spin_is_locked(&kernel_flag))
 				cpu_relax();
 			preempt_disable();
-		} while (!_raw_spin_trylock(&kernel_flag));
+		} while (!do_raw_spin_trylock(&kernel_flag));
 	}
 }
 
@@ -96,7 +96,7 @@ static inline void __lock_kernel(void)
  */
 static inline void __lock_kernel(void)
 {
-	_raw_spin_lock(&kernel_flag);
+	do_raw_spin_lock(&kernel_flag);
 }
 #endif
 
@@ -106,7 +106,7 @@ static inline void __unlock_kernel(void)
 	 * the BKL is not covered by lockdep, so we open-code the
 	 * unlocking sequence (and thus avoid the dep-chain ops):
 	 */
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable();
 }
 
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index e705848cc33c..4755b98b6dfb 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -125,7 +125,7 @@ static void __spin_lock_debug(raw_spinlock_t *lock)
 	}
 }
 
-void _raw_spin_lock(raw_spinlock_t *lock)
+void do_raw_spin_lock(raw_spinlock_t *lock)
 {
 	debug_spin_lock_before(lock);
 	if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
@@ -133,7 +133,7 @@ void _raw_spin_lock(raw_spinlock_t *lock)
 	debug_spin_lock_after(lock);
 }
 
-int _raw_spin_trylock(raw_spinlock_t *lock)
+int do_raw_spin_trylock(raw_spinlock_t *lock)
 {
 	int ret = arch_spin_trylock(&lock->raw_lock);
 
@@ -148,7 +148,7 @@ int _raw_spin_trylock(raw_spinlock_t *lock)
 	return ret;
 }
 
-void _raw_spin_unlock(raw_spinlock_t *lock)
+void do_raw_spin_unlock(raw_spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
 	arch_spin_unlock(&lock->raw_lock);
@@ -193,13 +193,13 @@ static void __read_lock_debug(rwlock_t *lock)
 }
 #endif
 
-void _raw_read_lock(rwlock_t *lock)
+void do_raw_read_lock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
 	arch_read_lock(&lock->raw_lock);
 }
 
-int _raw_read_trylock(rwlock_t *lock)
+int do_raw_read_trylock(rwlock_t *lock)
 {
 	int ret = arch_read_trylock(&lock->raw_lock);
 
@@ -212,7 +212,7 @@ int _raw_read_trylock(rwlock_t *lock)
 	return ret;
 }
 
-void _raw_read_unlock(rwlock_t *lock)
+void do_raw_read_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
 	arch_read_unlock(&lock->raw_lock);
@@ -268,14 +268,14 @@ static void __write_lock_debug(rwlock_t *lock)
 }
 #endif
 
-void _raw_write_lock(rwlock_t *lock)
+void do_raw_write_lock(rwlock_t *lock)
 {
 	debug_write_lock_before(lock);
 	arch_write_lock(&lock->raw_lock);
 	debug_write_lock_after(lock);
 }
 
-int _raw_write_trylock(rwlock_t *lock)
+int do_raw_write_trylock(rwlock_t *lock)
 {
 	int ret = arch_write_trylock(&lock->raw_lock);
 
@@ -290,7 +290,7 @@ int _raw_write_trylock(rwlock_t *lock)
 	return ret;
 }
 
-void _raw_write_unlock(rwlock_t *lock)
+void do_raw_write_unlock(rwlock_t *lock)
 {
 	debug_write_unlock(lock);
 	arch_write_unlock(&lock->raw_lock);
-- 
cgit v1.2.3


From 9c1721aa4994f6625decbd915241f3a94ee2fe67 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Dec 2009 21:52:18 +0100
Subject: locking: Cleanup the name space completely

Make the name space hierarchy of locking functions consistent:
     raw_spin* -> _raw_spin* -> __raw_spin*

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rwlock.h           |  50 +++++-----
 include/linux/rwlock_api_smp.h   | 113 ++++++++++++-----------
 include/linux/spinlock.h         |  37 ++++----
 include/linux/spinlock_api_smp.h |  79 ++++++++--------
 include/linux/spinlock_api_up.h  |  64 +++++++------
 kernel/spinlock.c                | 192 +++++++++++++++++++--------------------
 6 files changed, 274 insertions(+), 261 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index bd799bc6d086..71e0b00b6f2c 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -38,7 +38,7 @@ do {								\
  extern int do_raw_write_trylock(rwlock_t *lock);
  extern void do_raw_write_unlock(rwlock_t *lock);
 #else
-# define do_raw_read_lock(rwlock)		arch_read_lock(&(rwlock)->raw_lock)
+# define do_raw_read_lock(rwlock)	arch_read_lock(&(rwlock)->raw_lock)
 # define do_raw_read_lock_flags(lock, flags) \
 		arch_read_lock_flags(&(lock)->raw_lock, *(flags))
 # define do_raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
@@ -58,23 +58,23 @@ do {								\
  * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
  * methods are defined as nops in the case they are not required.
  */
-#define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
-#define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
+#define read_trylock(lock)	__cond_lock(lock, _raw_read_trylock(lock))
+#define write_trylock(lock)	__cond_lock(lock, _raw_write_trylock(lock))
 
-#define write_lock(lock)		_write_lock(lock)
-#define read_lock(lock)			_read_lock(lock)
+#define write_lock(lock)	_raw_write_lock(lock)
+#define read_lock(lock)		_raw_read_lock(lock)
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
 #define read_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		flags = _read_lock_irqsave(lock);	\
+		flags = _raw_read_lock_irqsave(lock);	\
 	} while (0)
 #define write_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		flags = _write_lock_irqsave(lock);	\
+		flags = _raw_write_lock_irqsave(lock);	\
 	} while (0)
 
 #else
@@ -82,38 +82,38 @@ do {								\
 #define read_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		_read_lock_irqsave(lock, flags);	\
+		_raw_read_lock_irqsave(lock, flags);	\
 	} while (0)
 #define write_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		_write_lock_irqsave(lock, flags);	\
+		_raw_write_lock_irqsave(lock, flags);	\
 	} while (0)
 
 #endif
 
-#define read_lock_irq(lock)		_read_lock_irq(lock)
-#define read_lock_bh(lock)		_read_lock_bh(lock)
-#define write_lock_irq(lock)		_write_lock_irq(lock)
-#define write_lock_bh(lock)		_write_lock_bh(lock)
-#define read_unlock(lock)		_read_unlock(lock)
-#define write_unlock(lock)		_write_unlock(lock)
-#define read_unlock_irq(lock)		_read_unlock_irq(lock)
-#define write_unlock_irq(lock)		_write_unlock_irq(lock)
-
-#define read_unlock_irqrestore(lock, flags)		\
-	do {						\
-		typecheck(unsigned long, flags);	\
-		_read_unlock_irqrestore(lock, flags);	\
+#define read_lock_irq(lock)		_raw_read_lock_irq(lock)
+#define read_lock_bh(lock)		_raw_read_lock_bh(lock)
+#define write_lock_irq(lock)		_raw_write_lock_irq(lock)
+#define write_lock_bh(lock)		_raw_write_lock_bh(lock)
+#define read_unlock(lock)		_raw_read_unlock(lock)
+#define write_unlock(lock)		_raw_write_unlock(lock)
+#define read_unlock_irq(lock)		_raw_read_unlock_irq(lock)
+#define write_unlock_irq(lock)		_raw_write_unlock_irq(lock)
+
+#define read_unlock_irqrestore(lock, flags)			\
+	do {							\
+		typecheck(unsigned long, flags);		\
+		_raw_read_unlock_irqrestore(lock, flags);	\
 	} while (0)
-#define read_unlock_bh(lock)		_read_unlock_bh(lock)
+#define read_unlock_bh(lock)		_raw_read_unlock_bh(lock)
 
 #define write_unlock_irqrestore(lock, flags)		\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		_write_unlock_irqrestore(lock, flags);	\
+		_raw_write_unlock_irqrestore(lock, flags);	\
 	} while (0)
-#define write_unlock_bh(lock)		_write_unlock_bh(lock)
+#define write_unlock_bh(lock)		_raw_write_unlock_bh(lock)
 
 #define write_trylock_irqsave(lock, flags) \
 ({ \
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index b3ba5ae6a8c4..9c9f0495d37c 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -15,102 +15,106 @@
  * Released under the General Public License (GPL).
  */
 
-void __lockfunc _read_lock(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(lock);
-void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
+void __lockfunc _raw_read_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_write_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_read_lock_bh(rwlock_t *lock)	__acquires(lock);
+void __lockfunc _raw_write_lock_bh(rwlock_t *lock)	__acquires(lock);
+void __lockfunc _raw_read_lock_irq(rwlock_t *lock)	__acquires(lock);
+void __lockfunc _raw_write_lock_irq(rwlock_t *lock)	__acquires(lock);
+unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
+unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
-int __lockfunc _read_trylock(rwlock_t *lock);
-int __lockfunc _write_trylock(rwlock_t *lock);
-void __lockfunc _read_unlock(rwlock_t *lock)		__releases(lock);
-void __lockfunc _write_unlock(rwlock_t *lock)		__releases(lock);
-void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(lock);
-void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(lock);
-void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(lock);
-void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(lock);
-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+int __lockfunc _raw_read_trylock(rwlock_t *lock);
+int __lockfunc _raw_write_trylock(rwlock_t *lock);
+void __lockfunc _raw_read_unlock(rwlock_t *lock)	__releases(lock);
+void __lockfunc _raw_write_unlock(rwlock_t *lock)	__releases(lock);
+void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)	__releases(lock);
+void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)	__releases(lock);
+void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)	__releases(lock);
+void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)	__releases(lock);
+void __lockfunc
+_raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+void __lockfunc
+_raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
 #ifdef CONFIG_INLINE_READ_LOCK
-#define _read_lock(lock) __read_lock(lock)
+#define _raw_read_lock(lock) __raw_read_lock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_LOCK
-#define _write_lock(lock) __write_lock(lock)
+#define _raw_write_lock(lock) __raw_write_lock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_LOCK_BH
-#define _read_lock_bh(lock) __read_lock_bh(lock)
+#define _raw_read_lock_bh(lock) __raw_read_lock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_LOCK_BH
-#define _write_lock_bh(lock) __write_lock_bh(lock)
+#define _raw_write_lock_bh(lock) __raw_write_lock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_LOCK_IRQ
-#define _read_lock_irq(lock) __read_lock_irq(lock)
+#define _raw_read_lock_irq(lock) __raw_read_lock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_LOCK_IRQ
-#define _write_lock_irq(lock) __write_lock_irq(lock)
+#define _raw_write_lock_irq(lock) __raw_write_lock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE
-#define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
+#define _raw_read_lock_irqsave(lock) __raw_read_lock_irqsave(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-#define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
+#define _raw_write_lock_irqsave(lock) __raw_write_lock_irqsave(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_TRYLOCK
-#define _read_trylock(lock) __read_trylock(lock)
+#define _raw_read_trylock(lock) __raw_read_trylock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_TRYLOCK
-#define _write_trylock(lock) __write_trylock(lock)
+#define _raw_write_trylock(lock) __raw_write_trylock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_UNLOCK
-#define _read_unlock(lock) __read_unlock(lock)
+#define _raw_read_unlock(lock) __raw_read_unlock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_UNLOCK
-#define _write_unlock(lock) __write_unlock(lock)
+#define _raw_write_unlock(lock) __raw_write_unlock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_UNLOCK_BH
-#define _read_unlock_bh(lock) __read_unlock_bh(lock)
+#define _raw_read_unlock_bh(lock) __raw_read_unlock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_UNLOCK_BH
-#define _write_unlock_bh(lock) __write_unlock_bh(lock)
+#define _raw_write_unlock_bh(lock) __raw_write_unlock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_UNLOCK_IRQ
-#define _read_unlock_irq(lock) __read_unlock_irq(lock)
+#define _raw_read_unlock_irq(lock) __raw_read_unlock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-#define _write_unlock_irq(lock) __write_unlock_irq(lock)
+#define _raw_write_unlock_irq(lock) __raw_write_unlock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
+#define _raw_read_unlock_irqrestore(lock, flags) \
+	__raw_read_unlock_irqrestore(lock, flags)
 #endif
 
 #ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
+#define _raw_write_unlock_irqrestore(lock, flags) \
+	__raw_write_unlock_irqrestore(lock, flags)
 #endif
 
-static inline int __read_trylock(rwlock_t *lock)
+static inline int __raw_read_trylock(rwlock_t *lock)
 {
 	preempt_disable();
 	if (do_raw_read_trylock(lock)) {
@@ -121,7 +125,7 @@ static inline int __read_trylock(rwlock_t *lock)
 	return 0;
 }
 
-static inline int __write_trylock(rwlock_t *lock)
+static inline int __raw_write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
 	if (do_raw_write_trylock(lock)) {
@@ -139,14 +143,14 @@ static inline int __write_trylock(rwlock_t *lock)
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-static inline void __read_lock(rwlock_t *lock)
+static inline void __raw_read_lock(rwlock_t *lock)
 {
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
-static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
+static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock)
 {
 	unsigned long flags;
 
@@ -158,7 +162,7 @@ static inline unsigned long __read_lock_irqsave(rwlock_t *lock)
 	return flags;
 }
 
-static inline void __read_lock_irq(rwlock_t *lock)
+static inline void __raw_read_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
@@ -166,7 +170,7 @@ static inline void __read_lock_irq(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
-static inline void __read_lock_bh(rwlock_t *lock)
+static inline void __raw_read_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
@@ -174,7 +178,7 @@ static inline void __read_lock_bh(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 }
 
-static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
+static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock)
 {
 	unsigned long flags;
 
@@ -186,7 +190,7 @@ static inline unsigned long __write_lock_irqsave(rwlock_t *lock)
 	return flags;
 }
 
-static inline void __write_lock_irq(rwlock_t *lock)
+static inline void __raw_write_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
@@ -194,7 +198,7 @@ static inline void __write_lock_irq(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
-static inline void __write_lock_bh(rwlock_t *lock)
+static inline void __raw_write_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
@@ -202,7 +206,7 @@ static inline void __write_lock_bh(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
-static inline void __write_lock(rwlock_t *lock)
+static inline void __raw_write_lock(rwlock_t *lock)
 {
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
@@ -211,21 +215,22 @@ static inline void __write_lock(rwlock_t *lock)
 
 #endif /* CONFIG_PREEMPT */
 
-static inline void __write_unlock(rwlock_t *lock)
+static inline void __raw_write_unlock(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_write_unlock(lock);
 	preempt_enable();
 }
 
-static inline void __read_unlock(rwlock_t *lock)
+static inline void __raw_read_unlock(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_read_unlock(lock);
 	preempt_enable();
 }
 
-static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+static inline void
+__raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_read_unlock(lock);
@@ -233,7 +238,7 @@ static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 	preempt_enable();
 }
 
-static inline void __read_unlock_irq(rwlock_t *lock)
+static inline void __raw_read_unlock_irq(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_read_unlock(lock);
@@ -241,7 +246,7 @@ static inline void __read_unlock_irq(rwlock_t *lock)
 	preempt_enable();
 }
 
-static inline void __read_unlock_bh(rwlock_t *lock)
+static inline void __raw_read_unlock_bh(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_read_unlock(lock);
@@ -249,7 +254,7 @@ static inline void __read_unlock_bh(rwlock_t *lock)
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 
-static inline void __write_unlock_irqrestore(rwlock_t *lock,
+static inline void __raw_write_unlock_irqrestore(rwlock_t *lock,
 					     unsigned long flags)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
@@ -258,7 +263,7 @@ static inline void __write_unlock_irqrestore(rwlock_t *lock,
 	preempt_enable();
 }
 
-static inline void __write_unlock_irq(rwlock_t *lock)
+static inline void __raw_write_unlock_irq(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_write_unlock(lock);
@@ -266,7 +271,7 @@ static inline void __write_unlock_irq(rwlock_t *lock)
 	preempt_enable();
 }
 
-static inline void __write_unlock_bh(rwlock_t *lock)
+static inline void __raw_write_unlock_bh(rwlock_t *lock)
 {
 	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_write_unlock(lock);
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 0cbc58acf689..86088213334a 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -161,20 +161,22 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
  * various methods are defined as nops in the case they are not
  * required.
  */
-#define raw_spin_trylock(lock)		__cond_lock(lock, _spin_trylock(lock))
+#define raw_spin_trylock(lock)	__cond_lock(lock, _raw_spin_trylock(lock))
 
-#define raw_spin_lock(lock)		_spin_lock(lock)
+#define raw_spin_lock(lock)	_raw_spin_lock(lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define raw_spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
+# define raw_spin_lock_nested(lock, subclass) \
+	_raw_spin_lock_nested(lock, subclass)
+
 # define raw_spin_lock_nest_lock(lock, nest_lock)			\
 	 do {								\
 		 typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\
-		 _spin_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
+		 _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
 	 } while (0)
 #else
-# define raw_spin_lock_nested(lock, subclass)		_spin_lock(lock)
-# define raw_spin_lock_nest_lock(lock, nest_lock)	_spin_lock(lock)
+# define raw_spin_lock_nested(lock, subclass)		_raw_spin_lock(lock)
+# define raw_spin_lock_nest_lock(lock, nest_lock)	_raw_spin_lock(lock)
 #endif
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
@@ -182,20 +184,20 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
 #define raw_spin_lock_irqsave(lock, flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		flags = _spin_lock_irqsave(lock);	\
+		flags = _raw_spin_lock_irqsave(lock);	\
 	} while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define raw_spin_lock_irqsave_nested(lock, flags, subclass)		\
 	do {								\
 		typecheck(unsigned long, flags);			\
-		flags = _spin_lock_irqsave_nested(lock, subclass);	\
+		flags = _raw_spin_lock_irqsave_nested(lock, subclass);	\
 	} while (0)
 #else
 #define raw_spin_lock_irqsave_nested(lock, flags, subclass)		\
 	do {								\
 		typecheck(unsigned long, flags);			\
-		flags = _spin_lock_irqsave(lock);			\
+		flags = _raw_spin_lock_irqsave(lock);			\
 	} while (0)
 #endif
 
@@ -204,7 +206,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
 #define raw_spin_lock_irqsave(lock, flags)		\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		_spin_lock_irqsave(lock, flags);	\
+		_raw_spin_lock_irqsave(lock, flags);	\
 	} while (0)
 
 #define raw_spin_lock_irqsave_nested(lock, flags, subclass)	\
@@ -212,19 +214,20 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
 
 #endif
 
-#define raw_spin_lock_irq(lock)	_spin_lock_irq(lock)
-#define raw_spin_lock_bh(lock)		_spin_lock_bh(lock)
-#define raw_spin_unlock(lock)		_spin_unlock(lock)
-#define raw_spin_unlock_irq(lock)	_spin_unlock_irq(lock)
+#define raw_spin_lock_irq(lock)		_raw_spin_lock_irq(lock)
+#define raw_spin_lock_bh(lock)		_raw_spin_lock_bh(lock)
+#define raw_spin_unlock(lock)		_raw_spin_unlock(lock)
+#define raw_spin_unlock_irq(lock)	_raw_spin_unlock_irq(lock)
 
 #define raw_spin_unlock_irqrestore(lock, flags)		\
 	do {							\
 		typecheck(unsigned long, flags);		\
-		_spin_unlock_irqrestore(lock, flags);	\
+		_raw_spin_unlock_irqrestore(lock, flags);	\
 	} while (0)
-#define raw_spin_unlock_bh(lock)	_spin_unlock_bh(lock)
+#define raw_spin_unlock_bh(lock)	_raw_spin_unlock_bh(lock)
 
-#define raw_spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
+#define raw_spin_trylock_bh(lock) \
+	__cond_lock(lock, _raw_spin_trylock_bh(lock))
 
 #define raw_spin_trylock_irq(lock) \
 ({ \
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 1be1fc57fc4b..e253ccd7a604 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -19,70 +19,71 @@ int in_lock_functions(unsigned long addr);
 
 #define assert_raw_spin_locked(x)	BUG_ON(!raw_spin_is_locked(x))
 
-void __lockfunc _spin_lock(raw_spinlock_t *lock)	__acquires(lock);
-void __lockfunc _spin_lock_nested(raw_spinlock_t *lock, int subclass)
-							__acquires(lock);
+void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
+								__acquires(lock);
 void __lockfunc
-_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
-							__acquires(lock);
-void __lockfunc _spin_lock_bh(raw_spinlock_t *lock)	__acquires(lock);
-void __lockfunc _spin_lock_irq(raw_spinlock_t *lock)	__acquires(lock);
-
-unsigned long __lockfunc _spin_lock_irqsave(raw_spinlock_t *lock)
-							__acquires(lock);
+_raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
+								__acquires(lock);
+void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
+								__acquires(lock);
+
+unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
+								__acquires(lock);
 unsigned long __lockfunc
-_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass)
-							__acquires(lock);
-int __lockfunc _spin_trylock(raw_spinlock_t *lock);
-int __lockfunc _spin_trylock_bh(raw_spinlock_t *lock);
-void __lockfunc _spin_unlock(raw_spinlock_t *lock)	__releases(lock);
-void __lockfunc _spin_unlock_bh(raw_spinlock_t *lock)	__releases(lock);
-void __lockfunc _spin_unlock_irq(raw_spinlock_t *lock)	__releases(lock);
+_raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass)
+								__acquires(lock);
+int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock);
+int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock);
+void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)		__releases(lock);
+void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)	__releases(lock);
+void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)	__releases(lock);
 void __lockfunc
-_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
-							__releases(lock);
+_raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
+								__releases(lock);
 
 #ifdef CONFIG_INLINE_SPIN_LOCK
-#define _spin_lock(lock) __spin_lock(lock)
+#define _raw_spin_lock(lock) __raw_spin_lock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_LOCK_BH
-#define _spin_lock_bh(lock) __spin_lock_bh(lock)
+#define _raw_spin_lock_bh(lock) __raw_spin_lock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_LOCK_IRQ
-#define _spin_lock_irq(lock) __spin_lock_irq(lock)
+#define _raw_spin_lock_irq(lock) __raw_spin_lock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
+#define _raw_spin_lock_irqsave(lock) __raw_spin_lock_irqsave(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_TRYLOCK
-#define _spin_trylock(lock) __spin_trylock(lock)
+#define _raw_spin_trylock(lock) __raw_spin_trylock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH
-#define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
+#define _raw_spin_trylock_bh(lock) __raw_spin_trylock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_UNLOCK
-#define _spin_unlock(lock) __spin_unlock(lock)
+#define _raw_spin_unlock(lock) __raw_spin_unlock(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_BH
-#define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
+#define _raw_spin_unlock_bh(lock) __raw_spin_unlock_bh(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-#define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
+#define _raw_spin_unlock_irq(lock) __raw_spin_unlock_irq(lock)
 #endif
 
 #ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
+#define _raw_spin_unlock_irqrestore(lock, flags) __raw_spin_unlock_irqrestore(lock, flags)
 #endif
 
-static inline int __spin_trylock(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
 	preempt_disable();
 	if (do_raw_spin_trylock(lock)) {
@@ -100,7 +101,7 @@ static inline int __spin_trylock(raw_spinlock_t *lock)
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-static inline unsigned long __spin_lock_irqsave(raw_spinlock_t *lock)
+static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 {
 	unsigned long flags;
 
@@ -120,7 +121,7 @@ static inline unsigned long __spin_lock_irqsave(raw_spinlock_t *lock)
 	return flags;
 }
 
-static inline void __spin_lock_irq(raw_spinlock_t *lock)
+static inline void __raw_spin_lock_irq(raw_spinlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
@@ -128,7 +129,7 @@ static inline void __spin_lock_irq(raw_spinlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 
-static inline void __spin_lock_bh(raw_spinlock_t *lock)
+static inline void __raw_spin_lock_bh(raw_spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
@@ -136,7 +137,7 @@ static inline void __spin_lock_bh(raw_spinlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
 
-static inline void __spin_lock(raw_spinlock_t *lock)
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
@@ -145,14 +146,14 @@ static inline void __spin_lock(raw_spinlock_t *lock)
 
 #endif /* CONFIG_PREEMPT */
 
-static inline void __spin_unlock(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_spin_unlock(lock);
 	preempt_enable();
 }
 
-static inline void __spin_unlock_irqrestore(raw_spinlock_t *lock,
+static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
 					    unsigned long flags)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
@@ -161,7 +162,7 @@ static inline void __spin_unlock_irqrestore(raw_spinlock_t *lock,
 	preempt_enable();
 }
 
-static inline void __spin_unlock_irq(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_spin_unlock(lock);
@@ -169,7 +170,7 @@ static inline void __spin_unlock_irq(raw_spinlock_t *lock)
 	preempt_enable();
 }
 
-static inline void __spin_unlock_bh(raw_spinlock_t *lock)
+static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock)
 {
 	spin_release(&lock->dep_map, 1, _RET_IP_);
 	do_raw_spin_unlock(lock);
@@ -177,7 +178,7 @@ static inline void __spin_unlock_bh(raw_spinlock_t *lock)
 	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 
-static inline int __spin_trylock_bh(raw_spinlock_t *lock)
+static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index 3a9e27adecf9..af1f47229e70 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -40,7 +40,8 @@
   do { preempt_enable(); __release(lock); (void)(lock); } while (0)
 
 #define __UNLOCK_BH(lock) \
-  do { preempt_enable_no_resched(); local_bh_enable(); __release(lock); (void)(lock); } while (0)
+  do { preempt_enable_no_resched(); local_bh_enable(); \
+	  __release(lock); (void)(lock); } while (0)
 
 #define __UNLOCK_IRQ(lock) \
   do { local_irq_enable(); __UNLOCK(lock); } while (0)
@@ -48,34 +49,37 @@
 #define __UNLOCK_IRQRESTORE(lock, flags) \
   do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
 
-#define _spin_lock(lock)			__LOCK(lock)
-#define _spin_lock_nested(lock, subclass)	__LOCK(lock)
-#define _read_lock(lock)			__LOCK(lock)
-#define _write_lock(lock)			__LOCK(lock)
-#define _spin_lock_bh(lock)			__LOCK_BH(lock)
-#define _read_lock_bh(lock)			__LOCK_BH(lock)
-#define _write_lock_bh(lock)			__LOCK_BH(lock)
-#define _spin_lock_irq(lock)			__LOCK_IRQ(lock)
-#define _read_lock_irq(lock)			__LOCK_IRQ(lock)
-#define _write_lock_irq(lock)			__LOCK_IRQ(lock)
-#define _spin_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
-#define _read_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
-#define _write_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
-#define _spin_trylock(lock)			({ __LOCK(lock); 1; })
-#define _read_trylock(lock)			({ __LOCK(lock); 1; })
-#define _write_trylock(lock)			({ __LOCK(lock); 1; })
-#define _spin_trylock_bh(lock)			({ __LOCK_BH(lock); 1; })
-#define _spin_unlock(lock)			__UNLOCK(lock)
-#define _read_unlock(lock)			__UNLOCK(lock)
-#define _write_unlock(lock)			__UNLOCK(lock)
-#define _spin_unlock_bh(lock)			__UNLOCK_BH(lock)
-#define _write_unlock_bh(lock)			__UNLOCK_BH(lock)
-#define _read_unlock_bh(lock)			__UNLOCK_BH(lock)
-#define _spin_unlock_irq(lock)			__UNLOCK_IRQ(lock)
-#define _read_unlock_irq(lock)			__UNLOCK_IRQ(lock)
-#define _write_unlock_irq(lock)			__UNLOCK_IRQ(lock)
-#define _spin_unlock_irqrestore(lock, flags)	__UNLOCK_IRQRESTORE(lock, flags)
-#define _read_unlock_irqrestore(lock, flags)	__UNLOCK_IRQRESTORE(lock, flags)
-#define _write_unlock_irqrestore(lock, flags)	__UNLOCK_IRQRESTORE(lock, flags)
+#define _raw_spin_lock(lock)			__LOCK(lock)
+#define _raw_spin_lock_nested(lock, subclass)	__LOCK(lock)
+#define _raw_read_lock(lock)			__LOCK(lock)
+#define _raw_write_lock(lock)			__LOCK(lock)
+#define _raw_spin_lock_bh(lock)			__LOCK_BH(lock)
+#define _raw_read_lock_bh(lock)			__LOCK_BH(lock)
+#define _raw_write_lock_bh(lock)		__LOCK_BH(lock)
+#define _raw_spin_lock_irq(lock)		__LOCK_IRQ(lock)
+#define _raw_read_lock_irq(lock)		__LOCK_IRQ(lock)
+#define _raw_write_lock_irq(lock)		__LOCK_IRQ(lock)
+#define _raw_spin_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
+#define _raw_read_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
+#define _raw_write_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
+#define _raw_spin_trylock(lock)			({ __LOCK(lock); 1; })
+#define _raw_read_trylock(lock)			({ __LOCK(lock); 1; })
+#define _raw_write_trylock(lock)			({ __LOCK(lock); 1; })
+#define _raw_spin_trylock_bh(lock)		({ __LOCK_BH(lock); 1; })
+#define _raw_spin_unlock(lock)			__UNLOCK(lock)
+#define _raw_read_unlock(lock)			__UNLOCK(lock)
+#define _raw_write_unlock(lock)			__UNLOCK(lock)
+#define _raw_spin_unlock_bh(lock)		__UNLOCK_BH(lock)
+#define _raw_write_unlock_bh(lock)		__UNLOCK_BH(lock)
+#define _raw_read_unlock_bh(lock)		__UNLOCK_BH(lock)
+#define _raw_spin_unlock_irq(lock)		__UNLOCK_IRQ(lock)
+#define _raw_read_unlock_irq(lock)		__UNLOCK_IRQ(lock)
+#define _raw_write_unlock_irq(lock)		__UNLOCK_IRQ(lock)
+#define _raw_spin_unlock_irqrestore(lock, flags) \
+					__UNLOCK_IRQRESTORE(lock, flags)
+#define _raw_read_unlock_irqrestore(lock, flags) \
+					__UNLOCK_IRQRESTORE(lock, flags)
+#define _raw_write_unlock_irqrestore(lock, flags) \
+					__UNLOCK_IRQRESTORE(lock, flags)
 
 #endif /* __LINUX_SPINLOCK_API_UP_H */
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 795240b81224..be6517fb9c14 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -44,7 +44,7 @@
  * towards that other CPU that it should break the lock ASAP.
  */
 #define BUILD_LOCK_OPS(op, locktype)					\
-void __lockfunc __##op##_lock(locktype##_t *lock)			\
+void __lockfunc __raw_##op##_lock(locktype##_t *lock)			\
 {									\
 	for (;;) {							\
 		preempt_disable();					\
@@ -60,7 +60,7 @@ void __lockfunc __##op##_lock(locktype##_t *lock)			\
 	(lock)->break_lock = 0;						\
 }									\
 									\
-unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock)	\
+unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock)	\
 {									\
 	unsigned long flags;						\
 									\
@@ -81,12 +81,12 @@ unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock)	\
 	return flags;							\
 }									\
 									\
-void __lockfunc __##op##_lock_irq(locktype##_t *lock)			\
+void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock)		\
 {									\
-	_##op##_lock_irqsave(lock);					\
+	_raw_##op##_lock_irqsave(lock);					\
 }									\
 									\
-void __lockfunc __##op##_lock_bh(locktype##_t *lock)			\
+void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock)		\
 {									\
 	unsigned long flags;						\
 									\
@@ -95,7 +95,7 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock)			\
 	/* irq-disabling. We use the generic preemption-aware	*/	\
 	/* function:						*/	\
 	/**/								\
-	flags = _##op##_lock_irqsave(lock);				\
+	flags = _raw_##op##_lock_irqsave(lock);				\
 	local_bh_disable();						\
 	local_irq_restore(flags);					\
 }									\
@@ -116,240 +116,240 @@ BUILD_LOCK_OPS(write, rwlock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _spin_trylock(raw_spinlock_t *lock)
+int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
 {
-	return __spin_trylock(lock);
+	return __raw_spin_trylock(lock);
 }
-EXPORT_SYMBOL(_spin_trylock);
+EXPORT_SYMBOL(_raw_spin_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _spin_trylock_bh(raw_spinlock_t *lock)
+int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
 {
-	return __spin_trylock_bh(lock);
+	return __raw_spin_trylock_bh(lock);
 }
-EXPORT_SYMBOL(_spin_trylock_bh);
+EXPORT_SYMBOL(_raw_spin_trylock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _spin_lock(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
 {
-	__spin_lock(lock);
+	__raw_spin_lock(lock);
 }
-EXPORT_SYMBOL(_spin_lock);
+EXPORT_SYMBOL(_raw_spin_lock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _spin_lock_irqsave(raw_spinlock_t *lock)
+unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
 {
-	return __spin_lock_irqsave(lock);
+	return __raw_spin_lock_irqsave(lock);
 }
-EXPORT_SYMBOL(_spin_lock_irqsave);
+EXPORT_SYMBOL(_raw_spin_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _spin_lock_irq(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
 {
-	__spin_lock_irq(lock);
+	__raw_spin_lock_irq(lock);
 }
-EXPORT_SYMBOL(_spin_lock_irq);
+EXPORT_SYMBOL(_raw_spin_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _spin_lock_bh(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
 {
-	__spin_lock_bh(lock);
+	__raw_spin_lock_bh(lock);
 }
-EXPORT_SYMBOL(_spin_lock_bh);
+EXPORT_SYMBOL(_raw_spin_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK
-void __lockfunc _spin_unlock(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
 {
-	__spin_unlock(lock);
+	__raw_spin_unlock(lock);
 }
-EXPORT_SYMBOL(_spin_unlock);
+EXPORT_SYMBOL(_raw_spin_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
+void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
 {
-	__spin_unlock_irqrestore(lock, flags);
+	__raw_spin_unlock_irqrestore(lock, flags);
 }
-EXPORT_SYMBOL(_spin_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _spin_unlock_irq(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
 {
-	__spin_unlock_irq(lock);
+	__raw_spin_unlock_irq(lock);
 }
-EXPORT_SYMBOL(_spin_unlock_irq);
+EXPORT_SYMBOL(_raw_spin_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _spin_unlock_bh(raw_spinlock_t *lock)
+void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
 {
-	__spin_unlock_bh(lock);
+	__raw_spin_unlock_bh(lock);
 }
-EXPORT_SYMBOL(_spin_unlock_bh);
+EXPORT_SYMBOL(_raw_spin_unlock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_READ_TRYLOCK
-int __lockfunc _read_trylock(rwlock_t *lock)
+int __lockfunc _raw_read_trylock(rwlock_t *lock)
 {
-	return __read_trylock(lock);
+	return __raw_read_trylock(lock);
 }
-EXPORT_SYMBOL(_read_trylock);
+EXPORT_SYMBOL(_raw_read_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK
-void __lockfunc _read_lock(rwlock_t *lock)
+void __lockfunc _raw_read_lock(rwlock_t *lock)
 {
-	__read_lock(lock);
+	__raw_read_lock(lock);
 }
-EXPORT_SYMBOL(_read_lock);
+EXPORT_SYMBOL(_raw_read_lock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
+unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
 {
-	return __read_lock_irqsave(lock);
+	return __raw_read_lock_irqsave(lock);
 }
-EXPORT_SYMBOL(_read_lock_irqsave);
+EXPORT_SYMBOL(_raw_read_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_IRQ
-void __lockfunc _read_lock_irq(rwlock_t *lock)
+void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
 {
-	__read_lock_irq(lock);
+	__raw_read_lock_irq(lock);
 }
-EXPORT_SYMBOL(_read_lock_irq);
+EXPORT_SYMBOL(_raw_read_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_BH
-void __lockfunc _read_lock_bh(rwlock_t *lock)
+void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
 {
-	__read_lock_bh(lock);
+	__raw_read_lock_bh(lock);
 }
-EXPORT_SYMBOL(_read_lock_bh);
+EXPORT_SYMBOL(_raw_read_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK
-void __lockfunc _read_unlock(rwlock_t *lock)
+void __lockfunc _raw_read_unlock(rwlock_t *lock)
 {
-	__read_unlock(lock);
+	__raw_read_unlock(lock);
 }
-EXPORT_SYMBOL(_read_unlock);
+EXPORT_SYMBOL(_raw_read_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-	__read_unlock_irqrestore(lock, flags);
+	__raw_read_unlock_irqrestore(lock, flags);
 }
-EXPORT_SYMBOL(_read_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
-void __lockfunc _read_unlock_irq(rwlock_t *lock)
+void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
 {
-	__read_unlock_irq(lock);
+	__raw_read_unlock_irq(lock);
 }
-EXPORT_SYMBOL(_read_unlock_irq);
+EXPORT_SYMBOL(_raw_read_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_BH
-void __lockfunc _read_unlock_bh(rwlock_t *lock)
+void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
 {
-	__read_unlock_bh(lock);
+	__raw_read_unlock_bh(lock);
 }
-EXPORT_SYMBOL(_read_unlock_bh);
+EXPORT_SYMBOL(_raw_read_unlock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_TRYLOCK
-int __lockfunc _write_trylock(rwlock_t *lock)
+int __lockfunc _raw_write_trylock(rwlock_t *lock)
 {
-	return __write_trylock(lock);
+	return __raw_write_trylock(lock);
 }
-EXPORT_SYMBOL(_write_trylock);
+EXPORT_SYMBOL(_raw_write_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK
-void __lockfunc _write_lock(rwlock_t *lock)
+void __lockfunc _raw_write_lock(rwlock_t *lock)
 {
-	__write_lock(lock);
+	__raw_write_lock(lock);
 }
-EXPORT_SYMBOL(_write_lock);
+EXPORT_SYMBOL(_raw_write_lock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
+unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
 {
-	return __write_lock_irqsave(lock);
+	return __raw_write_lock_irqsave(lock);
 }
-EXPORT_SYMBOL(_write_lock_irqsave);
+EXPORT_SYMBOL(_raw_write_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
-void __lockfunc _write_lock_irq(rwlock_t *lock)
+void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
 {
-	__write_lock_irq(lock);
+	__raw_write_lock_irq(lock);
 }
-EXPORT_SYMBOL(_write_lock_irq);
+EXPORT_SYMBOL(_raw_write_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_BH
-void __lockfunc _write_lock_bh(rwlock_t *lock)
+void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
 {
-	__write_lock_bh(lock);
+	__raw_write_lock_bh(lock);
 }
-EXPORT_SYMBOL(_write_lock_bh);
+EXPORT_SYMBOL(_raw_write_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK
-void __lockfunc _write_unlock(rwlock_t *lock)
+void __lockfunc _raw_write_unlock(rwlock_t *lock)
 {
-	__write_unlock(lock);
+	__raw_write_unlock(lock);
 }
-EXPORT_SYMBOL(_write_unlock);
+EXPORT_SYMBOL(_raw_write_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-	__write_unlock_irqrestore(lock, flags);
+	__raw_write_unlock_irqrestore(lock, flags);
 }
-EXPORT_SYMBOL(_write_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-void __lockfunc _write_unlock_irq(rwlock_t *lock)
+void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
 {
-	__write_unlock_irq(lock);
+	__raw_write_unlock_irq(lock);
 }
-EXPORT_SYMBOL(_write_unlock_irq);
+EXPORT_SYMBOL(_raw_write_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
-void __lockfunc _write_unlock_bh(rwlock_t *lock)
+void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
 {
-	__write_unlock_bh(lock);
+	__raw_write_unlock_bh(lock);
 }
-EXPORT_SYMBOL(_write_unlock_bh);
+EXPORT_SYMBOL(_raw_write_unlock_bh);
 #endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-void __lockfunc _spin_lock_nested(raw_spinlock_t *lock, int subclass)
+void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
 {
 	preempt_disable();
 	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
-EXPORT_SYMBOL(_spin_lock_nested);
+EXPORT_SYMBOL(_raw_spin_lock_nested);
 
-unsigned long __lockfunc _spin_lock_irqsave_nested(raw_spinlock_t *lock,
+unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
 						   int subclass)
 {
 	unsigned long flags;
@@ -361,16 +361,16 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(raw_spinlock_t *lock,
 				do_raw_spin_lock_flags, &flags);
 	return flags;
 }
-EXPORT_SYMBOL(_spin_lock_irqsave_nested);
+EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
 
-void __lockfunc _spin_lock_nest_lock(raw_spinlock_t *lock,
+void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
 				     struct lockdep_map *nest_lock)
 {
 	preempt_disable();
 	spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 }
-EXPORT_SYMBOL(_spin_lock_nest_lock);
+EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
 
 #endif
 
-- 
cgit v1.2.3


From a26724591edba5acc528d41f3906a972590e8f54 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 14:46:14 +0100
Subject: plist: Make plist debugging raw_spinlock aware

plists are used with spinlocks and raw_spinlocks. Change the plist
debugging to handle both types.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/plist.h | 43 +++++++++++++++++++++++++++++++++++++------
 kernel/futex.c        |  6 +++---
 lib/plist.c           |  8 +++++---
 3 files changed, 45 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 45926d77d6ac..8227f717c70f 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -81,7 +81,8 @@ struct plist_head {
 	struct list_head prio_list;
 	struct list_head node_list;
 #ifdef CONFIG_DEBUG_PI_LIST
-	spinlock_t *lock;
+	raw_spinlock_t *rawlock;
+	spinlock_t *spinlock;
 #endif
 };
 
@@ -91,9 +92,11 @@ struct plist_node {
 };
 
 #ifdef CONFIG_DEBUG_PI_LIST
-# define PLIST_HEAD_LOCK_INIT(_lock)	.lock = _lock
+# define PLIST_HEAD_LOCK_INIT(_lock)		.spinlock = _lock
+# define PLIST_HEAD_LOCK_INIT_RAW(_lock)	.rawlock = _lock
 #else
 # define PLIST_HEAD_LOCK_INIT(_lock)
+# define PLIST_HEAD_LOCK_INIT_RAW(_lock)
 #endif
 
 #define _PLIST_HEAD_INIT(head)				\
@@ -107,10 +110,21 @@ struct plist_node {
  */
 #define PLIST_HEAD_INIT(head, _lock)			\
 {							\
-        _PLIST_HEAD_INIT(head),                         \
+	_PLIST_HEAD_INIT(head),				\
 	PLIST_HEAD_LOCK_INIT(&(_lock))			\
 }
 
+/**
+ * PLIST_HEAD_INIT_RAW - static struct plist_head initializer
+ * @head:	struct plist_head variable name
+ * @_lock:	lock to initialize for this list
+ */
+#define PLIST_HEAD_INIT_RAW(head, _lock)		\
+{							\
+	_PLIST_HEAD_INIT(head),				\
+	PLIST_HEAD_LOCK_INIT_RAW(&(_lock))		\
+}
+
 /**
  * PLIST_NODE_INIT - static struct plist_node initializer
  * @node:	struct plist_node variable name
@@ -119,13 +133,13 @@ struct plist_node {
 #define PLIST_NODE_INIT(node, __prio)			\
 {							\
 	.prio  = (__prio),				\
-	.plist = { _PLIST_HEAD_INIT((node).plist) }, 	\
+	.plist = { _PLIST_HEAD_INIT((node).plist) },	\
 }
 
 /**
  * plist_head_init - dynamic struct plist_head initializer
  * @head:	&struct plist_head pointer
- * @lock:	list spinlock, remembered for debugging
+ * @lock:	spinlock protecting the list (debugging)
  */
 static inline void
 plist_head_init(struct plist_head *head, spinlock_t *lock)
@@ -133,7 +147,24 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
 	INIT_LIST_HEAD(&head->prio_list);
 	INIT_LIST_HEAD(&head->node_list);
 #ifdef CONFIG_DEBUG_PI_LIST
-	head->lock = lock;
+	head->spinlock = lock;
+	head->rawlock = NULL;
+#endif
+}
+
+/**
+ * plist_head_init_raw - dynamic struct plist_head initializer
+ * @head:	&struct plist_head pointer
+ * @lock:	raw_spinlock protecting the list (debugging)
+ */
+static inline void
+plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
+{
+	INIT_LIST_HEAD(&head->prio_list);
+	INIT_LIST_HEAD(&head->node_list);
+#ifdef CONFIG_DEBUG_PI_LIST
+	head->rawlock = lock;
+	head->spinlock = NULL;
 #endif
 }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index d73ef1f3e55d..6af474df17bb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.lock = &hb2->lock;
+		q->list.plist.spinlock = &hb2->lock;
 #endif
 	}
 	get_futex_key_refs(key2);
@@ -1046,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	q->lock_ptr = &hb->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 
 	wake_up_state(q->task, TASK_NORMAL);
@@ -1394,7 +1394,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 
 	plist_node_init(&q->list, prio);
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
diff --git a/lib/plist.c b/lib/plist.c
index d6c64a824e1d..1471988d9190 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -54,9 +54,11 @@ static void plist_check_list(struct list_head *top)
 
 static void plist_check_head(struct plist_head *head)
 {
-	WARN_ON(!head->lock);
-	if (head->lock)
-		WARN_ON_SMP(!spin_is_locked(head->lock));
+	WARN_ON(!head->rawlock && !head->spinlock);
+	if (head->rawlock)
+		WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
+	if (head->spinlock)
+		WARN_ON_SMP(!spin_is_locked(head->spinlock));
 	plist_check_list(&head->prio_list);
 	plist_check_list(&head->node_list);
 }
-- 
cgit v1.2.3


From 1d615482547584b9a8bb6316a58fed6ce90dd9ff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 14:54:03 +0100
Subject: sched: Convert pi_lock to raw_spinlock

Convert locks which cannot be sleeping locks in preempt-rt to
raw_spinlocks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h |  2 +-
 include/linux/sched.h     |  2 +-
 kernel/exit.c             |  2 +-
 kernel/fork.c             |  4 ++--
 kernel/futex.c            | 38 +++++++++++++++----------------
 kernel/rtmutex-debug.c    |  4 ++--
 kernel/rtmutex.c          | 58 +++++++++++++++++++++++------------------------
 kernel/sched.c            | 12 +++++-----
 8 files changed, 61 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 8d10aa7fd4c9..abec69b63d7e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -165,7 +165,7 @@ extern struct cred init_cred;
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
-	.pi_lock	= __SPIN_LOCK_UNLOCKED(tsk.pi_lock),		\
+	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
 	.timer_slack_ns = 50000, /* 50 usec default slack */		\
 	.pids = {							\
 		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 294eb2f80144..41a9ea322dce 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1409,7 +1409,7 @@ struct task_struct {
 #endif
 
 	/* Protection of the PI data structures: */
-	spinlock_t pi_lock;
+	raw_spinlock_t pi_lock;
 
 #ifdef CONFIG_RT_MUTEXES
 	/* PI waiters blocked on a rt_mutex held by this task */
diff --git a/kernel/exit.c b/kernel/exit.c
index 6f50ef55a6f3..5962d7ccf243 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -933,7 +933,7 @@ NORET_TYPE void do_exit(long code)
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
-	spin_unlock_wait(&tsk->pi_lock);
+	raw_spin_unlock_wait(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
diff --git a/kernel/fork.c b/kernel/fork.c
index 1415dc4598ae..9bd91447e052 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -939,9 +939,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-	spin_lock_init(&p->pi_lock);
+	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&p->pi_waiters, &p->pi_lock);
+	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index 6af474df17bb..320b369d20b5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -403,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
 		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
 	}
@@ -470,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
 	 * pi_state_list anymore, but we have to be careful
 	 * versus waiters unqueueing themselves:
 	 */
-	spin_lock_irq(&curr->pi_lock);
+	raw_spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
 
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
 		hb = hash_futex(&key);
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 		/*
 		 * We dropped the pi-lock, so re-check whether this
 		 * task still owns the PI-state:
@@ -495,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		rt_mutex_unlock(&pi_state->pi_mutex);
 
 		spin_unlock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 	}
-	spin_unlock_irq(&curr->pi_lock);
+	raw_spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -558,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	 * change of the task flags, we do this protected by
 	 * p->pi_lock:
 	 */
-	spin_lock_irq(&p->pi_lock);
+	raw_spin_lock_irq(&p->pi_lock);
 	if (unlikely(p->flags & PF_EXITING)) {
 		/*
 		 * The task is on the way out. When PF_EXITPIDONE is
@@ -567,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 		 */
 		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 
-		spin_unlock_irq(&p->pi_lock);
+		raw_spin_unlock_irq(&p->pi_lock);
 		put_task_struct(p);
 		return ret;
 	}
@@ -586,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
-	spin_unlock_irq(&p->pi_lock);
+	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
@@ -794,16 +794,16 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		}
 	}
 
-	spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock_irq(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock_irq(&new_owner->pi_lock);
 
 	spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
@@ -1529,18 +1529,18 @@ retry:
 	 * itself.
 	 */
 	if (pi_state->owner != NULL) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 	}
 
 	pi_state->owner = newowner;
 
-	spin_lock_irq(&newowner->pi_lock);
+	raw_spin_lock_irq(&newowner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
-	spin_unlock_irq(&newowner->pi_lock);
+	raw_spin_unlock_irq(&newowner->pi_lock);
 	return 0;
 
 	/*
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5fcb4fe645e2..ddabb54bb5c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -37,8 +37,8 @@ do {								\
 	if (rt_trace_on) {					\
 		rt_trace_on = 0;				\
 		console_verbose();				\
-		if (spin_is_locked(&current->pi_lock))		\
-			spin_unlock(&current->pi_lock);		\
+		if (raw_spin_is_locked(&current->pi_lock))	\
+			raw_spin_unlock(&current->pi_lock);	\
 	}							\
 } while (0)
 
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 29bd4baf9e75..d33da470f9da 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -138,9 +138,9 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	__rt_mutex_adjust_prio(task);
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 }
 
 /*
@@ -195,7 +195,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	/*
 	 * Task can not go away as we did a get_task() before !
 	 */
-	spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
 
 	waiter = task->pi_blocked_on;
 	/*
@@ -232,7 +232,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 
 	lock = waiter->lock;
 	if (!spin_trylock(&lock->wait_lock)) {
-		spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		cpu_relax();
 		goto retry;
 	}
@@ -253,13 +253,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	plist_add(&waiter->list_entry, &lock->wait_list);
 
 	/* Release the task */
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 	put_task_struct(task);
 
 	/* Grab the next task */
 	task = rt_mutex_owner(lock);
 	get_task_struct(task);
-	spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		/* Boost the owner */
@@ -277,7 +277,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		__rt_mutex_adjust_prio(task);
 	}
 
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	top_waiter = rt_mutex_top_waiter(lock);
 	spin_unlock(&lock->wait_lock);
@@ -288,7 +288,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	goto again;
 
  out_unlock_pi:
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  out_put_task:
 	put_task_struct(task);
 
@@ -313,9 +313,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
 	if (pendowner == task)
 		return 1;
 
-	spin_lock_irqsave(&pendowner->pi_lock, flags);
+	raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
 	if (task->prio >= pendowner->prio) {
-		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 0;
 	}
 
@@ -325,7 +325,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
 	 * priority.
 	 */
 	if (likely(!rt_mutex_has_waiters(lock))) {
-		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 1;
 	}
 
@@ -333,7 +333,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
 	next = rt_mutex_top_waiter(lock);
 	plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
 	__rt_mutex_adjust_prio(pendowner);
-	spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 
 	/*
 	 * We are going to steal the lock and a waiter was
@@ -350,10 +350,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
 	 * might be task:
 	 */
 	if (likely(next->task != task)) {
-		spin_lock_irqsave(&task->pi_lock, flags);
+		raw_spin_lock_irqsave(&task->pi_lock, flags);
 		plist_add(&next->pi_list_entry, &task->pi_waiters);
 		__rt_mutex_adjust_prio(task);
-		spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 	}
 	return 1;
 }
@@ -420,7 +420,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	unsigned long flags;
 	int chain_walk = 0, res;
 
-	spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
@@ -434,17 +434,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
 	task->pi_blocked_on = waiter;
 
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
-		spin_lock_irqsave(&owner->pi_lock, flags);
+		raw_spin_lock_irqsave(&owner->pi_lock, flags);
 		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
 		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
 
 		__rt_mutex_adjust_prio(owner);
 		if (owner->pi_blocked_on)
 			chain_walk = 1;
-		spin_unlock_irqrestore(&owner->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 	else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
 		chain_walk = 1;
@@ -483,7 +483,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 	struct task_struct *pendowner;
 	unsigned long flags;
 
-	spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock_irqsave(&current->pi_lock, flags);
 
 	waiter = rt_mutex_top_waiter(lock);
 	plist_del(&waiter->list_entry, &lock->wait_list);
@@ -500,7 +500,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 
 	rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
 
-	spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 
 	/*
 	 * Clear the pi_blocked_on variable and enqueue a possible
@@ -509,7 +509,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 	 * waiter with higher priority than pending-owner->normal_prio
 	 * is blocked on the unboosted (pending) owner.
 	 */
-	spin_lock_irqsave(&pendowner->pi_lock, flags);
+	raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
 
 	WARN_ON(!pendowner->pi_blocked_on);
 	WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -523,7 +523,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 		next = rt_mutex_top_waiter(lock);
 		plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
 	}
-	spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 
 	wake_up_process(pendowner);
 }
@@ -541,15 +541,15 @@ static void remove_waiter(struct rt_mutex *lock,
 	unsigned long flags;
 	int chain_walk = 0;
 
-	spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock_irqsave(&current->pi_lock, flags);
 	plist_del(&waiter->list_entry, &lock->wait_list);
 	waiter->task = NULL;
 	current->pi_blocked_on = NULL;
-	spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 
 	if (first && owner != current) {
 
-		spin_lock_irqsave(&owner->pi_lock, flags);
+		raw_spin_lock_irqsave(&owner->pi_lock, flags);
 
 		plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
 
@@ -564,7 +564,7 @@ static void remove_waiter(struct rt_mutex *lock,
 		if (owner->pi_blocked_on)
 			chain_walk = 1;
 
-		spin_unlock_irqrestore(&owner->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 
 	WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -592,15 +592,15 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 	struct rt_mutex_waiter *waiter;
 	unsigned long flags;
 
-	spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
 
 	waiter = task->pi_blocked_on;
 	if (!waiter || waiter->list_entry.prio == task->prio) {
-		spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		return;
 	}
 
-	spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(task);
diff --git a/kernel/sched.c b/kernel/sched.c
index 01c5016e57f1..18cceeecce35 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6323,7 +6323,7 @@ recheck:
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
 	 */
-	spin_lock_irqsave(&p->pi_lock, flags);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	/*
 	 * To be able to change p->policy safely, the apropriate
 	 * runqueue lock must be held.
@@ -6333,7 +6333,7 @@ recheck:
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
 		__task_rq_unlock(rq);
-		spin_unlock_irqrestore(&p->pi_lock, flags);
+		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
 	update_rq_clock(rq);
@@ -6357,7 +6357,7 @@ recheck:
 		check_class_changed(rq, p, prev_class, oldprio, running);
 	}
 	__task_rq_unlock(rq);
-	spin_unlock_irqrestore(&p->pi_lock, flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	rt_mutex_adjust_pi(p);
 
@@ -9624,7 +9624,7 @@ void __init sched_init(void)
 #endif
 
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+	plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
 #endif
 
 	/*
@@ -9749,13 +9749,13 @@ void normalize_rt_tasks(void)
 			continue;
 		}
 
-		spin_lock(&p->pi_lock);
+		raw_spin_lock(&p->pi_lock);
 		rq = __task_rq_lock(p);
 
 		normalize_task(rq, p);
 
 		__task_rq_unlock(rq);
-		spin_unlock(&p->pi_lock);
+		raw_spin_unlock(&p->pi_lock);
 	} while_each_thread(g, p);
 
 	read_unlock_irqrestore(&tasklist_lock, flags);
-- 
cgit v1.2.3


From d209d74d52ab39dc071656533cac095294f70de7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 18:22:11 +0100
Subject: rtmutes: Convert rtmutex.lock to raw_spinlock

Convert locks which cannot be sleeping locks in preempt-rt to
raw_spinlocks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rtmutex.h |  6 +++---
 kernel/futex.c          |  6 +++---
 kernel/rtmutex.c        | 48 ++++++++++++++++++++++++------------------------
 3 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index f19b00b7d530..281d8fd775e8 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -24,7 +24,7 @@
  * @owner:	the mutex owner
  */
 struct rt_mutex {
-	spinlock_t		wait_lock;
+	raw_spinlock_t		wait_lock;
 	struct plist_head	wait_list;
 	struct task_struct	*owner;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
@@ -63,8 +63,8 @@ struct hrtimer_sleeper;
 #endif
 
 #define __RT_MUTEX_INITIALIZER(mutexname) \
-	{ .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
-	, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \
+	{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
+	, .wait_list = PLIST_HEAD_INIT_RAW(mutexname.wait_list, mutexname.wait_lock) \
 	, .owner = NULL \
 	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 320b369d20b5..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -760,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
-	spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -789,7 +789,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		else if (curval != uval)
 			ret = -EINVAL;
 		if (ret) {
-			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			return ret;
 		}
 	}
@@ -805,7 +805,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	pi_state->owner = new_owner;
 	raw_spin_unlock_irq(&new_owner->pi_lock);
 
-	spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index d33da470f9da..a9604815786a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -231,7 +231,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		goto out_unlock_pi;
 
 	lock = waiter->lock;
-	if (!spin_trylock(&lock->wait_lock)) {
+	if (!raw_spin_trylock(&lock->wait_lock)) {
 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		cpu_relax();
 		goto retry;
@@ -240,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	/* Deadlock detection */
 	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
 		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
-		spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&lock->wait_lock);
 		ret = deadlock_detect ? -EDEADLK : 0;
 		goto out_unlock_pi;
 	}
@@ -280,7 +280,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	top_waiter = rt_mutex_top_waiter(lock);
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	if (!detect_deadlock && waiter != top_waiter)
 		goto out_put_task;
@@ -459,12 +459,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	 */
 	get_task_struct(owner);
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
 					 task);
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	return res;
 }
@@ -575,11 +575,11 @@ static void remove_waiter(struct rt_mutex *lock,
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(owner);
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 }
 
 /*
@@ -672,14 +672,14 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 				break;
 		}
 
-		spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
 
 		if (waiter->task)
 			schedule_rt_mutex(lock);
 
-		spin_lock(&lock->wait_lock);
+		raw_spin_lock(&lock->wait_lock);
 		set_current_state(state);
 	}
 
@@ -700,11 +700,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	debug_rt_mutex_init_waiter(&waiter);
 	waiter.task = NULL;
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	/* Try to acquire the lock again: */
 	if (try_to_take_rt_mutex(lock)) {
-		spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&lock->wait_lock);
 		return 0;
 	}
 
@@ -731,7 +731,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	/* Remove pending timer: */
 	if (unlikely(timeout))
@@ -758,7 +758,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
 	int ret = 0;
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	if (likely(rt_mutex_owner(lock) != current)) {
 
@@ -770,7 +770,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
 		fixup_rt_mutex_waiters(lock);
 	}
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	return ret;
 }
@@ -781,7 +781,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
 static void __sched
 rt_mutex_slowunlock(struct rt_mutex *lock)
 {
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	debug_rt_mutex_unlock(lock);
 
@@ -789,13 +789,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 
 	if (!rt_mutex_has_waiters(lock)) {
 		lock->owner = NULL;
-		spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&lock->wait_lock);
 		return;
 	}
 
 	wakeup_next_waiter(lock);
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	/* Undo pi boosting if necessary: */
 	rt_mutex_adjust_prio(current);
@@ -970,8 +970,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
 {
 	lock->owner = NULL;
-	spin_lock_init(&lock->wait_lock);
-	plist_head_init(&lock->wait_list, &lock->wait_lock);
+	raw_spin_lock_init(&lock->wait_lock);
+	plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
 
 	debug_rt_mutex_init(lock, name);
 }
@@ -1032,7 +1032,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
 	int ret;
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	mark_rt_mutex_waiters(lock);
 
@@ -1040,7 +1040,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 		/* We got the lock for task. */
 		debug_rt_mutex_lock(lock);
 		rt_mutex_set_owner(lock, task, 0);
-		spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&lock->wait_lock);
 		rt_mutex_deadlock_account_lock(lock, task);
 		return 1;
 	}
@@ -1056,7 +1056,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 		 */
 		ret = 0;
 	}
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	debug_rt_mutex_print_deadlock(waiter);
 
@@ -1106,7 +1106,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
 	int ret;
 
-	spin_lock(&lock->wait_lock);
+	raw_spin_lock(&lock->wait_lock);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1124,7 +1124,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&lock->wait_lock);
 
 	/*
 	 * Readjust priority, when we did not get the lock. We might have been
-- 
cgit v1.2.3


From 239007b8440abff689632f50cdf0f2b9e895b534 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 16:46:45 +0100
Subject: genirq: Convert irq_desc.lock to raw_spinlock

Convert locks which cannot be sleeping locks in preempt-rt to
raw_spinlocks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/kernel/irq.c                 |  4 +-
 arch/arm/include/asm/mach/irq.h         |  4 +-
 arch/arm/kernel/irq.c                   | 12 ++---
 arch/arm/mach-ns9xxx/irq.c              |  8 +--
 arch/avr32/kernel/irq.c                 |  4 +-
 arch/blackfin/kernel/irqchip.c          |  6 +--
 arch/blackfin/kernel/traps.c            |  4 +-
 arch/cris/kernel/irq.c                  |  4 +-
 arch/frv/kernel/irq.c                   |  4 +-
 arch/h8300/kernel/irq.c                 |  4 +-
 arch/ia64/kernel/iosapic.c              |  6 +--
 arch/ia64/kernel/irq.c                  |  4 +-
 arch/ia64/kernel/irq_ia64.c             |  4 +-
 arch/m32r/kernel/irq.c                  |  4 +-
 arch/microblaze/kernel/irq.c            |  4 +-
 arch/mips/kernel/irq.c                  |  4 +-
 arch/mips/vr41xx/common/icu.c           | 92 ++++++++++++++++-----------------
 arch/mn10300/kernel/irq.c               |  4 +-
 arch/parisc/kernel/irq.c                |  4 +-
 arch/powerpc/kernel/irq.c               |  8 +--
 arch/powerpc/platforms/52xx/media5200.c |  8 +--
 arch/powerpc/platforms/cell/interrupt.c |  8 +--
 arch/powerpc/platforms/iseries/irq.c    |  4 +-
 arch/powerpc/platforms/pseries/xics.c   |  4 +-
 arch/powerpc/sysdev/fsl_msi.c           |  4 +-
 arch/powerpc/sysdev/uic.c               |  8 +--
 arch/sh/kernel/irq.c                    |  4 +-
 arch/sparc/kernel/irq_64.c              |  8 +--
 arch/um/kernel/irq.c                    |  4 +-
 arch/x86/kernel/apic/io_apic.c          |  4 +-
 arch/x86/kernel/irq.c                   | 14 ++---
 arch/xtensa/kernel/irq.c                |  4 +-
 include/linux/irq.h                     |  2 +-
 kernel/irq/autoprobe.c                  | 20 +++----
 kernel/irq/chip.c                       | 86 +++++++++++++++---------------
 kernel/irq/handle.c                     | 22 ++++----
 kernel/irq/internals.h                  |  2 +-
 kernel/irq/manage.c                     | 50 +++++++++---------
 kernel/irq/migration.c                  |  2 +-
 kernel/irq/numa_migrate.c               |  8 +--
 kernel/irq/pm.c                         |  8 +--
 kernel/irq/proc.c                       |  4 +-
 kernel/irq/spurious.c                   | 14 ++---
 43 files changed, 240 insertions(+), 240 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c0de072b8305..5f2cf23c4648 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -81,7 +81,7 @@ show_interrupts(struct seq_file *p, void *v)
 #endif
 
 	if (irq < ACTUAL_NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[irq].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[irq].lock, flags);
 		action = irq_desc[irq].action;
 		if (!action) 
 			goto unlock;
@@ -105,7 +105,7 @@ show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 unlock:
-		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 	} else if (irq == ACTUAL_NR_IRQS) {
 #ifdef CONFIG_SMP
 		seq_puts(p, "IPI: ");
diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h
index acac5302e4ea..8920b2d6e3b8 100644
--- a/arch/arm/include/asm/mach/irq.h
+++ b/arch/arm/include/asm/mach/irq.h
@@ -26,9 +26,9 @@ extern int show_fiq_list(struct seq_file *, void *);
  */
 #define do_bad_IRQ(irq,desc)				\
 do {							\
-	spin_lock(&desc->lock);				\
+	raw_spin_lock(&desc->lock);			\
 	handle_bad_irq(irq, desc);			\
-	spin_unlock(&desc->lock);			\
+	raw_spin_unlock(&desc->lock);			\
 } while(0)
 
 #endif
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index c9a8619f3856..b7cb45bb91e8 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto unlock;
@@ -84,7 +84,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 unlock:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 #ifdef CONFIG_FIQ
 		show_fiq_list(p, v);
@@ -139,7 +139,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
 	}
 
 	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
 	if (iflags & IRQF_VALID)
 		desc->status &= ~IRQ_NOREQUEST;
@@ -147,7 +147,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
 		desc->status &= ~IRQ_NOPROBE;
 	if (!(iflags & IRQF_NOAUTOEN))
 		desc->status &= ~IRQ_NOAUTOEN;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void __init init_IRQ(void)
@@ -166,9 +166,9 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
 {
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->node, cpu);
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 	desc->chip->set_affinity(irq, cpumask_of(cpu));
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 }
 
 /*
diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c
index feb0e54a91de..038f24d47023 100644
--- a/arch/arm/mach-ns9xxx/irq.c
+++ b/arch/arm/mach-ns9xxx/irq.c
@@ -66,7 +66,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 
 	BUG_ON(desc->status & IRQ_INPROGRESS);
 
@@ -78,7 +78,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
 		goto out_mask;
 
 	desc->status |= IRQ_INPROGRESS;
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	action_ret = handle_IRQ_event(irq, action);
 
@@ -87,7 +87,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
 	 * Maybe this function should go to kernel/irq/chip.c? */
 	note_interrupt(irq, desc, action_ret);
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
 
 	if (desc->status & IRQ_DISABLED)
@@ -97,7 +97,7 @@ out_mask:
 	/* ack unconditionally to unmask lower prio irqs */
 	desc->chip->ack(irq);
 
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 #define handle_irq handle_prio_irq
 #endif
diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
index 9f572229d318..c8ab63e3a675 100644
--- a/arch/avr32/kernel/irq.c
+++ b/arch/avr32/kernel/irq.c
@@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto unlock;
@@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 	unlock:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 
 	return 0;
diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index db9f9c91f11f..64cff54a8a58 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -23,7 +23,7 @@ void ack_bad_irq(unsigned int irq)
 
 static struct irq_desc bad_irq_desc = {
 	.handle_irq = handle_bad_irq,
-	.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(bad_irq_desc.lock),
 };
 
 #ifdef CONFIG_CPUMASK_OFFSTACK
@@ -39,7 +39,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	unsigned long flags;
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -53,7 +53,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
  skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 78cb3d38f899..9636bace00e8 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -1140,7 +1140,7 @@ void show_regs(struct pt_regs *fp)
 	if (fp->ipend & ~0x3F) {
 		for (i = 0; i < (NR_IRQS - 1); i++) {
 			if (!in_atomic)
-				spin_lock_irqsave(&irq_desc[i].lock, flags);
+				raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 
 			action = irq_desc[i].action;
 			if (!action)
@@ -1155,7 +1155,7 @@ void show_regs(struct pt_regs *fp)
 			verbose_printk("\n");
 unlock:
 			if (!in_atomic)
-				spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+				raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 		}
 	}
 
diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c
index 0ca7d9892cc6..b5ce0724a88f 100644
--- a/arch/cris/kernel/irq.c
+++ b/arch/cris/kernel/irq.c
@@ -52,7 +52,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 	return 0;
 }
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index af3e824b91b3..62d1aba615dc 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (action) {
 			seq_printf(p, "%3d: ", i);
@@ -85,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_putc(p, '\n');
 		}
 
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
 	}
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 5c913d472119..c25dc2c2b1da 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -186,7 +186,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_puts(p, "           CPU0");
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto unlock;
@@ -200,7 +200,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_printf(p, ", %s", action->name);
 		seq_putc(p, '\n');
 unlock:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 	return 0;
 }
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index dab4d393908c..95ac77aeae9b 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -793,12 +793,12 @@ iosapic_register_intr (unsigned int gsi,
 			goto unlock_iosapic_lock;
 	}
 
-	spin_lock(&irq_desc[irq].lock);
+	raw_spin_lock(&irq_desc[irq].lock);
 	dest = get_target_cpu(gsi, irq);
 	dmode = choose_dmode();
 	err = register_intr(gsi, irq, dmode, polarity, trigger);
 	if (err < 0) {
-		spin_unlock(&irq_desc[irq].lock);
+		raw_spin_unlock(&irq_desc[irq].lock);
 		irq = err;
 		goto unlock_iosapic_lock;
 	}
@@ -817,7 +817,7 @@ iosapic_register_intr (unsigned int gsi,
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
 	       cpu_logical_id(dest), dest, irq_to_vector(irq));
 
-	spin_unlock(&irq_desc[irq].lock);
+	raw_spin_unlock(&irq_desc[irq].lock);
  unlock_iosapic_lock:
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 	return irq;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7d8951229e7c..94ee9d067cbd 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -91,7 +91,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS)
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 	return 0;
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index dd9d7b54f1a1..70e4bad23432 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -345,7 +345,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
 
 		desc = irq_desc + irq;
 		cfg = irq_cfg + irq;
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 		if (!cfg->move_cleanup_count)
 			goto unlock;
 
@@ -358,7 +358,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
 		spin_unlock_irqrestore(&vector_lock, flags);
 		cfg->move_cleanup_count--;
 	unlock:
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 	}
 	return IRQ_HANDLED;
 }
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index 8dfd31e87c4c..3c71f776872c 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -40,7 +40,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -59,7 +59,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 	return 0;
 }
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index 7d5ddd62d4d2..0f06034d1fe0 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -68,7 +68,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < nr_irq) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -89,7 +89,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 	return 0;
 }
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 7b845ba9dff4..8b0b4181219f 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -118,7 +118,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_putc(p, '\n');
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c
index 6d39e222b170..6153b6a05ccf 100644
--- a/arch/mips/vr41xx/common/icu.c
+++ b/arch/mips/vr41xx/common/icu.c
@@ -159,9 +159,9 @@ void vr41xx_enable_piuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_set(MPIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -174,9 +174,9 @@ void vr41xx_disable_piuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_clear(MPIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -189,9 +189,9 @@ void vr41xx_enable_aiuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_set(MAIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -204,9 +204,9 @@ void vr41xx_disable_aiuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_clear(MAIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -219,9 +219,9 @@ void vr41xx_enable_kiuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_set(MKIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -234,9 +234,9 @@ void vr41xx_disable_kiuint(uint16_t mask)
 
 	if (current_cpu_type() == CPU_VR4111 ||
 	    current_cpu_type() == CPU_VR4121) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu1_clear(MKIUINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -247,9 +247,9 @@ void vr41xx_enable_macint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + ETHERNET_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu1_set(MMACINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_enable_macint);
@@ -259,9 +259,9 @@ void vr41xx_disable_macint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + ETHERNET_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu1_clear(MMACINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_disable_macint);
@@ -271,9 +271,9 @@ void vr41xx_enable_dsiuint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + DSIU_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu1_set(MDSIUINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_enable_dsiuint);
@@ -283,9 +283,9 @@ void vr41xx_disable_dsiuint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + DSIU_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu1_clear(MDSIUINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_disable_dsiuint);
@@ -295,9 +295,9 @@ void vr41xx_enable_firint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + FIR_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu2_set(MFIRINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_enable_firint);
@@ -307,9 +307,9 @@ void vr41xx_disable_firint(uint16_t mask)
 	struct irq_desc *desc = irq_desc + FIR_IRQ;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	icu2_clear(MFIRINTREG, mask);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 EXPORT_SYMBOL(vr41xx_disable_firint);
@@ -322,9 +322,9 @@ void vr41xx_enable_pciint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MPCIINTREG, PCIINT0);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -338,9 +338,9 @@ void vr41xx_disable_pciint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MPCIINTREG, 0);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -354,9 +354,9 @@ void vr41xx_enable_scuint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MSCUINTREG, SCUINT0);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -370,9 +370,9 @@ void vr41xx_disable_scuint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MSCUINTREG, 0);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -386,9 +386,9 @@ void vr41xx_enable_csiint(uint16_t mask)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_set(MCSIINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -402,9 +402,9 @@ void vr41xx_disable_csiint(uint16_t mask)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_clear(MCSIINTREG, mask);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -418,9 +418,9 @@ void vr41xx_enable_bcuint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MBCUINTREG, BCUINTR);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -434,9 +434,9 @@ void vr41xx_disable_bcuint(void)
 	if (current_cpu_type() == CPU_VR4122 ||
 	    current_cpu_type() == CPU_VR4131 ||
 	    current_cpu_type() == CPU_VR4133) {
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		icu2_write(MBCUINTREG, 0);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 
@@ -486,7 +486,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign)
 
 	pin = SYSINT1_IRQ_TO_PIN(irq);
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 
 	intassign0 = icu1_read(INTASSIGN0);
 	intassign1 = icu1_read(INTASSIGN1);
@@ -525,7 +525,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign)
 		intassign1 |= (uint16_t)assign << 9;
 		break;
 	default:
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 		return -EINVAL;
 	}
 
@@ -533,7 +533,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign)
 	icu1_write(INTASSIGN0, intassign0);
 	icu1_write(INTASSIGN1, intassign1);
 
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 
 	return 0;
 }
@@ -546,7 +546,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign)
 
 	pin = SYSINT2_IRQ_TO_PIN(irq);
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 
 	intassign2 = icu1_read(INTASSIGN2);
 	intassign3 = icu1_read(INTASSIGN3);
@@ -593,7 +593,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign)
 		intassign3 |= (uint16_t)assign << 12;
 		break;
 	default:
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 		return -EINVAL;
 	}
 
@@ -601,7 +601,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign)
 	icu1_write(INTASSIGN2, intassign2);
 	icu1_write(INTASSIGN3, intassign3);
 
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 
 	return 0;
 }
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 4c3c58ef5cda..e2d5ed891f37 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -215,7 +215,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		/* display information rows, one per active CPU */
 	case 1 ... NR_IRQS - 1:
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 
 		action = irq_desc[i].action;
 		if (action) {
@@ -235,7 +235,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_putc(p, '\n');
 		}
 
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 		break;
 
 		/* polish off with NMI and error counters */
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 2e7610cb33d5..f47465e8d040 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -180,7 +180,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (i < NR_IRQS) {
 		struct irqaction *action;
 
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -224,7 +224,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
  skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	}
 
 	return 0;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f6dca4f4b295..9040330b0530 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -210,7 +210,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (!desc)
 		return 0;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	action = desc->action;
 	if (!action || !action->handler)
@@ -237,7 +237,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	seq_putc(p, '\n');
 
 skip:
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	return 0;
 }
@@ -1112,7 +1112,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 		if (!desc)
 			continue;
 
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 
 		if (desc->action && desc->action->handler) {
 			seq_printf(m, "%5d  ", i);
@@ -1131,7 +1131,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 			seq_printf(m, "%s\n", p);
 		}
 
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index cc0c854291d7..0bac3a3dbecf 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -86,9 +86,9 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
 	u32 status, enable;
 
 	/* Mask off the cascaded IRQ */
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->chip->mask(virq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	/* Ask the FPGA for IRQ status.  If 'val' is 0, then no irqs
 	 * are pending.  'ffs()' is 1 based */
@@ -104,11 +104,11 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
 	}
 
 	/* Processing done; can reenable the cascade now */
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->chip->ack(virq);
 	if (!(desc->status & IRQ_DISABLED))
 		desc->chip->unmask(virq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 static int media5200_irq_map(struct irq_host *h, unsigned int virq,
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 7267effc8078..6829cf7e2bda 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -237,7 +237,7 @@ extern int noirqdebug;
 
 static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
 {
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
 
@@ -265,18 +265,18 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
 			goto out_eoi;
 
 		desc->status &= ~IRQ_PENDING;
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 		action_ret = handle_IRQ_event(irq, action);
 		if (!noirqdebug)
 			note_interrupt(irq, desc, action_ret);
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 
 	} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
 
 	desc->status &= ~IRQ_INPROGRESS;
 out_eoi:
 	desc->chip->eoi(irq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 static int iic_host_map(struct irq_host *h, unsigned int virq,
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 07762259c60a..86c4b29eea89 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -217,9 +217,9 @@ void __init iSeries_activate_IRQs()
 		struct irq_desc *desc = irq_to_desc(irq);
 
 		if (desc && desc->chip && desc->chip->startup) {
-			spin_lock_irqsave(&desc->lock, flags);
+			raw_spin_lock_irqsave(&desc->lock, flags);
 			desc->chip->startup(irq);
-			spin_unlock_irqrestore(&desc->lock, flags);
+			raw_spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
 }
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 7d01b58f3989..b9b9e11609ec 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -906,7 +906,7 @@ void xics_migrate_irqs_away(void)
 		    || desc->chip->set_affinity == NULL)
 			continue;
 
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 
 		status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 		if (status) {
@@ -930,7 +930,7 @@ void xics_migrate_irqs_away(void)
 		cpumask_setall(irq_to_desc(virq)->affinity);
 		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 #endif
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 62e50258cdef..c6e11b077108 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -173,7 +173,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	u32 intr_index;
 	u32 have_shift = 0;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	if ((msi_data->feature &  FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) {
 		if (desc->chip->mask_ack)
 			desc->chip->mask_ack(irq);
@@ -225,7 +225,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		break;
 	}
 unlock:
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 static int __devinit fsl_of_msi_probe(struct of_device *dev,
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 7d10074b3304..6f220a913e42 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -225,12 +225,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
 	int src;
 	int subvirq;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	if (desc->status & IRQ_LEVEL)
 		desc->chip->mask(virq);
 	else
 		desc->chip->mask_ack(virq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	msr = mfdcr(uic->dcrbase + UIC_MSR);
 	if (!msr) /* spurious interrupt */
@@ -242,12 +242,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
 	generic_handle_irq(subvirq);
 
 uic_irq_ret:
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	if (desc->status & IRQ_LEVEL)
 		desc->chip->ack(virq);
 	if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
 		desc->chip->unmask(virq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 static struct uic * __init uic_init_one(struct device_node *node)
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index e1913f28f418..d2d41d046657 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (!desc)
 		return 0;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	for_each_online_cpu(j)
 		any_count |= kstat_irqs_cpu(i, j);
 	action = desc->action;
@@ -97,7 +97,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	seq_putc(p, '\n');
 out:
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 #endif
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index ce996f97855f..8d6882bb480a 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -176,7 +176,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -195,7 +195,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
@@ -785,14 +785,14 @@ void fixup_irqs(void)
 	for (irq = 0; irq < NR_IRQS; irq++) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&irq_desc[irq].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[irq].lock, flags);
 		if (irq_desc[irq].action &&
 		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
 			if (irq_desc[irq].chip->set_affinity)
 				irq_desc[irq].chip->set_affinity(irq,
 					irq_desc[irq].affinity);
 		}
-		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 	}
 
 	tick_ops->disable_irq();
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 039270b9b73b..89474ba0741e 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -34,7 +34,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -53,7 +53,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS)
 		seq_putc(p, '\n');
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d5d498fbee4b..11a5851f1f50 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2431,7 +2431,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			continue;
 
 		cfg = irq_cfg(irq);
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 
 		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
@@ -2450,7 +2450,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		}
 		__get_cpu_var(vector_irq)[vector] = -1;
 unlock:
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 	}
 
 	irq_exit();
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 664bcb7384ac..91fd0c70a18a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (!desc)
 		return 0;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	for_each_online_cpu(j)
 		any_count |= kstat_irqs_cpu(i, j);
 	action = desc->action;
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	seq_putc(p, '\n');
 out:
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
@@ -294,12 +294,12 @@ void fixup_irqs(void)
 			continue;
 
 		/* interrupt's are disabled at this point */
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 
 		affinity = desc->affinity;
 		if (!irq_has_action(irq) ||
 		    cpumask_equal(affinity, cpu_online_mask)) {
-			spin_unlock(&desc->lock);
+			raw_spin_unlock(&desc->lock);
 			continue;
 		}
 
@@ -326,7 +326,7 @@ void fixup_irqs(void)
 		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
 			desc->chip->unmask(irq);
 
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
 			printk("Broke affinity for irq %i\n", irq);
@@ -356,10 +356,10 @@ void fixup_irqs(void)
 			irq = __get_cpu_var(vector_irq)[vector];
 
 			desc = irq_to_desc(irq);
-			spin_lock(&desc->lock);
+			raw_spin_lock(&desc->lock);
 			if (desc->chip->retrigger)
 				desc->chip->retrigger(irq);
-			spin_unlock(&desc->lock);
+			raw_spin_unlock(&desc->lock);
 		}
 	}
 }
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index a1badb32fcda..8cd38484e130 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
 		action = irq_desc[i].action;
 		if (!action)
 			goto skip;
@@ -109,7 +109,7 @@ int show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
 		for_each_online_cpu(j)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index a287cfc0b1a6..451481c082b5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -192,7 +192,7 @@ struct irq_desc {
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
-	spinlock_t		lock;
+	raw_spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
 	unsigned int		node;
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 1de9700f416e..2295a31ef110 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -45,7 +45,7 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
 			 * An old-style architecture might still have
@@ -61,7 +61,7 @@ unsigned long probe_irq_on(void)
 				desc->chip->set_type(i, IRQ_TYPE_PROBE);
 			desc->chip->startup(i);
 		}
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 
 	/* Wait for longstanding interrupts to trigger. */
@@ -73,13 +73,13 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
 			if (desc->chip->startup(i))
 				desc->status |= IRQ_PENDING;
 		}
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 
 	/*
@@ -91,7 +91,7 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		status = desc->status;
 
 		if (status & IRQ_AUTODETECT) {
@@ -103,7 +103,7 @@ unsigned long probe_irq_on(void)
 				if (i < 32)
 					mask |= 1 << i;
 		}
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 
 	return mask;
@@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		status = desc->status;
 
 		if (status & IRQ_AUTODETECT) {
@@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val)
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
 		}
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 	mutex_unlock(&probing_active);
 
@@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		status = desc->status;
 
 		if (status & IRQ_AUTODETECT) {
@@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val)
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
 		}
-		spin_unlock_irq(&desc->lock);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 	mutex_unlock(&probing_active);
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ba566c261adc..ecc3fa28f666 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq)
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
 	desc->handle_irq = handle_bad_irq;
@@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq)
 	cpumask_clear(desc->pending_mask);
 #endif
 #endif
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 /**
@@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq)
 		return;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 		WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
 			irq);
 		return;
@@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	desc->chip = &no_irq_chip;
 	desc->name = NULL;
 	clear_kstat_irqs(desc);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 
@@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	return 0;
 }
@@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type)
 	if (type == IRQ_TYPE_NONE)
 		return 0;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	ret = __irq_set_trigger(desc, irq, type);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
@@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data)
 		return -EINVAL;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 EXPORT_SYMBOL(set_irq_data);
@@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 		return -EINVAL;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
 		entry->irq = irq;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
@@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data)
 		return -EINVAL;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->chip_data = data;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	return 0;
 }
@@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest)
 	if (!desc)
 		return;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	if (nest)
 		desc->status |= IRQ_NESTED_THREAD;
 	else
 		desc->status &= ~IRQ_NESTED_THREAD;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 EXPORT_SYMBOL_GPL(set_irq_nested_thread);
 
@@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq)
 
 	might_sleep();
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 
 	kstat_incr_irqs_this_cpu(irq, desc);
 
@@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq)
 		goto out_unlock;
 
 	desc->status |= IRQ_INPROGRESS;
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 
 	action_ret = action->thread_fn(action->irq, action->dev_id);
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
 
 out_unlock:
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 }
 EXPORT_SYMBOL_GPL(handle_nested_irq);
 
@@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 		goto out_unlock;
 
 	desc->status |= IRQ_INPROGRESS;
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	action_ret = handle_IRQ_event(irq, action);
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
 out_unlock:
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 /**
@@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
@@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 		goto out_unlock;
 
 	desc->status |= IRQ_INPROGRESS;
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	action_ret = handle_IRQ_event(irq, action);
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
 
 	if (unlikely(desc->status & IRQ_ONESHOT))
@@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
 		desc->chip->unmask(irq);
 out_unlock:
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 EXPORT_SYMBOL_GPL(handle_level_irq);
 
@@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out;
@@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 
 	desc->status |= IRQ_INPROGRESS;
 	desc->status &= ~IRQ_PENDING;
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	action_ret = handle_IRQ_event(irq, action);
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
 
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 /**
@@ -530,7 +530,7 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
 
@@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		}
 
 		desc->status &= ~IRQ_PENDING;
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 		action_ret = handle_IRQ_event(irq, action);
 		if (!noirqdebug)
 			note_interrupt(irq, desc, action_ret);
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 
 	} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
 
 	desc->status &= ~IRQ_INPROGRESS;
 out_unlock:
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 /**
@@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	}
 
 	chip_bus_lock(irq, desc);
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
@@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		desc->depth = 0;
 		desc->chip->startup(irq);
 	}
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	chip_bus_sync_unlock(irq, desc);
 }
 EXPORT_SYMBOL_GPL(__set_irq_handler);
@@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq)
 		return;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void __init set_irq_probe(unsigned int irq)
@@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq)
 		return;
 	}
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 17c71bb565c6..814940e7f485 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = {
 	.chip	    = &no_irq_chip,
 	.handle_irq = handle_bad_irq,
 	.depth      = 1,
-	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+	.lock       = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
 void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
@@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 
-	spin_lock_init(&desc->lock);
+	raw_spin_lock_init(&desc->lock);
 	desc->irq = irq;
 #ifdef CONFIG_SMP
 	desc->node = node;
@@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 /*
  * Protect the sparse_irqs:
  */
-DEFINE_SPINLOCK(sparse_irq_lock);
+DEFINE_RAW_SPINLOCK(sparse_irq_lock);
 
 struct irq_desc **irq_desc_ptrs __read_mostly;
 
@@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
 		.chip	    = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth	    = 1,
-		.lock	    = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+		.lock	    = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 	}
 };
 
@@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 	if (desc)
 		return desc;
 
-	spin_lock_irqsave(&sparse_irq_lock, flags);
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
 	desc = irq_desc_ptrs[irq];
@@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 	irq_desc_ptrs[irq] = desc;
 
 out_unlock:
-	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	return desc;
 }
@@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
 	}
 };
 
@@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq)
 		return 1;
 	}
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
 	/*
@@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq)
 	for (;;) {
 		irqreturn_t action_ret;
 
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 
 		action_ret = handle_IRQ_event(irq, action);
 		if (!noirqdebug)
 			note_interrupt(irq, desc, action_ret);
 
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
@@ -536,7 +536,7 @@ out:
 	 * disabled while the handler was running.
 	 */
 	desc->chip->end(irq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	return 1;
 }
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1b5d742c6a77..b2821f070a3d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 extern struct lock_class_key irq_desc_lock_class;
 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 extern void clear_kstat_irqs(struct irq_desc *desc);
-extern spinlock_t sparse_irq_lock;
+extern raw_spinlock_t sparse_irq_lock;
 
 #ifdef CONFIG_SPARSE_IRQ
 /* irq_desc_ptrs allocated at boot time */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b297d1eb..eb6078ca60c7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq)
 			cpu_relax();
 
 		/* Ok, that indicated we're done: double-check carefully. */
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		status = desc->status;
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 		/* Oops, that failed? */
 	} while (status & IRQ_INPROGRESS);
@@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	if (!desc->chip->set_affinity)
 		return -EINVAL;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT) {
@@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	}
 #endif
 	desc->status |= IRQ_AFFINITY_SET;
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 
@@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq)
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	ret = setup_affinity(irq, desc);
 	if (!ret)
 		irq_set_thread_affinity(desc);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	return ret;
 }
@@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq)
 		return;
 
 	chip_bus_lock(irq, desc);
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	__disable_irq(desc, irq, false);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	chip_bus_sync_unlock(irq, desc);
 }
 EXPORT_SYMBOL(disable_irq_nosync);
@@ -308,9 +308,9 @@ void enable_irq(unsigned int irq)
 		return;
 
 	chip_bus_lock(irq, desc);
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	__enable_irq(desc, irq, false);
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	chip_bus_sync_unlock(irq, desc);
 }
 EXPORT_SYMBOL(enable_irq);
@@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 	/* wakeup-capable irqs can be shared between drivers that
 	 * don't need to have the same sleep mode behaviors.
 	 */
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	if (on) {
 		if (desc->wake_depth++ == 0) {
 			ret = set_irq_wake_real(irq, on);
@@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 		}
 	}
 
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_wake);
@@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action)
 static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
 {
 	chip_bus_lock(irq, desc);
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 	if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
 		desc->status &= ~IRQ_MASKED;
 		desc->chip->unmask(irq);
 	}
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 	chip_bus_sync_unlock(irq, desc);
 }
 
@@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
 		return;
 	}
 
-	spin_lock_irq(&desc->lock);
+	raw_spin_lock_irq(&desc->lock);
 	cpumask_copy(mask, desc->affinity);
-	spin_unlock_irq(&desc->lock);
+	raw_spin_unlock_irq(&desc->lock);
 
 	set_cpus_allowed_ptr(current, mask);
 	free_cpumask_var(mask);
@@ -545,7 +545,7 @@ static int irq_thread(void *data)
 
 		atomic_inc(&desc->threads_active);
 
-		spin_lock_irq(&desc->lock);
+		raw_spin_lock_irq(&desc->lock);
 		if (unlikely(desc->status & IRQ_DISABLED)) {
 			/*
 			 * CHECKME: We might need a dedicated
@@ -555,9 +555,9 @@ static int irq_thread(void *data)
 			 * retriggers the interrupt itself --- tglx
 			 */
 			desc->status |= IRQ_PENDING;
-			spin_unlock_irq(&desc->lock);
+			raw_spin_unlock_irq(&desc->lock);
 		} else {
-			spin_unlock_irq(&desc->lock);
+			raw_spin_unlock_irq(&desc->lock);
 
 			action->thread_fn(action->irq, action->dev_id);
 
@@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	/*
 	 * The following block of code has to be executed atomically
 	 */
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	old_ptr = &desc->action;
 	old = *old_ptr;
 	if (old) {
@@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		__enable_irq(desc, irq, false);
 	}
 
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	/*
 	 * Strictly no need to wake it up, but hung_task complains
@@ -802,7 +802,7 @@ mismatch:
 	ret = -EBUSY;
 
 out_thread:
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	if (new->thread) {
 		struct task_struct *t = new->thread;
 
@@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	if (!desc)
 		return NULL;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
 	 * There can be multiple actions per IRQ descriptor, find the right
@@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 
 		if (!action) {
 			WARN(1, "Trying to free already-free IRQ %d\n", irq);
-			spin_unlock_irqrestore(&desc->lock, flags);
+			raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 			return NULL;
 		}
@@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 			desc->chip->disable(irq);
 	}
 
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
 
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index fcb6c96f2627..241962280836 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -27,7 +27,7 @@ void move_masked_irq(int irq)
 	if (!desc->chip->set_affinity)
 		return;
 
-	assert_spin_locked(&desc->lock);
+	assert_raw_spin_locked(&desc->lock);
 
 	/*
 	 * If there was a valid mask to work with, please
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 3fd30197da2e..26bac9d8f860 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 				"for migration.\n", irq);
 		return false;
 	}
-	spin_lock_init(&desc->lock);
+	raw_spin_lock_init(&desc->lock);
 	desc->node = node;
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
@@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 
 	irq = old_desc->irq;
 
-	spin_lock_irqsave(&sparse_irq_lock, flags);
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
 	desc = irq_desc_ptrs[irq];
@@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	}
 
 	irq_desc_ptrs[irq] = desc;
-	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
@@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	return desc;
 
 out_unlock:
-	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	return desc;
 }
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index a0bb09e79867..0d4005d85b03 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -28,9 +28,9 @@ void suspend_device_irqs(void)
 	for_each_irq_desc(irq, desc) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		__disable_irq(desc, irq, true);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
 	for_each_irq_desc(irq, desc)
@@ -56,9 +56,9 @@ void resume_device_irqs(void)
 		if (!(desc->status & IRQ_SUSPENDED))
 			continue;
 
-		spin_lock_irqsave(&desc->lock, flags);
+		raw_spin_lock_irqsave(&desc->lock, flags);
 		__enable_irq(desc, irq, true);
-		spin_unlock_irqrestore(&desc->lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
 EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145fea97..6f50eccc79c0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 	unsigned long flags;
 	int ret = 1;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 	for (action = desc->action ; action; action = action->next) {
 		if ((action != new_action) && action->name &&
 				!strcmp(new_action->name, action->name)) {
@@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e49ea1c5232d..89fb90ae534f 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 	struct irqaction *action;
 	int ok = 0, work = 0;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	/* Already running on another processor */
 	if (desc->status & IRQ_INPROGRESS) {
 		/*
@@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 		 */
 		if (desc->action && (desc->action->flags & IRQF_SHARED))
 			desc->status |= IRQ_PENDING;
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 		return ok;
 	}
 	/* Honour the normal IRQ locking */
 	desc->status |= IRQ_INPROGRESS;
 	action = desc->action;
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	while (action) {
 		/* Only shared IRQ handlers are safe to call */
@@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 	}
 	local_irq_disable();
 	/* Now clean up the flags */
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	action = desc->action;
 
 	/*
@@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 		 * Perform real IRQ processing for the IRQ we deferred
 		 */
 		work = 1;
-		spin_unlock(&desc->lock);
+		raw_spin_unlock(&desc->lock);
 		handle_IRQ_event(irq, action);
-		spin_lock(&desc->lock);
+		raw_spin_lock(&desc->lock);
 		desc->status &= ~IRQ_PENDING;
 	}
 	desc->status &= ~IRQ_INPROGRESS;
@@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
 	 */
 	if (work && desc->chip && desc->chip->end)
 		desc->chip->end(irq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	return ok;
 }
-- 
cgit v1.2.3


From ecb49d1a639acbacfc3771cae5ec07bed5df3847 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 16:36:54 +0100
Subject: hrtimers: Convert to raw_spinlocks

Convert locks which cannot be sleeping locks in preempt-rt to
raw_spinlocks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hrtimer.h   |  2 +-
 kernel/hrtimer.c          | 50 +++++++++++++++++++++++------------------------
 kernel/time/timer_list.c  |  6 +++---
 kernel/time/timer_stats.c | 17 ++++++++--------
 4 files changed, 38 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index af634e95871d..5d86fb2309d2 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -169,7 +169,7 @@ struct hrtimer_clock_base {
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
  */
 struct hrtimer_cpu_base {
-	spinlock_t			lock;
+	raw_spinlock_t			lock;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t				expires_next;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d2f9239dc6ba..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->cpu_base->lock, *flags);
+			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
@@ -208,13 +208,13 @@ again:
 
 		/* See the comment in lock_timer_base() */
 		timer->base = NULL;
-		spin_unlock(&base->cpu_base->lock);
-		spin_lock(&new_base->cpu_base->lock);
+		raw_spin_unlock(&base->cpu_base->lock);
+		raw_spin_lock(&new_base->cpu_base->lock);
 
 		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
 			cpu = this_cpu;
-			spin_unlock(&new_base->cpu_base->lock);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_unlock(&new_base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 			timer->base = base;
 			goto again;
 		}
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->cpu_base->lock, *flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
@@ -628,12 +628,12 @@ static void retrigger_next_event(void *arg)
 	base = &__get_cpu_var(hrtimer_bases);
 
 	/* Adjust CLOCK_REALTIME offset */
-	spin_lock(&base->lock);
+	raw_spin_lock(&base->lock);
 	base->clock_base[CLOCK_REALTIME].offset =
 		timespec_to_ktime(realtime_offset);
 
 	hrtimer_force_reprogram(base, 0);
-	spin_unlock(&base->lock);
+	raw_spin_unlock(&base->lock);
 }
 
 /*
@@ -694,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
 		if (wakeup) {
-			spin_unlock(&base->cpu_base->lock);
+			raw_spin_unlock(&base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			spin_lock(&base->cpu_base->lock);
+			raw_spin_lock(&base->cpu_base->lock);
 		} else
 			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
@@ -790,7 +790,7 @@ static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
@@ -1123,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1140,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
@@ -1222,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
 	 */
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1261,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 retry:
 	expires_next.tv64 = KTIME_MAX;
 
-	spin_lock(&cpu_base->lock);
+	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1317,7 +1317,7 @@ retry:
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
-	spin_unlock(&cpu_base->lock);
+	raw_spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 == KTIME_MAX ||
@@ -1457,7 +1457,7 @@ void hrtimer_run_queues(void)
 			gettime = 0;
 		}
 
-		spin_lock(&cpu_base->lock);
+		raw_spin_lock(&cpu_base->lock);
 
 		while ((node = base->first)) {
 			struct hrtimer *timer;
@@ -1469,7 +1469,7 @@ void hrtimer_run_queues(void)
 
 			__run_hrtimer(timer, &base->softirq_time);
 		}
-		spin_unlock(&cpu_base->lock);
+		raw_spin_unlock(&cpu_base->lock);
 	}
 }
 
@@ -1625,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	spin_lock_init(&cpu_base->lock);
+	raw_spin_lock_init(&cpu_base->lock);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1683,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	spin_lock(&new_base->lock);
-	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&new_base->lock);
+	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
 				     &new_base->clock_base[i]);
 	}
 
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
+	raw_spin_unlock(&new_base->lock);
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 9d80db4747d4..28265636b6c2 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
 
 next_one:
 	i = 0;
-	spin_lock_irqsave(&base->cpu_base->lock, flags);
+	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
 	curr = base->first;
 	/*
@@ -100,13 +100,13 @@ next_one:
 
 		timer = rb_entry(curr, struct hrtimer, node);
 		tmp = *timer;
-		spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+		raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
 		print_timer(m, timer, &tmp, i, now);
 		next++;
 		goto next_one;
 	}
-	spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+	raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 }
 
 static void
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 63b117e9eba1..2f3b585b8d7d 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
 /*
  * Per-CPU lookup locks for fast hash lookup:
  */
-static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
 
 /*
  * Mutex to serialize state changes with show-stats activities:
@@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 	/*
 	 * It doesnt matter which lock we take:
 	 */
-	spinlock_t *lock;
+	raw_spinlock_t *lock;
 	struct entry *entry, input;
 	unsigned long flags;
 
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 	input.pid = pid;
 	input.timer_flag = timer_flag;
 
-	spin_lock_irqsave(lock, flags);
+	raw_spin_lock_irqsave(lock, flags);
 	if (!timer_stats_active)
 		goto out_unlock;
 
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 		atomic_inc(&overflow_count);
 
  out_unlock:
-	spin_unlock_irqrestore(lock, flags);
+	raw_spin_unlock_irqrestore(lock, flags);
 }
 
 static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,10 +348,11 @@ static void sync_access(void)
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
-		spin_lock_irqsave(lock, flags);
+		raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
+
+		raw_spin_lock_irqsave(lock, flags);
 		/* nothing */
-		spin_unlock_irqrestore(lock, flags);
+		raw_spin_unlock_irqrestore(lock, flags);
 	}
 }
 
@@ -409,7 +410,7 @@ void __init init_timer_stats(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
+		raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
 }
 
 static int __init init_tstats_procfs(void)
-- 
cgit v1.2.3


From e625cce1b73fb38b74e5387226534f7bcbfc36fe Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 18:02:06 +0100
Subject: perf_event: Convert to raw_spinlock

Convert locks which cannot be sleeping locks in preempt-rt to
raw_spinlocks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   2 +-
 kernel/hw_breakpoint.c     |   4 +-
 kernel/perf_event.c        | 106 ++++++++++++++++++++++-----------------------
 3 files changed, 56 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 64a53f74c9a9..da7bdc23f279 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -681,7 +681,7 @@ struct perf_event_context {
 	 * Protect the states of the events in the list,
 	 * nr_active, and the list:
 	 */
-	spinlock_t			lock;
+	raw_spinlock_t			lock;
 	/*
 	 * Protect the list of events.  Locking either mutex or lock
 	 * is sufficient to ensure the list doesn't change; to change
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 366eedf949c0..dbcbf6a33a08 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -96,7 +96,7 @@ static int task_bp_pinned(struct task_struct *tsk)
 
 	list = &ctx->event_list;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	raw_spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * The current breakpoint counter is not included in the list
@@ -107,7 +107,7 @@ static int task_bp_pinned(struct task_struct *tsk)
 			count++;
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
 	return count;
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e73e53c7582f..9052d6c8c9fd 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -203,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
 		 * if so.  If we locked the right context, then it
 		 * can't get swapped on us any more.
 		 */
-		spin_lock_irqsave(&ctx->lock, *flags);
+		raw_spin_lock_irqsave(&ctx->lock, *flags);
 		if (ctx != rcu_dereference(task->perf_event_ctxp)) {
-			spin_unlock_irqrestore(&ctx->lock, *flags);
+			raw_spin_unlock_irqrestore(&ctx->lock, *flags);
 			goto retry;
 		}
 
 		if (!atomic_inc_not_zero(&ctx->refcount)) {
-			spin_unlock_irqrestore(&ctx->lock, *flags);
+			raw_spin_unlock_irqrestore(&ctx->lock, *flags);
 			ctx = NULL;
 		}
 	}
@@ -231,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
 	ctx = perf_lock_task_context(task, &flags);
 	if (ctx) {
 		++ctx->pin_count;
-		spin_unlock_irqrestore(&ctx->lock, flags);
+		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 	return ctx;
 }
@@ -240,9 +240,9 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	raw_spin_lock_irqsave(&ctx->lock, flags);
 	--ctx->pin_count;
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	put_ctx(ctx);
 }
 
@@ -427,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	/*
 	 * Protect the list operation against NMI by disabling the
 	 * events on a global level.
@@ -449,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
 	}
 
 	perf_enable();
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 
@@ -488,12 +488,12 @@ retry:
 	task_oncpu_function_call(task, __perf_event_remove_from_context,
 				 event);
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 	/*
 	 * If the context is active we need to retry the smp call.
 	 */
 	if (ctx->nr_active && !list_empty(&event->group_entry)) {
-		spin_unlock_irq(&ctx->lock);
+		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
@@ -504,7 +504,7 @@ retry:
 	 */
 	if (!list_empty(&event->group_entry))
 		list_del_event(event, ctx);
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
@@ -535,7 +535,7 @@ static void __perf_event_disable(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 
 	/*
 	 * If the event is on, turn it off.
@@ -551,7 +551,7 @@ static void __perf_event_disable(void *info)
 		event->state = PERF_EVENT_STATE_OFF;
 	}
 
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -584,12 +584,12 @@ void perf_event_disable(struct perf_event *event)
  retry:
 	task_oncpu_function_call(task, __perf_event_disable, event);
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 	/*
 	 * If the event is still active, we need to retry the cross-call.
 	 */
 	if (event->state == PERF_EVENT_STATE_ACTIVE) {
-		spin_unlock_irq(&ctx->lock);
+		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
@@ -602,7 +602,7 @@ void perf_event_disable(struct perf_event *event)
 		event->state = PERF_EVENT_STATE_OFF;
 	}
 
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 static int
@@ -770,7 +770,7 @@ static void __perf_install_in_context(void *info)
 		cpuctx->task_ctx = ctx;
 	}
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
@@ -820,7 +820,7 @@ static void __perf_install_in_context(void *info)
  unlock:
 	perf_enable();
 
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -856,12 +856,12 @@ retry:
 	task_oncpu_function_call(task, __perf_install_in_context,
 				 event);
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 	/*
 	 * we need to retry the smp call.
 	 */
 	if (ctx->is_active && list_empty(&event->group_entry)) {
-		spin_unlock_irq(&ctx->lock);
+		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
@@ -872,7 +872,7 @@ retry:
 	 */
 	if (list_empty(&event->group_entry))
 		add_event_to_ctx(event, ctx);
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
@@ -917,7 +917,7 @@ static void __perf_event_enable(void *info)
 		cpuctx->task_ctx = ctx;
 	}
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
@@ -959,7 +959,7 @@ static void __perf_event_enable(void *info)
 	}
 
  unlock:
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -985,7 +985,7 @@ void perf_event_enable(struct perf_event *event)
 		return;
 	}
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 	if (event->state >= PERF_EVENT_STATE_INACTIVE)
 		goto out;
 
@@ -1000,10 +1000,10 @@ void perf_event_enable(struct perf_event *event)
 		event->state = PERF_EVENT_STATE_OFF;
 
  retry:
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 	task_oncpu_function_call(task, __perf_event_enable, event);
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 
 	/*
 	 * If the context is active and the event is still off,
@@ -1020,7 +1020,7 @@ void perf_event_enable(struct perf_event *event)
 		__perf_event_mark_enabled(event, ctx);
 
  out:
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1042,7 +1042,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 {
 	struct perf_event *event;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_events))
 		goto out;
@@ -1055,7 +1055,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 	}
 	perf_enable();
  out:
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -1193,8 +1193,8 @@ void perf_event_task_sched_out(struct task_struct *task,
 		 * order we take the locks because no other cpu could
 		 * be trying to lock both of these tasks.
 		 */
-		spin_lock(&ctx->lock);
-		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		raw_spin_lock(&ctx->lock);
+		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
 			/*
 			 * XXX do we need a memory barrier of sorts
@@ -1208,8 +1208,8 @@ void perf_event_task_sched_out(struct task_struct *task,
 
 			perf_event_sync_stat(ctx, next_ctx);
 		}
-		spin_unlock(&next_ctx->lock);
-		spin_unlock(&ctx->lock);
+		raw_spin_unlock(&next_ctx->lock);
+		raw_spin_unlock(&ctx->lock);
 	}
 	rcu_read_unlock();
 
@@ -1251,7 +1251,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	struct perf_event *event;
 	int can_add_hw = 1;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	if (likely(!ctx->nr_events))
 		goto out;
@@ -1306,7 +1306,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	}
 	perf_enable();
  out:
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -1370,7 +1370,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 	struct hw_perf_event *hwc;
 	u64 interrupts, freq;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -1425,7 +1425,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 			perf_enable();
 		}
 	}
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -1438,7 +1438,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	if (!ctx->nr_events)
 		return;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	/*
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
@@ -1449,7 +1449,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	}
 	perf_enable();
 
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 }
 
 void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1498,7 +1498,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	__perf_event_task_sched_out(ctx);
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 
 	list_for_each_entry(event, &ctx->group_list, group_entry) {
 		if (!event->attr.enable_on_exec)
@@ -1516,7 +1516,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	if (enabled)
 		unclone_ctx(ctx);
 
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 
 	perf_event_task_sched_in(task, smp_processor_id());
  out:
@@ -1542,10 +1542,10 @@ static void __perf_event_read(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	raw_spin_lock(&ctx->lock);
 	update_context_time(ctx);
 	update_event_times(event);
-	spin_unlock(&ctx->lock);
+	raw_spin_unlock(&ctx->lock);
 
 	event->pmu->read(event);
 }
@@ -1563,10 +1563,10 @@ static u64 perf_event_read(struct perf_event *event)
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
 
-		spin_lock_irqsave(&ctx->lock, flags);
+		raw_spin_lock_irqsave(&ctx->lock, flags);
 		update_context_time(ctx);
 		update_event_times(event);
-		spin_unlock_irqrestore(&ctx->lock, flags);
+		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 
 	return atomic64_read(&event->count);
@@ -1579,7 +1579,7 @@ static void
 __perf_event_init_context(struct perf_event_context *ctx,
 			    struct task_struct *task)
 {
-	spin_lock_init(&ctx->lock);
+	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->group_list);
 	INIT_LIST_HEAD(&ctx->event_list);
@@ -1649,7 +1649,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
 	ctx = perf_lock_task_context(task, &flags);
 	if (ctx) {
 		unclone_ctx(ctx);
-		spin_unlock_irqrestore(&ctx->lock, flags);
+		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 
 	if (!ctx) {
@@ -1987,7 +1987,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	if (!value)
 		return -EINVAL;
 
-	spin_lock_irq(&ctx->lock);
+	raw_spin_lock_irq(&ctx->lock);
 	if (event->attr.freq) {
 		if (value > sysctl_perf_event_sample_rate) {
 			ret = -EINVAL;
@@ -2000,7 +2000,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 		event->hw.sample_period = value;
 	}
 unlock:
-	spin_unlock_irq(&ctx->lock);
+	raw_spin_unlock_irq(&ctx->lock);
 
 	return ret;
 }
@@ -4992,7 +4992,7 @@ void perf_event_exit_task(struct task_struct *child)
 	 * reading child->perf_event_ctxp, we wait until it has
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
-	spin_lock(&child_ctx->lock);
+	raw_spin_lock(&child_ctx->lock);
 	child->perf_event_ctxp = NULL;
 	/*
 	 * If this context is a clone; unclone it so it can't get
@@ -5001,7 +5001,7 @@ void perf_event_exit_task(struct task_struct *child)
 	 */
 	unclone_ctx(child_ctx);
 	update_context_time(child_ctx);
-	spin_unlock_irqrestore(&child_ctx->lock, flags);
+	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*
 	 * Report the task dead after unscheduling the events so that we
@@ -5292,11 +5292,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
 	perf_reserved_percpu = val;
 	for_each_online_cpu(cpu) {
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
-		spin_lock_irq(&cpuctx->ctx.lock);
+		raw_spin_lock_irq(&cpuctx->ctx.lock);
 		mpt = min(perf_max_events - cpuctx->ctx.nr_events,
 			  perf_max_events - perf_reserved_percpu);
 		cpuctx->max_pertask = mpt;
-		spin_unlock_irq(&cpuctx->ctx.lock);
+		raw_spin_unlock_irq(&cpuctx->ctx.lock);
 	}
 	spin_unlock(&perf_resource_lock);
 
-- 
cgit v1.2.3


From 4056c9a344d60ee96471a5f3b0a3c8a90371c8fd Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:04 +0200
Subject: nfsd: Remove unused dprintk

This doesn't appear to be useful.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 8f641c908450..2973e1135343 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -20,7 +20,6 @@
 # include <linux/fs.h>
 #endif
 #include <linux/nfsd/const.h>
-#include <linux/nfsd/debug.h>
 
 /*
  * This is the old "dentry style" Linux NFSv2 file handle.
@@ -329,9 +328,6 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
 	struct dentry	*dentry = fhp->fh_dentry;
 	struct inode	*inode;
 
-	dfprintk(FILEOP, "nfsd: fh_lock(%s) locked = %d\n",
-			SVCFH_fmt(fhp), fhp->fh_locked);
-
 	BUG_ON(!dentry);
 
 	if (fhp->fh_locked) {
-- 
cgit v1.2.3


From a600ffcbb3743cf1296bee2a41d4824c719d7181 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:35 +0200
Subject: sunrpc: Clean never used include files

Remove include of two headers never used by this file.
Doing so exposed a missing #include <linux/types.h> in
include/linux/sunrpc/rpc_rdma.h.

I did not see any other users dependency but if exist they
should be fixed since these headers are totally irrelevant
to here.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/debug.h    | 3 ---
 include/linux/sunrpc/rpc_rdma.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 10709cbe96fd..c2786f20016f 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -28,9 +28,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/timer.h>
-#include <linux/workqueue.h>
-
 /*
  * Enable RPC debugging/profiling.
  */
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 87b895d5c786..b78f16b1dea3 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -40,6 +40,8 @@
 #ifndef _LINUX_SUNRPC_RPC_RDMA_H
 #define _LINUX_SUNRPC_RPC_RDMA_H
 
+#include <linux/types.h>
+
 struct rpcrdma_segment {
 	__be32 rs_handle;	/* Registered memory handle */
 	__be32 rs_length;	/* Length of the chunk in bytes */
-- 
cgit v1.2.3


From d703158229329af7152d159753f849aa7bd55ee6 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:47 +0200
Subject: nfsd: Fix independence of a few nfsd related headers

An header should be compilation independent, .i.e pull in
any header who's declarations are directly used by this header.
And not let users re-include all it's dependencies all over
again.

[At the end of the day what's the use of a header if it does
 not have more then one user?]

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfs_xdr.h | 1 +
 include/linux/nfsacl.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 62f63fb0c4c8..00a0c8170816 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -2,6 +2,7 @@
 #define _LINUX_NFS_XDR_H
 
 #include <linux/nfsacl.h>
+#include <linux/nfs3.h>
 
 /*
  * To change the maximum rsize and wsize supported by the NFS client, adjust
diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
index 43011b69297c..f321b578edeb 100644
--- a/include/linux/nfsacl.h
+++ b/include/linux/nfsacl.h
@@ -29,6 +29,7 @@
 #ifdef __KERNEL__
 
 #include <linux/posix_acl.h>
+#include <linux/sunrpc/xdr.h>
 
 /* Maximum number of ACL entries over NFS */
 #define NFS_ACL_MAX_ENTRIES	1024
-- 
cgit v1.2.3


From 72579ac9cd68081108277c31781b127d0f420d61 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:59 +0200
Subject: nfsd: Headers Independence and include cleanups

* Add includes that are directly used by headers
* Remove includes that are not needed

These are the changes made:

[xdr.h]
struct nfsd_readdirres has an embedded struct readdir_cd from nfsd.h
fixing that we can drop other includes

[xdr4.h]
embedded types defined both at state.h and nfsd.h

[syscall.h]
After export.h fix none of these stuff is needed.
fix extra space in # include <> statement

[stats.h]
does not need <linux/nfs4.h> but was export to user-mode
so I don't touch it

[state.h]
embedded types from nfsfh.h like struct knfsd_fh. bringing that
eliminates the need for all other includes

[nfsfh.h]
directly manipulating types from sunrpc/svc.h.
Removed Other unused headers.

[nfsd.h]
removed unused headers include

[export.h]
lots of sunrpc/svc.h types and a single prototype declaration
with pointer from nfsfh.h, but all users of export.h do need
nfsfh.h any way. remove now un-needed include.

[const.h]
Unfixed (not independent)

[cache.h]
could do with a forward declaration of "struct svc_rqst;"
from sunrpc/svc.h but all users absolutely will need
sunrpc/svc.h it is easier overall this way.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/cache.h   | 3 +--
 include/linux/nfsd/export.h  | 2 +-
 include/linux/nfsd/nfsd.h    | 4 ----
 include/linux/nfsd/nfsfh.h   | 3 +--
 include/linux/nfsd/state.h   | 4 +---
 include/linux/nfsd/syscall.h | 8 +-------
 include/linux/nfsd/xdr.h     | 3 +--
 include/linux/nfsd/xdr4.h    | 3 ++-
 8 files changed, 8 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
index 3a3f58934f5e..a165425dea41 100644
--- a/include/linux/nfsd/cache.h
+++ b/include/linux/nfsd/cache.h
@@ -10,8 +10,7 @@
 #ifndef NFSCACHE_H
 #define NFSCACHE_H
 
-#include <linux/in.h>
-#include <linux/uio.h>
+#include <linux/sunrpc/svc.h>
 
 /*
  * Representation of a reply cache entry.
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index a6d9ef2bb34a..ef3d416fcf67 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -12,7 +12,7 @@
 
 # include <linux/types.h>
 #ifdef __KERNEL__
-# include <linux/in.h>
+# include <linux/nfsd/nfsfh.h>
 #endif
 
 /*
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index e4518d090a8c..74f67c2aca34 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -11,13 +11,9 @@
 #define LINUX_NFSD_NFSD_H
 
 #include <linux/types.h>
-#include <linux/unistd.h>
-#include <linux/fs.h>
-#include <linux/posix_acl.h>
 #include <linux/mount.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/nfsd/stats.h>
 /*
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 2973e1135343..49523edbc510 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -16,8 +16,7 @@
 
 # include <linux/types.h>
 #ifdef __KERNEL__
-# include <linux/string.h>
-# include <linux/fs.h>
+# include <linux/sunrpc/svc.h>
 #endif
 #include <linux/nfsd/const.h>
 
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 5aadf8aa3a97..2af75686e0d3 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -37,9 +37,7 @@
 #ifndef _NFSD4_STATE_H
 #define _NFSD4_STATE_H
 
-#include <linux/list.h>
-#include <linux/kref.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsfh.h>
 
 typedef struct {
 	u32             cl_boot;
diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h
index 7a3b565b898f..812bc1e160dc 100644
--- a/include/linux/nfsd/syscall.h
+++ b/include/linux/nfsd/syscall.h
@@ -9,14 +9,8 @@
 #ifndef NFSD_SYSCALL_H
 #define NFSD_SYSCALL_H
 
-# include <linux/types.h>
-#ifdef __KERNEL__
-# include <linux/in.h>
-#endif 
-#include <linux/posix_types.h>
-#include <linux/nfsd/const.h>
+#include <linux/types.h>
 #include <linux/nfsd/export.h>
-#include <linux/nfsd/nfsfh.h>
 
 /*
  * Version of the syscall interface
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
index a0132ef58f21..58f824d854c2 100644
--- a/include/linux/nfsd/xdr.h
+++ b/include/linux/nfsd/xdr.h
@@ -7,9 +7,8 @@
 #ifndef LINUX_NFSD_H
 #define LINUX_NFSD_H
 
-#include <linux/fs.h>
 #include <linux/vfs.h>
-#include <linux/nfs.h>
+#include <linux/nfsd/nfsd.h>
 
 struct nfsd_fhandle {
 	struct svc_fh		fh;
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 73164c2b3d29..1bf266239c7e 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -39,7 +39,8 @@
 #ifndef _LINUX_NFSD_XDR4_H
 #define _LINUX_NFSD_XDR4_H
 
-#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/nfsd.h>
 
 #define NFSD4_MAX_TAGLEN	128
 #define XDR_LEN(n)                     (((n) + 3) & ~3)
-- 
cgit v1.2.3


From 9a74af21330c8d46efa977d088a62cc1bfa954e9 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:30:56 +0200
Subject: nfsd: Move private headers to source directory

Lots of include/linux/nfsd/* headers are only used by
nfsd module. Move them to the source directory

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/auth.c             |   2 +-
 fs/nfsd/cache.h            |  85 +++++++
 fs/nfsd/export.c           |   3 +-
 fs/nfsd/lockd.c            |   2 +-
 fs/nfsd/nfs2acl.c          |   7 +-
 fs/nfsd/nfs3acl.c          |   7 +-
 fs/nfsd/nfs3proc.c         |   4 +-
 fs/nfsd/nfs3xdr.c          |   2 +-
 fs/nfsd/nfs4callback.c     |   4 +-
 fs/nfsd/nfs4proc.c         |   4 +-
 fs/nfsd/nfs4recover.c      |   5 +-
 fs/nfsd/nfs4state.c        |   2 +-
 fs/nfsd/nfs4xdr.c          |   3 +-
 fs/nfsd/nfscache.c         |   4 +-
 fs/nfsd/nfsctl.c           |   5 +-
 fs/nfsd/nfsd.h             | 335 +++++++++++++++++++++++++++
 fs/nfsd/nfsfh.c            |   2 +-
 fs/nfsd/nfsproc.c          |   4 +-
 fs/nfsd/nfssvc.c           |   4 +-
 fs/nfsd/nfsxdr.c           |   2 +-
 fs/nfsd/state.h            | 409 ++++++++++++++++++++++++++++++++
 fs/nfsd/stats.c            |   4 +-
 fs/nfsd/vfs.c              |  18 +-
 fs/nfsd/xdr.h              | 176 ++++++++++++++
 fs/nfsd/xdr3.h             | 346 +++++++++++++++++++++++++++
 fs/nfsd/xdr4.h             | 564 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/cache.h |  85 -------
 include/linux/nfsd/nfsd.h  | 335 ---------------------------
 include/linux/nfsd/state.h | 409 --------------------------------
 include/linux/nfsd/xdr.h   | 176 --------------
 include/linux/nfsd/xdr3.h  | 346 ---------------------------
 include/linux/nfsd/xdr4.h  | 564 ---------------------------------------------
 32 files changed, 1963 insertions(+), 1955 deletions(-)
 create mode 100644 fs/nfsd/cache.h
 create mode 100644 fs/nfsd/nfsd.h
 create mode 100644 fs/nfsd/state.h
 create mode 100644 fs/nfsd/xdr.h
 create mode 100644 fs/nfsd/xdr3.h
 create mode 100644 fs/nfsd/xdr4.h
 delete mode 100644 include/linux/nfsd/cache.h
 delete mode 100644 include/linux/nfsd/nfsd.h
 delete mode 100644 include/linux/nfsd/state.h
 delete mode 100644 include/linux/nfsd/xdr.h
 delete mode 100644 include/linux/nfsd/xdr3.h
 delete mode 100644 include/linux/nfsd/xdr4.h

(limited to 'include/linux')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index ad354d284cf8..71209d4993d0 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -5,7 +5,7 @@
  */
 
 #include <linux/sched.h>
-#include <linux/nfsd/nfsd.h>
+#include "nfsd.h"
 #include "auth.h"
 
 int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 000000000000..a165425dea41
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,85 @@
+/*
+ * include/linux/nfsd/cache.h
+ *
+ * Request reply cache. This was heavily inspired by the
+ * implementation in 4.3BSD/4.4BSD.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef NFSCACHE_H
+#define NFSCACHE_H
+
+#include <linux/sunrpc/svc.h>
+
+/*
+ * Representation of a reply cache entry.
+ */
+struct svc_cacherep {
+	struct hlist_node	c_hash;
+	struct list_head	c_lru;
+
+	unsigned char		c_state,	/* unused, inprog, done */
+				c_type,		/* status, buffer */
+				c_secure : 1;	/* req came from port < 1024 */
+	struct sockaddr_in	c_addr;
+	__be32			c_xid;
+	u32			c_prot;
+	u32			c_proc;
+	u32			c_vers;
+	unsigned long		c_timestamp;
+	union {
+		struct kvec	u_vec;
+		__be32		u_status;
+	}			c_u;
+};
+
+#define c_replvec		c_u.u_vec
+#define c_replstat		c_u.u_status
+
+/* cache entry states */
+enum {
+	RC_UNUSED,
+	RC_INPROG,
+	RC_DONE
+};
+
+/* return values */
+enum {
+	RC_DROPIT,
+	RC_REPLY,
+	RC_DOIT,
+	RC_INTR
+};
+
+/*
+ * Cache types.
+ * We may want to add more types one day, e.g. for diropres and
+ * attrstat replies. Using cache entries with fixed length instead
+ * of buffer pointers may be more efficient.
+ */
+enum {
+	RC_NOCACHE,
+	RC_REPLSTAT,
+	RC_REPLBUFF,
+};
+
+/*
+ * If requests are retransmitted within this interval, they're dropped.
+ */
+#define RC_DELAY		(HZ/5)
+
+int	nfsd_reply_cache_init(void);
+void	nfsd_reply_cache_shutdown(void);
+int	nfsd_cache_lookup(struct svc_rqst *, int);
+void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+
+#ifdef CONFIG_NFSD_V4
+void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
+#else  /* CONFIG_NFSD_V4 */
+static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
+{
+}
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 68e63f441444..cb3dae2fcd86 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -18,10 +18,11 @@
 #include <linux/module.h>
 #include <linux/exportfs.h>
 
-#include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/syscall.h>
 #include <net/ipv6.h>
 
+#include "nfsd.h"
+
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
 typedef struct auth_domain	svc_client;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 801ef7104ff4..6f12777ed227 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -9,8 +9,8 @@
  */
 
 #include <linux/file.h>
-#include <linux/nfsd/nfsd.h>
 #include <linux/lockd/bind.h>
+#include "nfsd.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index a54628de7715..874e2a94bf4f 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -6,10 +6,11 @@
  * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 2f5c61bea908..c6011ddbadc0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -6,10 +6,11 @@
  * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b694b4304544..90b19ca75b34 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -10,8 +10,8 @@
 #include <linux/ext2_fs.h>
 #include <linux/magic.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 623e13aa6259..c523bb88c10b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/namei.h>
-#include <linux/nfsd/xdr3.h>
+#include "xdr3.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4fe396071b61..f7a315827638 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -34,8 +34,8 @@
  */
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
+#include "nfsd.h"
+#include "state.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 61f682c77e7f..e2b5666f25d1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -36,8 +36,8 @@
  */
 #include <linux/file.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr4.h>
+#include "cache.h"
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 48742f243c25..6744e7f2da0e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,12 +33,13 @@
 *
 */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
 #include <linux/file.h>
 #include <linux/namei.h>
 #include <linux/crypto.h>
 #include <linux/sched.h>
+
+#include "nfsd.h"
+#include "state.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1fe6e29fd500..2923e6c1da18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -36,11 +36,11 @@
 
 #include <linux/file.h>
 #include <linux/smp_lock.h>
-#include <linux/nfsd/xdr4.h>
 #include <linux/namei.h>
 #include <linux/swap.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/clnt.h>
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2fa96821f5b5..cab978031100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -43,10 +43,11 @@
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/utsname.h>
-#include <linux/nfsd/xdr4.h>
 #include <linux/nfsd_idmap.h>
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/svcauth_gss.h>
+
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 96694b8345ef..18aa9729a380 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -10,8 +10,8 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
+#include "nfsd.h"
+#include "cache.h"
 
 /* Size of reply cache. Common values are:
  * 4.3BSD:	128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e4f49fd6af44..0415680d3f58 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -11,12 +11,13 @@
 
 #include <linux/nfsd_idmap.h>
 #include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/lockd.h>
 #include <linux/sunrpc/clnt.h>
 
+#include "nfsd.h"
+#include "cache.h"
+
 /*
  *	We have a single directory with 9 nodes in it.
  */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 000000000000..74f67c2aca34
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,335 @@
+/*
+ * linux/include/linux/nfsd/nfsd.h
+ *
+ * Hodge-podge collection of knfsd-related stuff.
+ * I will sort this out later.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_NFSD_H
+#define LINUX_NFSD_NFSD_H
+
+#include <linux/types.h>
+#include <linux/mount.h>
+
+#include <linux/nfsd/debug.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/stats.h>
+/*
+ * nfsd version
+ */
+#define NFSD_SUPPORTED_MINOR_VERSION	1
+
+struct readdir_cd {
+	__be32			err;	/* 0, nfserr, or nfserr_eof */
+};
+
+
+extern struct svc_program	nfsd_program;
+extern struct svc_version	nfsd_version2, nfsd_version3,
+				nfsd_version4;
+extern u32			nfsd_supported_minorversion;
+extern struct mutex		nfsd_mutex;
+extern struct svc_serv		*nfsd_serv;
+extern spinlock_t		nfsd_drc_lock;
+extern unsigned int		nfsd_drc_max_mem;
+extern unsigned int		nfsd_drc_mem_used;
+
+extern const struct seq_operations nfs_exports_op;
+
+/*
+ * Function prototypes.
+ */
+int		nfsd_svc(unsigned short port, int nrservs);
+int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+
+int		nfsd_nrthreads(void);
+int		nfsd_nrpools(void);
+int		nfsd_get_nrthreads(int n, int *);
+int		nfsd_set_nrthreads(int n, int *);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+#endif
+
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+int nfsd_minorversion(u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
+extern int nfsd_max_blksize;
+
+/* 
+ * NFSv4 State
+ */
+#ifdef CONFIG_NFSD_V4
+extern unsigned int max_delegations;
+int nfs4_state_init(void);
+void nfsd4_free_slabs(void);
+int nfs4_state_start(void);
+void nfs4_state_shutdown(void);
+time_t nfs4_lease_time(void);
+void nfs4_reset_lease(time_t leasetime);
+int nfs4_reset_recoverydir(char *recdir);
+#else
+static inline int nfs4_state_init(void) { return 0; }
+static inline void nfsd4_free_slabs(void) { }
+static inline int nfs4_state_start(void) { return 0; }
+static inline void nfs4_state_shutdown(void) { }
+static inline time_t nfs4_lease_time(void) { return 0; }
+static inline void nfs4_reset_lease(time_t leasetime) { }
+static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+#endif
+
+/*
+ * lockd binding
+ */
+void		nfsd_lockd_init(void);
+void		nfsd_lockd_shutdown(void);
+
+
+/*
+ * These macros provide pre-xdr'ed values for faster operation.
+ */
+#define	nfs_ok			cpu_to_be32(NFS_OK)
+#define	nfserr_perm		cpu_to_be32(NFSERR_PERM)
+#define	nfserr_noent		cpu_to_be32(NFSERR_NOENT)
+#define	nfserr_io		cpu_to_be32(NFSERR_IO)
+#define	nfserr_nxio		cpu_to_be32(NFSERR_NXIO)
+#define	nfserr_eagain		cpu_to_be32(NFSERR_EAGAIN)
+#define	nfserr_acces		cpu_to_be32(NFSERR_ACCES)
+#define	nfserr_exist		cpu_to_be32(NFSERR_EXIST)
+#define	nfserr_xdev		cpu_to_be32(NFSERR_XDEV)
+#define	nfserr_nodev		cpu_to_be32(NFSERR_NODEV)
+#define	nfserr_notdir		cpu_to_be32(NFSERR_NOTDIR)
+#define	nfserr_isdir		cpu_to_be32(NFSERR_ISDIR)
+#define	nfserr_inval		cpu_to_be32(NFSERR_INVAL)
+#define	nfserr_fbig		cpu_to_be32(NFSERR_FBIG)
+#define	nfserr_nospc		cpu_to_be32(NFSERR_NOSPC)
+#define	nfserr_rofs		cpu_to_be32(NFSERR_ROFS)
+#define	nfserr_mlink		cpu_to_be32(NFSERR_MLINK)
+#define	nfserr_opnotsupp	cpu_to_be32(NFSERR_OPNOTSUPP)
+#define	nfserr_nametoolong	cpu_to_be32(NFSERR_NAMETOOLONG)
+#define	nfserr_notempty		cpu_to_be32(NFSERR_NOTEMPTY)
+#define	nfserr_dquot		cpu_to_be32(NFSERR_DQUOT)
+#define	nfserr_stale		cpu_to_be32(NFSERR_STALE)
+#define	nfserr_remote		cpu_to_be32(NFSERR_REMOTE)
+#define	nfserr_wflush		cpu_to_be32(NFSERR_WFLUSH)
+#define	nfserr_badhandle	cpu_to_be32(NFSERR_BADHANDLE)
+#define	nfserr_notsync		cpu_to_be32(NFSERR_NOT_SYNC)
+#define	nfserr_badcookie	cpu_to_be32(NFSERR_BAD_COOKIE)
+#define	nfserr_notsupp		cpu_to_be32(NFSERR_NOTSUPP)
+#define	nfserr_toosmall		cpu_to_be32(NFSERR_TOOSMALL)
+#define	nfserr_serverfault	cpu_to_be32(NFSERR_SERVERFAULT)
+#define	nfserr_badtype		cpu_to_be32(NFSERR_BADTYPE)
+#define	nfserr_jukebox		cpu_to_be32(NFSERR_JUKEBOX)
+#define	nfserr_denied		cpu_to_be32(NFSERR_DENIED)
+#define	nfserr_deadlock		cpu_to_be32(NFSERR_DEADLOCK)
+#define nfserr_expired          cpu_to_be32(NFSERR_EXPIRED)
+#define	nfserr_bad_cookie	cpu_to_be32(NFSERR_BAD_COOKIE)
+#define	nfserr_same		cpu_to_be32(NFSERR_SAME)
+#define	nfserr_clid_inuse	cpu_to_be32(NFSERR_CLID_INUSE)
+#define	nfserr_stale_clientid	cpu_to_be32(NFSERR_STALE_CLIENTID)
+#define	nfserr_resource		cpu_to_be32(NFSERR_RESOURCE)
+#define	nfserr_moved		cpu_to_be32(NFSERR_MOVED)
+#define	nfserr_nofilehandle	cpu_to_be32(NFSERR_NOFILEHANDLE)
+#define	nfserr_minor_vers_mismatch	cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
+#define nfserr_share_denied	cpu_to_be32(NFSERR_SHARE_DENIED)
+#define nfserr_stale_stateid	cpu_to_be32(NFSERR_STALE_STATEID)
+#define nfserr_old_stateid	cpu_to_be32(NFSERR_OLD_STATEID)
+#define nfserr_bad_stateid	cpu_to_be32(NFSERR_BAD_STATEID)
+#define nfserr_bad_seqid	cpu_to_be32(NFSERR_BAD_SEQID)
+#define	nfserr_symlink		cpu_to_be32(NFSERR_SYMLINK)
+#define	nfserr_not_same		cpu_to_be32(NFSERR_NOT_SAME)
+#define	nfserr_restorefh	cpu_to_be32(NFSERR_RESTOREFH)
+#define	nfserr_attrnotsupp	cpu_to_be32(NFSERR_ATTRNOTSUPP)
+#define	nfserr_bad_xdr		cpu_to_be32(NFSERR_BAD_XDR)
+#define	nfserr_openmode		cpu_to_be32(NFSERR_OPENMODE)
+#define	nfserr_locks_held	cpu_to_be32(NFSERR_LOCKS_HELD)
+#define	nfserr_op_illegal	cpu_to_be32(NFSERR_OP_ILLEGAL)
+#define	nfserr_grace		cpu_to_be32(NFSERR_GRACE)
+#define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
+#define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
+#define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
+#define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
+#define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
+#define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_badiomode		cpu_to_be32(NFS4ERR_BADIOMODE)
+#define nfserr_badlayout		cpu_to_be32(NFS4ERR_BADLAYOUT)
+#define nfserr_bad_session_digest	cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
+#define nfserr_badsession		cpu_to_be32(NFS4ERR_BADSESSION)
+#define nfserr_badslot			cpu_to_be32(NFS4ERR_BADSLOT)
+#define nfserr_complete_already		cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
+#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+#define nfserr_deleg_already_wanted	cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
+#define nfserr_back_chan_busy		cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
+#define nfserr_layouttrylater		cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
+#define nfserr_layoutunavailable	cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
+#define nfserr_nomatching_layout	cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
+#define nfserr_recallconflict		cpu_to_be32(NFS4ERR_RECALLCONFLICT)
+#define nfserr_unknown_layouttype	cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
+#define nfserr_seq_misordered		cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
+#define nfserr_sequence_pos		cpu_to_be32(NFS4ERR_SEQUENCE_POS)
+#define nfserr_req_too_big		cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
+#define nfserr_rep_too_big		cpu_to_be32(NFS4ERR_REP_TOO_BIG)
+#define nfserr_rep_too_big_to_cache	cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
+#define nfserr_retry_uncached_rep	cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
+#define nfserr_unsafe_compound		cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
+#define nfserr_too_many_ops		cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
+#define nfserr_op_not_in_session	cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
+#define nfserr_hash_alg_unsupp		cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
+#define nfserr_clientid_busy		cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
+#define nfserr_pnfs_io_hole		cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
+#define nfserr_seq_false_retry		cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
+#define nfserr_bad_high_slot		cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
+#define nfserr_deadsession		cpu_to_be32(NFS4ERR_DEADSESSION)
+#define nfserr_encr_alg_unsupp		cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
+#define nfserr_pnfs_no_layout		cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
+#define nfserr_not_only_op		cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
+#define nfserr_wrong_cred		cpu_to_be32(NFS4ERR_WRONG_CRED)
+#define nfserr_wrong_type		cpu_to_be32(NFS4ERR_WRONG_TYPE)
+#define nfserr_dirdeleg_unavail		cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
+#define nfserr_reject_deleg		cpu_to_be32(NFS4ERR_REJECT_DELEG)
+#define nfserr_returnconflict		cpu_to_be32(NFS4ERR_RETURNCONFLICT)
+#define nfserr_deleg_revoked		cpu_to_be32(NFS4ERR_DELEG_REVOKED)
+
+/* error codes for internal use */
+/* if a request fails due to kmalloc failure, it gets dropped.
+ *  Client should resend eventually
+ */
+#define	nfserr_dropit		cpu_to_be32(30000)
+/* end-of-file indicator in readdir */
+#define	nfserr_eof		cpu_to_be32(30001)
+/* replay detected */
+#define	nfserr_replay_me	cpu_to_be32(11001)
+/* nfs41 replay detected */
+#define	nfserr_replay_cache	cpu_to_be32(11002)
+
+/* Check for dir entries '.' and '..' */
+#define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
+
+/*
+ * Time of server startup
+ */
+extern struct timeval	nfssvc_boot;
+
+#ifdef CONFIG_NFSD_V4
+
+/* before processing a COMPOUND operation, we have to check that there
+ * is enough space in the buffer for XDR encode to succeed.  otherwise,
+ * we might process an operation with side effects, and be unable to
+ * tell the client that the operation succeeded.
+ *
+ * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an "ordinary" _successful_ operation.  (GETATTR,
+ * READ, READDIR, and READLINK have their own buffer checks.)  if we
+ * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
+ *
+ * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
+ * care is taken to ensure that we never fall below this level for any
+ * reason.
+ */
+#define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
+#define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
+
+#define NFSD_LEASE_TIME                 (nfs4_lease_time())
+#define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
+
+/*
+ * The following attributes are currently not supported by the NFSv4 server:
+ *    ARCHIVE       (deprecated anyway)
+ *    HIDDEN        (unlikely to be supported any time soon)
+ *    MIMETYPE      (unlikely to be supported any time soon)
+ *    QUOTA_*       (will be supported in a forthcoming patch)
+ *    SYSTEM        (unlikely to be supported any time soon)
+ *    TIME_BACKUP   (unlikely to be supported any time soon)
+ *    TIME_CREATE   (unlikely to be supported any time soon)
+ */
+#define NFSD4_SUPPORTED_ATTRS_WORD0                                                         \
+(FATTR4_WORD0_SUPPORTED_ATTRS   | FATTR4_WORD0_TYPE         | FATTR4_WORD0_FH_EXPIRE_TYPE   \
+ | FATTR4_WORD0_CHANGE          | FATTR4_WORD0_SIZE         | FATTR4_WORD0_LINK_SUPPORT     \
+ | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR   | FATTR4_WORD0_FSID             \
+ | FATTR4_WORD0_UNIQUE_HANDLES  | FATTR4_WORD0_LEASE_TIME   | FATTR4_WORD0_RDATTR_ERROR     \
+ | FATTR4_WORD0_ACLSUPPORT      | FATTR4_WORD0_CANSETTIME   | FATTR4_WORD0_CASE_INSENSITIVE \
+ | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED                             \
+ | FATTR4_WORD0_FILEHANDLE      | FATTR4_WORD0_FILEID       | FATTR4_WORD0_FILES_AVAIL      \
+ | FATTR4_WORD0_FILES_FREE      | FATTR4_WORD0_FILES_TOTAL  | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS      \
+ | FATTR4_WORD0_MAXFILESIZE     | FATTR4_WORD0_MAXLINK      | FATTR4_WORD0_MAXNAME          \
+ | FATTR4_WORD0_MAXREAD         | FATTR4_WORD0_MAXWRITE     | FATTR4_WORD0_ACL)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD1                                                         \
+(FATTR4_WORD1_MODE              | FATTR4_WORD1_NO_TRUNC     | FATTR4_WORD1_NUMLINKS         \
+ | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
+ | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
+ | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
+ | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
+ | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD2 0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+	NFSD4_SUPPORTED_ATTRS_WORD0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+	NFSD4_SUPPORTED_ATTRS_WORD1
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+
+static inline u32 nfsd_suppattrs0(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
+			    : NFSD4_SUPPORTED_ATTRS_WORD0;
+}
+
+static inline u32 nfsd_suppattrs1(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
+			    : NFSD4_SUPPORTED_ATTRS_WORD1;
+}
+
+static inline u32 nfsd_suppattrs2(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
+			    : NFSD4_SUPPORTED_ATTRS_WORD2;
+}
+
+/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
+#define NFSD_WRITEONLY_ATTRS_WORD1							    \
+(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
+
+/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
+#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
+(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
+#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
+(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD2 0
+
+#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
+	NFSD_WRITEABLE_ATTRS_WORD0
+/*
+ * we currently store the exclusive create verifier in the v_{a,m}time
+ * attributes so the client can't set these at create time using EXCLUSIVE4_1
+ */
+#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
+	(NFSD_WRITEABLE_ATTRS_WORD1 & \
+	 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
+#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
+	NFSD_WRITEABLE_ATTRS_WORD2
+
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 739948165034..0eb1c59f5ab8 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -12,7 +12,7 @@
 #include <linux/exportfs.h>
 
 #include <linux/sunrpc/svcauth_gss.h>
-#include <linux/nfsd/nfsd.h>
+#include "nfsd.h"
 #include "vfs.h"
 #include "auth.h"
 
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index b6bd9e0d7cd0..21a5f793c3d1 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -9,8 +9,8 @@
 
 #include <linux/namei.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
+#include "cache.h"
+#include "xdr.h"
 #include "vfs.h"
 
 typedef struct svc_rqst	svc_rqst;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b2d7ffac0357..b520ce10bd15 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -15,11 +15,11 @@
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
 #include <linux/seq_file.h>
+#include "nfsd.h"
+#include "cache.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 5e0603da39e7..3bec831704af 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -6,7 +6,7 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
-#include <linux/nfsd/xdr.h>
+#include "xdr.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 000000000000..2af75686e0d3
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,409 @@
+/*
+ *  linux/include/nfsd/state.h
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson <andros@umich.edu>
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NFSD4_STATE_H
+#define _NFSD4_STATE_H
+
+#include <linux/nfsd/nfsfh.h>
+
+typedef struct {
+	u32             cl_boot;
+	u32             cl_id;
+} clientid_t;
+
+typedef struct {
+	u32             so_boot;
+	u32             so_stateownerid;
+	u32             so_fileid;
+} stateid_opaque_t;
+
+typedef struct {
+	u32                     si_generation;
+	stateid_opaque_t        si_opaque;
+} stateid_t;
+#define si_boot           si_opaque.so_boot
+#define si_stateownerid   si_opaque.so_stateownerid
+#define si_fileid         si_opaque.so_fileid
+
+#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
+#define STATEID_VAL(s) \
+	(s)->si_boot, \
+	(s)->si_stateownerid, \
+	(s)->si_fileid, \
+	(s)->si_generation
+
+struct nfsd4_cb_sequence {
+	/* args/res */
+	u32			cbs_minorversion;
+	struct nfs4_client	*cbs_clp;
+};
+
+struct nfs4_delegation {
+	struct list_head	dl_perfile;
+	struct list_head	dl_perclnt;
+	struct list_head	dl_recall_lru;  /* delegation recalled */
+	atomic_t		dl_count;       /* ref count */
+	struct nfs4_client	*dl_client;
+	struct nfs4_file	*dl_file;
+	struct file_lock	*dl_flock;
+	struct file		*dl_vfs_file;
+	u32			dl_type;
+	time_t			dl_time;
+/* For recall: */
+	u32			dl_ident;
+	stateid_t		dl_stateid;
+	struct knfsd_fh		dl_fh;
+	int			dl_retries;
+};
+
+/* client delegation callback info */
+struct nfs4_cb_conn {
+	/* SETCLIENTID info */
+	struct sockaddr_storage	cb_addr;
+	size_t			cb_addrlen;
+	u32                     cb_prog;
+	u32			cb_minorversion;
+	u32                     cb_ident;	/* minorversion 0 only */
+	/* RPC client info */
+	atomic_t		cb_set;     /* successful CB_NULL call */
+	struct rpc_clnt *       cb_client;
+};
+
+/* Maximum number of slots per session. 160 is useful for long haul TCP */
+#define NFSD_MAX_SLOTS_PER_SESSION     160
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND	16
+/* Maximum  session per slot cache size */
+#define NFSD_SLOT_CACHE_SIZE		1024
+/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
+#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
+#define NFSD_MAX_MEM_PER_SESSION  \
+		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
+
+struct nfsd4_slot {
+	bool	sl_inuse;
+	bool	sl_cachethis;
+	u16	sl_opcnt;
+	u32	sl_seqid;
+	__be32	sl_status;
+	u32	sl_datalen;
+	char	sl_data[];
+};
+
+struct nfsd4_channel_attrs {
+	u32		headerpadsz;
+	u32		maxreq_sz;
+	u32		maxresp_sz;
+	u32		maxresp_cached;
+	u32		maxops;
+	u32		maxreqs;
+	u32		nr_rdma_attrs;
+	u32		rdma_attrs;
+};
+
+struct nfsd4_create_session {
+	clientid_t			clientid;
+	struct nfs4_sessionid		sessionid;
+	u32				seqid;
+	u32				flags;
+	struct nfsd4_channel_attrs	fore_channel;
+	struct nfsd4_channel_attrs	back_channel;
+	u32				callback_prog;
+	u32				uid;
+	u32				gid;
+};
+
+/* The single slot clientid cache structure */
+struct nfsd4_clid_slot {
+	u32				sl_seqid;
+	__be32				sl_status;
+	struct nfsd4_create_session	sl_cr_ses;
+};
+
+struct nfsd4_session {
+	struct kref		se_ref;
+	struct list_head	se_hash;	/* hash by sessionid */
+	struct list_head	se_perclnt;
+	u32			se_flags;
+	struct nfs4_client	*se_client;	/* for expire_client */
+	struct nfs4_sessionid	se_sessionid;
+	struct nfsd4_channel_attrs se_fchannel;
+	struct nfsd4_channel_attrs se_bchannel;
+	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+};
+
+static inline void
+nfsd4_put_session(struct nfsd4_session *ses)
+{
+	extern void free_session(struct kref *kref);
+	kref_put(&ses->se_ref, free_session);
+}
+
+static inline void
+nfsd4_get_session(struct nfsd4_session *ses)
+{
+	kref_get(&ses->se_ref);
+}
+
+/* formatted contents of nfs4_sessionid */
+struct nfsd4_sessionid {
+	clientid_t	clientid;
+	u32		sequence;
+	u32		reserved;
+};
+
+#define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+
+/*
+ * struct nfs4_client - one per client.  Clientids live here.
+ * 	o Each nfs4_client is hashed by clientid.
+ *
+ * 	o Each nfs4_clients is also hashed by name 
+ * 	  (the opaque quantity initially sent by the client to identify itself).
+ * 	  
+ *	o cl_perclient list is used to ensure no dangling stateowner references
+ *	  when we expire the nfs4_client
+ */
+struct nfs4_client {
+	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
+	struct list_head	cl_strhash; 	/* hash by cl_name */
+	struct list_head	cl_openowners;
+	struct list_head	cl_delegations;
+	struct list_head        cl_lru;         /* tail queue */
+	struct xdr_netobj	cl_name; 	/* id generated by client */
+	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
+	nfs4_verifier		cl_verifier; 	/* generated by client */
+	time_t                  cl_time;        /* time of last lease renewal */
+	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
+	u32			cl_flavor;	/* setclientid pseudoflavor */
+	char			*cl_principal;	/* setclientid principal name */
+	struct svc_cred		cl_cred; 	/* setclientid principal */
+	clientid_t		cl_clientid;	/* generated by server */
+	nfs4_verifier		cl_confirm;	/* generated by server */
+	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
+	atomic_t		cl_count;	/* ref count */
+	u32			cl_firststate;	/* recovery dir creation */
+
+	/* for nfs41 */
+	struct list_head	cl_sessions;
+	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
+	u32			cl_exchange_flags;
+	struct nfs4_sessionid	cl_sessionid;
+
+	/* for nfs41 callbacks */
+	/* We currently support a single back channel with a single slot */
+	unsigned long		cl_cb_slot_busy;
+	u32			cl_cb_seq_nr;
+	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+						/* wait here for slots */
+};
+
+/* struct nfs4_client_reset
+ * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+ * upon lease reset, or from upcall to state_daemon (to read in state
+ * from non-volitile storage) upon reboot.
+ */
+struct nfs4_client_reclaim {
+	struct list_head	cr_strhash;	/* hash by cr_name */
+	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
+};
+
+static inline void
+update_stateid(stateid_t *stateid)
+{
+	stateid->si_generation++;
+}
+
+/* A reasonable value for REPLAY_ISIZE was estimated as follows:  
+ * The OPEN response, typically the largest, requires 
+ *   4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) +  8(verifier) + 
+ *   4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + 
+ *   20(deleg. space limit) + ~32(deleg. ace) = 112 bytes 
+ */
+
+#define NFSD4_REPLAY_ISIZE       112 
+
+/*
+ * Replay buffer, where the result of the last seqid-mutating operation 
+ * is cached. 
+ */
+struct nfs4_replay {
+	__be32			rp_status;
+	unsigned int		rp_buflen;
+	char			*rp_buf;
+	unsigned		intrp_allocated;
+	struct knfsd_fh		rp_openfh;
+	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
+};
+
+/*
+* nfs4_stateowner can either be an open_owner, or a lock_owner
+*
+*    so_idhash:  stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
+*         for lock_owner
+*    so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
+*         for lock_owner
+*    so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
+*         struct is reaped.
+*    so_perfilestate: heads the list of nfs4_stateid (either open or lock) 
+*         and is used to ensure no dangling nfs4_stateid references when we 
+*         release a stateowner.
+*    so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
+*         close is called to reap associated byte-range locks
+*    so_close_lru: (open) stateowner is placed on this list instead of being
+*         reaped (when so_perfilestate is empty) to hold the last close replay.
+*         reaped by laundramat thread after lease period.
+*/
+struct nfs4_stateowner {
+	struct kref		so_ref;
+	struct list_head        so_idhash;   /* hash by so_id */
+	struct list_head        so_strhash;   /* hash by op_name */
+	struct list_head        so_perclient;
+	struct list_head        so_stateids;
+	struct list_head        so_perstateid; /* for lockowners only */
+	struct list_head	so_close_lru; /* tail queue */
+	time_t			so_time; /* time of placement on so_close_lru */
+	int			so_is_open_owner; /* 1=openowner,0=lockowner */
+	u32                     so_id;
+	struct nfs4_client *    so_client;
+	/* after increment in ENCODE_SEQID_OP_TAIL, represents the next
+	 * sequence id expected from the client: */
+	u32                     so_seqid;
+	struct xdr_netobj       so_owner;     /* open owner name */
+	int                     so_confirmed; /* successful OPEN_CONFIRM? */
+	struct nfs4_replay	so_replay;
+};
+
+/*
+*  nfs4_file: a file opened by some number of (open) nfs4_stateowners.
+*    o fi_perfile list is used to search for conflicting 
+*      share_acces, share_deny on the file.
+*/
+struct nfs4_file {
+	atomic_t		fi_ref;
+	struct list_head        fi_hash;    /* hash by "struct inode *" */
+	struct list_head        fi_stateids;
+	struct list_head	fi_delegations;
+	struct inode		*fi_inode;
+	u32                     fi_id;      /* used with stateowner->so_id 
+					     * for stateid_hashtbl hash */
+	bool			fi_had_conflict;
+};
+
+/*
+* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+*
+* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
+*
+* 	st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
+* 	st_perfile: file_hashtbl[] entry.
+* 	st_perfile_state: nfs4_stateowner->so_perfilestate
+*       st_perlockowner: (open stateid) list of lock nfs4_stateowners
+* 	st_access_bmap: used only for open stateid
+* 	st_deny_bmap: used only for open stateid
+*	st_openstp: open stateid lock stateid was derived from
+*
+* XXX: open stateids and lock stateids have diverged sufficiently that
+* we should consider defining separate structs for the two cases.
+*/
+
+struct nfs4_stateid {
+	struct list_head              st_hash; 
+	struct list_head              st_perfile;
+	struct list_head              st_perstateowner;
+	struct list_head              st_lockowners;
+	struct nfs4_stateowner      * st_stateowner;
+	struct nfs4_file            * st_file;
+	stateid_t                     st_stateid;
+	struct file                 * st_vfs_file;
+	unsigned long                 st_access_bmap;
+	unsigned long                 st_deny_bmap;
+	struct nfs4_stateid         * st_openstp;
+};
+
+/* flags for preprocess_seqid_op() */
+#define HAS_SESSION             0x00000001
+#define CONFIRM                 0x00000002
+#define OPEN_STATE              0x00000004
+#define LOCK_STATE              0x00000008
+#define RD_STATE	        0x00000010
+#define WR_STATE	        0x00000020
+#define CLOSE_STATE             0x00000040
+
+#define seqid_mutating_err(err)                       \
+	(((err) != nfserr_stale_clientid) &&    \
+	((err) != nfserr_bad_seqid) &&          \
+	((err) != nfserr_stale_stateid) &&      \
+	((err) != nfserr_bad_stateid))
+
+struct nfsd4_compound_state;
+
+extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+		stateid_t *stateid, int flags, struct file **filp);
+extern void nfs4_lock_state(void);
+extern void nfs4_unlock_state(void);
+extern int nfs4_in_grace(void);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+extern void put_nfs4_client(struct nfs4_client *clp);
+extern void nfs4_free_stateowner(struct kref *kref);
+extern int set_callback_cred(void);
+extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+extern void nfsd4_init_recdir(char *recdir_name);
+extern int nfsd4_recdir_load(void);
+extern void nfsd4_shutdown_recdir(void);
+extern int nfs4_client_to_reclaim(const char *name);
+extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
+extern void nfsd4_recdir_purge_old(void);
+extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+
+static inline void
+nfs4_put_stateowner(struct nfs4_stateowner *so)
+{
+	kref_put(&so->so_ref, nfs4_free_stateowner);
+}
+
+static inline void
+nfs4_get_stateowner(struct nfs4_stateowner *so)
+{
+	kref_get(&so->so_ref);
+}
+
+#endif   /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index e3e411e9fe4a..3fc69dfd3091 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -25,11 +25,11 @@
 
 #include <linux/seq_file.h>
 #include <linux/module.h>
-
 #include <linux/sunrpc/stats.h>
-#include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/stats.h>
 
+#include "nfsd.h"
+
 struct nfsd_stats	nfsdstats;
 struct svc_stat		nfsd_svcstats = {
 	.program	= &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 81ce108c114e..04bdba12d21b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -22,23 +22,25 @@
 #include <linux/fcntl.h>
 #include <linux/namei.h>
 #include <linux/delay.h>
-#include <linux/nfsd/nfsd.h>
-#ifdef CONFIG_NFSD_V3
-#include <linux/nfsd/xdr3.h>
-#endif /* CONFIG_NFSD_V3 */
 #include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
+#include <linux/jhash.h>
+#include <linux/ima.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_NFSD_V3
+#include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
+
 #ifdef CONFIG_NFSD_V4
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
 #endif /* CONFIG_NFSD_V4 */
-#include <linux/jhash.h>
-#include <linux/ima.h>
-#include "vfs.h"
 
-#include <asm/uaccess.h>
+#include "nfsd.h"
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
 
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 000000000000..235ee5c3be54
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,176 @@
+/*
+ * linux/include/linux/nfsd/xdr.h
+ *
+ * XDR types for nfsd. This is mainly a typing exercise.
+ */
+
+#ifndef LINUX_NFSD_H
+#define LINUX_NFSD_H
+
+#include <linux/vfs.h>
+#include "nfsd.h"
+
+struct nfsd_fhandle {
+	struct svc_fh		fh;
+};
+
+struct nfsd_sattrargs {
+	struct svc_fh		fh;
+	struct iattr		attrs;
+};
+
+struct nfsd_diropargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+};
+
+struct nfsd_readargs {
+	struct svc_fh		fh;
+	__u32			offset;
+	__u32			count;
+	int			vlen;
+};
+
+struct nfsd_writeargs {
+	svc_fh			fh;
+	__u32			offset;
+	int			len;
+	int			vlen;
+};
+
+struct nfsd_createargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	struct iattr		attrs;
+};
+
+struct nfsd_renameargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+	
+struct nfsd_linkargs {
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd_symlinkargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	char *			tname;
+	unsigned int		tlen;
+	struct iattr		attrs;
+};
+
+struct nfsd_readdirargs {
+	struct svc_fh		fh;
+	__u32			cookie;
+	__u32			count;
+	__be32 *		buffer;
+};
+
+struct nfsd_attrstat {
+	struct svc_fh		fh;
+	struct kstat		stat;
+};
+
+struct nfsd_diropres  {
+	struct svc_fh		fh;
+	struct kstat		stat;
+};
+
+struct nfsd_readlinkres {
+	int			len;
+};
+
+struct nfsd_readres {
+	struct svc_fh		fh;
+	unsigned long		count;
+	struct kstat		stat;
+};
+
+struct nfsd_readdirres {
+	int			count;
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+};
+
+struct nfsd_statfsres {
+	struct kstatfs		stats;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd_xdrstore {
+	struct nfsd_sattrargs	sattr;
+	struct nfsd_diropargs	dirop;
+	struct nfsd_readargs	read;
+	struct nfsd_writeargs	write;
+	struct nfsd_createargs	create;
+	struct nfsd_renameargs	rename;
+	struct nfsd_linkargs	link;
+	struct nfsd_symlinkargs	symlink;
+	struct nfsd_readdirargs	readdir;
+};
+
+#define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
+
+
+int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
+				struct nfsd_sattrargs *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
+				struct nfsd_diropargs *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readargs *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
+				struct nfsd_writeargs *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
+				struct nfsd_createargs *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
+				struct nfsd_renameargs *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readlinkargs *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_linkargs *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_symlinkargs *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readdirargs *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+
+int nfssvc_encode_entry(void *, const char *name,
+			int namlen, loff_t offset, u64 ino, unsigned int);
+
+int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+
+/* Helper functions for NFSv2 ACL code */
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 000000000000..b330756973cf
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,346 @@
+/*
+ * linux/include/linux/nfsd/xdr3.h
+ *
+ * XDR types for NFSv3 in nfsd.
+ *
+ * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LINUX_NFSD_XDR3_H
+#define _LINUX_NFSD_XDR3_H
+
+#include "xdr.h"
+
+struct nfsd3_sattrargs {
+	struct svc_fh		fh;
+	struct iattr		attrs;
+	int			check_guard;
+	time_t			guardtime;
+};
+
+struct nfsd3_diropargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+};
+
+struct nfsd3_accessargs {
+	struct svc_fh		fh;
+	unsigned int		access;
+};
+
+struct nfsd3_readargs {
+	struct svc_fh		fh;
+	__u64			offset;
+	__u32			count;
+	int			vlen;
+};
+
+struct nfsd3_writeargs {
+	svc_fh			fh;
+	__u64			offset;
+	__u32			count;
+	int			stable;
+	__u32			len;
+	int			vlen;
+};
+
+struct nfsd3_createargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	int			createmode;
+	struct iattr		attrs;
+	__be32 *		verf;
+};
+
+struct nfsd3_mknodargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	__u32			ftype;
+	__u32			major, minor;
+	struct iattr		attrs;
+};
+
+struct nfsd3_renameargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd3_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+
+struct nfsd3_linkargs {
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd3_symlinkargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	char *			tname;
+	unsigned int		tlen;
+	struct iattr		attrs;
+};
+
+struct nfsd3_readdirargs {
+	struct svc_fh		fh;
+	__u64			cookie;
+	__u32			dircount;
+	__u32			count;
+	__be32 *		verf;
+	__be32 *		buffer;
+};
+
+struct nfsd3_commitargs {
+	struct svc_fh		fh;
+	__u64			offset;
+	__u32			count;
+};
+
+struct nfsd3_getaclargs {
+	struct svc_fh		fh;
+	int			mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
+struct nfsd3_attrstat {
+	__be32			status;
+	struct svc_fh		fh;
+	struct kstat            stat;
+};
+
+/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
+struct nfsd3_diropres  {
+	__be32			status;
+	struct svc_fh		dirfh;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_accessres {
+	__be32			status;
+	struct svc_fh		fh;
+	__u32			access;
+};
+
+struct nfsd3_readlinkres {
+	__be32			status;
+	struct svc_fh		fh;
+	__u32			len;
+};
+
+struct nfsd3_readres {
+	__be32			status;
+	struct svc_fh		fh;
+	unsigned long		count;
+	int			eof;
+};
+
+struct nfsd3_writeres {
+	__be32			status;
+	struct svc_fh		fh;
+	unsigned long		count;
+	int			committed;
+};
+
+struct nfsd3_renameres {
+	__be32			status;
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+};
+
+struct nfsd3_linkres {
+	__be32			status;
+	struct svc_fh		tfh;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_readdirres {
+	__be32			status;
+	struct svc_fh		fh;
+	int			count;
+	__be32			verf[2];
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+	__be32 *		offset1;
+	struct svc_rqst *	rqstp;
+
+};
+
+struct nfsd3_fsstatres {
+	__be32			status;
+	struct kstatfs		stats;
+	__u32			invarsec;
+};
+
+struct nfsd3_fsinfores {
+	__be32			status;
+	__u32			f_rtmax;
+	__u32			f_rtpref;
+	__u32			f_rtmult;
+	__u32			f_wtmax;
+	__u32			f_wtpref;
+	__u32			f_wtmult;
+	__u32			f_dtpref;
+	__u64			f_maxfilesize;
+	__u32			f_properties;
+};
+
+struct nfsd3_pathconfres {
+	__be32			status;
+	__u32			p_link_max;
+	__u32			p_name_max;
+	__u32			p_no_trunc;
+	__u32			p_chown_restricted;
+	__u32			p_case_insensitive;
+	__u32			p_case_preserving;
+};
+
+struct nfsd3_commitres {
+	__be32			status;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_getaclres {
+	__be32			status;
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
+/* dummy type for release */
+struct nfsd3_fhandle_pair {
+	__u32			dummy;
+	struct svc_fh		fh1;
+	struct svc_fh		fh2;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd3_xdrstore {
+	struct nfsd3_sattrargs		sattrargs;
+	struct nfsd3_diropargs		diropargs;
+	struct nfsd3_readargs		readargs;
+	struct nfsd3_writeargs		writeargs;
+	struct nfsd3_createargs		createargs;
+	struct nfsd3_renameargs		renameargs;
+	struct nfsd3_linkargs		linkargs;
+	struct nfsd3_symlinkargs	symlinkargs;
+	struct nfsd3_readdirargs	readdirargs;
+	struct nfsd3_diropres 		diropres;
+	struct nfsd3_accessres		accessres;
+	struct nfsd3_readlinkres	readlinkres;
+	struct nfsd3_readres		readres;
+	struct nfsd3_writeres		writeres;
+	struct nfsd3_renameres		renameres;
+	struct nfsd3_linkres		linkres;
+	struct nfsd3_readdirres		readdirres;
+	struct nfsd3_fsstatres		fsstatres;
+	struct nfsd3_fsinfores		fsinfores;
+	struct nfsd3_pathconfres	pathconfres;
+	struct nfsd3_commitres		commitres;
+	struct nfsd3_getaclres		getaclres;
+};
+
+#define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
+
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_sattrargs *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropargs *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_accessargs *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readargs *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_writeargs *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_createargs *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_createargs *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_mknodargs *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_renameargs *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readlinkargs *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_linkargs *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_symlinkargs *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirargs *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirargs *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_commitargs *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropres *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
+				struct nfsd3_accessres *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
+				struct nfsd3_readlinkres *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropres *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
+				struct nfsd3_renameres *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
+				struct nfsd3_linkres *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirres *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
+				struct nfsd3_fsstatres *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
+				struct nfsd3_fsinfores *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
+				struct nfsd3_pathconfres *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
+				struct nfsd3_commitres *);
+
+int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
+				struct nfsd3_fhandle_pair *);
+int nfs3svc_encode_entry(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
+int nfs3svc_encode_entry_plus(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
+/* Helper functions for NFSv3 ACL code */
+__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
+				struct svc_fh *fhp);
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+
+#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 000000000000..83202a1cf07b
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,564 @@
+/*
+ *  include/linux/nfsd/xdr4.h
+ *
+ *  Server-side types for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _LINUX_NFSD_XDR4_H
+#define _LINUX_NFSD_XDR4_H
+
+#include "state.h"
+#include "nfsd.h"
+
+#define NFSD4_MAX_TAGLEN	128
+#define XDR_LEN(n)                     (((n) + 3) & ~3)
+
+struct nfsd4_compound_state {
+	struct svc_fh		current_fh;
+	struct svc_fh		save_fh;
+	struct nfs4_stateowner	*replay_owner;
+	/* For sessions DRC */
+	struct nfsd4_session	*session;
+	struct nfsd4_slot	*slot;
+	__be32			*datap;
+	size_t			iovlen;
+	u32			minorversion;
+	u32			status;
+};
+
+static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
+{
+	return cs->slot != NULL;
+}
+
+struct nfsd4_change_info {
+	u32		atomic;
+	bool		change_supported;
+	u32		before_ctime_sec;
+	u32		before_ctime_nsec;
+	u64		before_change;
+	u32		after_ctime_sec;
+	u32		after_ctime_nsec;
+	u64		after_change;
+};
+
+struct nfsd4_access {
+	u32		ac_req_access;      /* request */
+	u32		ac_supported;       /* response */
+	u32		ac_resp_access;     /* response */
+};
+
+struct nfsd4_close {
+	u32		cl_seqid;           /* request */
+	stateid_t	cl_stateid;         /* request+response */
+	struct nfs4_stateowner * cl_stateowner;	/* response */
+};
+
+struct nfsd4_commit {
+	u64		co_offset;          /* request */
+	u32		co_count;           /* request */
+	nfs4_verifier	co_verf;            /* response */
+};
+
+struct nfsd4_create {
+	u32		cr_namelen;         /* request */
+	char *		cr_name;            /* request */
+	u32		cr_type;            /* request */
+	union {                             /* request */
+		struct {
+			u32 namelen;
+			char *name;
+		} link;   /* NF4LNK */
+		struct {
+			u32 specdata1;
+			u32 specdata2;
+		} dev;    /* NF4BLK, NF4CHR */
+	} u;
+	u32		cr_bmval[3];        /* request */
+	struct iattr	cr_iattr;           /* request */
+	struct nfsd4_change_info  cr_cinfo; /* response */
+	struct nfs4_acl *cr_acl;
+};
+#define cr_linklen	u.link.namelen
+#define cr_linkname	u.link.name
+#define cr_specdata1	u.dev.specdata1
+#define cr_specdata2	u.dev.specdata2
+
+struct nfsd4_delegreturn {
+	stateid_t	dr_stateid;
+};
+
+struct nfsd4_getattr {
+	u32		ga_bmval[3];        /* request */
+	struct svc_fh	*ga_fhp;            /* response */
+};
+
+struct nfsd4_link {
+	u32		li_namelen;         /* request */
+	char *		li_name;            /* request */
+	struct nfsd4_change_info  li_cinfo; /* response */
+};
+
+struct nfsd4_lock_denied {
+	clientid_t	ld_clientid;
+	struct nfs4_stateowner   *ld_sop;
+	u64             ld_start;
+	u64             ld_length;
+	u32             ld_type;
+};
+
+struct nfsd4_lock {
+	/* request */
+	u32             lk_type;
+	u32             lk_reclaim;         /* boolean */
+	u64             lk_offset;
+	u64             lk_length;
+	u32             lk_is_new;
+	union {
+		struct {
+			u32             open_seqid;
+			stateid_t       open_stateid;
+			u32             lock_seqid;
+			clientid_t      clientid;
+			struct xdr_netobj owner;
+		} new;
+		struct {
+			stateid_t       lock_stateid;
+			u32             lock_seqid;
+		} old;
+	} v;
+
+	/* response */
+	union {
+		struct {
+			stateid_t               stateid;
+		} ok;
+		struct nfsd4_lock_denied        denied;
+	} u;
+	/* The lk_replay_owner is the open owner in the open_to_lock_owner
+	 * case and the lock owner otherwise: */
+	struct nfs4_stateowner *lk_replay_owner;
+};
+#define lk_new_open_seqid       v.new.open_seqid
+#define lk_new_open_stateid     v.new.open_stateid
+#define lk_new_lock_seqid       v.new.lock_seqid
+#define lk_new_clientid         v.new.clientid
+#define lk_new_owner            v.new.owner
+#define lk_old_lock_stateid     v.old.lock_stateid
+#define lk_old_lock_seqid       v.old.lock_seqid
+
+#define lk_rflags       u.ok.rflags
+#define lk_resp_stateid u.ok.stateid
+#define lk_denied       u.denied
+
+
+struct nfsd4_lockt {
+	u32				lt_type;
+	clientid_t			lt_clientid;
+	struct xdr_netobj		lt_owner;
+	u64				lt_offset;
+	u64				lt_length;
+	struct nfs4_stateowner * 	lt_stateowner;
+	struct nfsd4_lock_denied  	lt_denied;
+};
+
+ 
+struct nfsd4_locku {
+	u32             lu_type;
+	u32             lu_seqid;
+	stateid_t       lu_stateid;
+	u64             lu_offset;
+	u64             lu_length;
+	struct nfs4_stateowner  *lu_stateowner;
+};
+
+
+struct nfsd4_lookup {
+	u32		lo_len;             /* request */
+	char *		lo_name;            /* request */
+};
+
+struct nfsd4_putfh {
+	u32		pf_fhlen;           /* request */
+	char		*pf_fhval;          /* request */
+};
+
+struct nfsd4_open {
+	u32		op_claim_type;      /* request */
+	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
+	u32		op_delegate_type;   /* request - CLAIM_PREV only */
+	stateid_t       op_delegate_stateid; /* request - response */
+	u32		op_create;     	    /* request */
+	u32		op_createmode;      /* request */
+	u32		op_bmval[3];        /* request */
+	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
+	nfs4_verifier	verf;               /* EXCLUSIVE4 */
+	clientid_t	op_clientid;        /* request */
+	struct xdr_netobj op_owner;           /* request */
+	u32		op_seqid;           /* request */
+	u32		op_share_access;    /* request */
+	u32		op_share_deny;      /* request */
+	stateid_t	op_stateid;         /* response */
+	u32		op_recall;          /* recall */
+	struct nfsd4_change_info  op_cinfo; /* response */
+	u32		op_rflags;          /* response */
+	int		op_truncate;        /* used during processing */
+	struct nfs4_stateowner *op_stateowner; /* used during processing */
+	struct nfs4_acl *op_acl;
+};
+#define op_iattr	iattr
+#define op_verf		verf
+
+struct nfsd4_open_confirm {
+	stateid_t	oc_req_stateid		/* request */;
+	u32		oc_seqid    		/* request */;
+	stateid_t	oc_resp_stateid		/* response */;
+	struct nfs4_stateowner * oc_stateowner;	/* response */
+};
+
+struct nfsd4_open_downgrade {
+	stateid_t       od_stateid;
+	u32             od_seqid;
+	u32             od_share_access;
+	u32             od_share_deny;
+	struct nfs4_stateowner *od_stateowner;
+};
+
+
+struct nfsd4_read {
+	stateid_t	rd_stateid;         /* request */
+	u64		rd_offset;          /* request */
+	u32		rd_length;          /* request */
+	int		rd_vlen;
+	struct file     *rd_filp;
+	
+	struct svc_rqst *rd_rqstp;          /* response */
+	struct svc_fh * rd_fhp;             /* response */
+};
+
+struct nfsd4_readdir {
+	u64		rd_cookie;          /* request */
+	nfs4_verifier	rd_verf;            /* request */
+	u32		rd_dircount;        /* request */
+	u32		rd_maxcount;        /* request */
+	u32		rd_bmval[3];        /* request */
+	struct svc_rqst *rd_rqstp;          /* response */
+	struct svc_fh * rd_fhp;             /* response */
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+};
+
+struct nfsd4_release_lockowner {
+	clientid_t        rl_clientid;
+	struct xdr_netobj rl_owner;
+};
+struct nfsd4_readlink {
+	struct svc_rqst *rl_rqstp;          /* request */
+	struct svc_fh *	rl_fhp;             /* request */
+};
+
+struct nfsd4_remove {
+	u32		rm_namelen;         /* request */
+	char *		rm_name;            /* request */
+	struct nfsd4_change_info  rm_cinfo; /* response */
+};
+
+struct nfsd4_rename {
+	u32		rn_snamelen;        /* request */
+	char *		rn_sname;           /* request */
+	u32		rn_tnamelen;        /* request */
+	char *		rn_tname;           /* request */
+	struct nfsd4_change_info  rn_sinfo; /* response */
+	struct nfsd4_change_info  rn_tinfo; /* response */
+};
+
+struct nfsd4_secinfo {
+	u32 si_namelen;					/* request */
+	char *si_name;					/* request */
+	struct svc_export *si_exp;			/* response */
+};
+
+struct nfsd4_setattr {
+	stateid_t	sa_stateid;         /* request */
+	u32		sa_bmval[3];        /* request */
+	struct iattr	sa_iattr;           /* request */
+	struct nfs4_acl *sa_acl;
+};
+
+struct nfsd4_setclientid {
+	nfs4_verifier	se_verf;            /* request */
+	u32		se_namelen;         /* request */
+	char *		se_name;            /* request */
+	u32		se_callback_prog;   /* request */
+	u32		se_callback_netid_len;  /* request */
+	char *		se_callback_netid_val;  /* request */
+	u32		se_callback_addr_len;   /* request */
+	char *		se_callback_addr_val;   /* request */
+	u32		se_callback_ident;  /* request */
+	clientid_t	se_clientid;        /* response */
+	nfs4_verifier	se_confirm;         /* response */
+};
+
+struct nfsd4_setclientid_confirm {
+	clientid_t	sc_clientid;
+	nfs4_verifier	sc_confirm;
+};
+
+/* also used for NVERIFY */
+struct nfsd4_verify {
+	u32		ve_bmval[3];        /* request */
+	u32		ve_attrlen;         /* request */
+	char *		ve_attrval;         /* request */
+};
+
+struct nfsd4_write {
+	stateid_t	wr_stateid;         /* request */
+	u64		wr_offset;          /* request */
+	u32		wr_stable_how;      /* request */
+	u32		wr_buflen;          /* request */
+	int		wr_vlen;
+
+	u32		wr_bytes_written;   /* response */
+	u32		wr_how_written;     /* response */
+	nfs4_verifier	wr_verifier;        /* response */
+};
+
+struct nfsd4_exchange_id {
+	nfs4_verifier	verifier;
+	struct xdr_netobj clname;
+	u32		flags;
+	clientid_t	clientid;
+	u32		seqid;
+	int		spa_how;
+};
+
+struct nfsd4_sequence {
+	struct nfs4_sessionid	sessionid;		/* request/response */
+	u32			seqid;			/* request/response */
+	u32			slotid;			/* request/response */
+	u32			maxslots;		/* request/response */
+	u32			cachethis;		/* request */
+#if 0
+	u32			target_maxslots;	/* response */
+	u32			status_flags;		/* response */
+#endif /* not yet */
+};
+
+struct nfsd4_destroy_session {
+	struct nfs4_sessionid	sessionid;
+};
+
+struct nfsd4_op {
+	int					opnum;
+	__be32					status;
+	union {
+		struct nfsd4_access		access;
+		struct nfsd4_close		close;
+		struct nfsd4_commit		commit;
+		struct nfsd4_create		create;
+		struct nfsd4_delegreturn	delegreturn;
+		struct nfsd4_getattr		getattr;
+		struct svc_fh *			getfh;
+		struct nfsd4_link		link;
+		struct nfsd4_lock		lock;
+		struct nfsd4_lockt		lockt;
+		struct nfsd4_locku		locku;
+		struct nfsd4_lookup		lookup;
+		struct nfsd4_verify		nverify;
+		struct nfsd4_open		open;
+		struct nfsd4_open_confirm	open_confirm;
+		struct nfsd4_open_downgrade	open_downgrade;
+		struct nfsd4_putfh		putfh;
+		struct nfsd4_read		read;
+		struct nfsd4_readdir		readdir;
+		struct nfsd4_readlink		readlink;
+		struct nfsd4_remove		remove;
+		struct nfsd4_rename		rename;
+		clientid_t			renew;
+		struct nfsd4_secinfo		secinfo;
+		struct nfsd4_setattr		setattr;
+		struct nfsd4_setclientid	setclientid;
+		struct nfsd4_setclientid_confirm setclientid_confirm;
+		struct nfsd4_verify		verify;
+		struct nfsd4_write		write;
+		struct nfsd4_release_lockowner	release_lockowner;
+
+		/* NFSv4.1 */
+		struct nfsd4_exchange_id	exchange_id;
+		struct nfsd4_create_session	create_session;
+		struct nfsd4_destroy_session	destroy_session;
+		struct nfsd4_sequence		sequence;
+	} u;
+	struct nfs4_replay *			replay;
+};
+
+struct nfsd4_compoundargs {
+	/* scratch variables for XDR decode */
+	__be32 *			p;
+	__be32 *			end;
+	struct page **			pagelist;
+	int				pagelen;
+	__be32				tmp[8];
+	__be32 *			tmpp;
+	struct tmpbuf {
+		struct tmpbuf *next;
+		void (*release)(const void *);
+		void *buf;
+	}				*to_free;
+
+	struct svc_rqst			*rqstp;
+
+	u32				taglen;
+	char *				tag;
+	u32				minorversion;
+	u32				opcnt;
+	struct nfsd4_op			*ops;
+	struct nfsd4_op			iops[8];
+};
+
+struct nfsd4_compoundres {
+	/* scratch variables for XDR encode */
+	__be32 *			p;
+	__be32 *			end;
+	struct xdr_buf *		xbuf;
+	struct svc_rqst *		rqstp;
+
+	u32				taglen;
+	char *				tag;
+	u32				opcnt;
+	__be32 *			tagp; /* tag, opcount encode location */
+	struct nfsd4_compound_state	cstate;
+};
+
+static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
+{
+	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
+	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
+}
+
+static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+{
+	return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
+}
+
+#define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
+
+static inline void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
+	cinfo->atomic = 1;
+	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+	if (cinfo->change_supported) {
+		cinfo->before_change = fhp->fh_pre_change;
+		cinfo->after_change = fhp->fh_post_change;
+	} else {
+		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+	}
+}
+
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
+		struct nfsd4_compoundargs *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
+		struct nfsd4_compoundres *);
+void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
+__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+		       struct dentry *dentry, __be32 *buffer, int *countp,
+		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_setclientid *setclid);
+extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_setclientid_confirm *setclientid_confirm);
+extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
+extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+		struct nfsd4_sequence *seq);
+extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+struct nfsd4_exchange_id *);
+		extern __be32 nfsd4_create_session(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_create_session *);
+extern __be32 nfsd4_sequence(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_sequence *);
+extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_destroy_session *);
+extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+		struct nfsd4_open *open);
+extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+		struct svc_fh *current_fh, struct nfsd4_open *open);
+extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_close *close);
+extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_open_downgrade *od);
+extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		struct nfsd4_lock *lock);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_lockt *lockt);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_locku *locku);
+extern __be32
+nfsd4_release_lockowner(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_release_lockowner *rlockowner);
+extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
+extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
+			  struct nfsd4_compound_state *, clientid_t *clid);
+#endif
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
deleted file mode 100644
index a165425dea41..000000000000
--- a/include/linux/nfsd/cache.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * include/linux/nfsd/cache.h
- *
- * Request reply cache. This was heavily inspired by the
- * implementation in 4.3BSD/4.4BSD.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef NFSCACHE_H
-#define NFSCACHE_H
-
-#include <linux/sunrpc/svc.h>
-
-/*
- * Representation of a reply cache entry.
- */
-struct svc_cacherep {
-	struct hlist_node	c_hash;
-	struct list_head	c_lru;
-
-	unsigned char		c_state,	/* unused, inprog, done */
-				c_type,		/* status, buffer */
-				c_secure : 1;	/* req came from port < 1024 */
-	struct sockaddr_in	c_addr;
-	__be32			c_xid;
-	u32			c_prot;
-	u32			c_proc;
-	u32			c_vers;
-	unsigned long		c_timestamp;
-	union {
-		struct kvec	u_vec;
-		__be32		u_status;
-	}			c_u;
-};
-
-#define c_replvec		c_u.u_vec
-#define c_replstat		c_u.u_status
-
-/* cache entry states */
-enum {
-	RC_UNUSED,
-	RC_INPROG,
-	RC_DONE
-};
-
-/* return values */
-enum {
-	RC_DROPIT,
-	RC_REPLY,
-	RC_DOIT,
-	RC_INTR
-};
-
-/*
- * Cache types.
- * We may want to add more types one day, e.g. for diropres and
- * attrstat replies. Using cache entries with fixed length instead
- * of buffer pointers may be more efficient.
- */
-enum {
-	RC_NOCACHE,
-	RC_REPLSTAT,
-	RC_REPLBUFF,
-};
-
-/*
- * If requests are retransmitted within this interval, they're dropped.
- */
-#define RC_DELAY		(HZ/5)
-
-int	nfsd_reply_cache_init(void);
-void	nfsd_reply_cache_shutdown(void);
-int	nfsd_cache_lookup(struct svc_rqst *, int);
-void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-
-#ifdef CONFIG_NFSD_V4
-void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
-#else  /* CONFIG_NFSD_V4 */
-static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
-{
-}
-#endif /* CONFIG_NFSD_V4 */
-
-#endif /* NFSCACHE_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
deleted file mode 100644
index 74f67c2aca34..000000000000
--- a/include/linux/nfsd/nfsd.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * linux/include/linux/nfsd/nfsd.h
- *
- * Hodge-podge collection of knfsd-related stuff.
- * I will sort this out later.
- *
- * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_NFSD_NFSD_H
-#define LINUX_NFSD_NFSD_H
-
-#include <linux/types.h>
-#include <linux/mount.h>
-
-#include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
-#include <linux/nfsd/stats.h>
-/*
- * nfsd version
- */
-#define NFSD_SUPPORTED_MINOR_VERSION	1
-
-struct readdir_cd {
-	__be32			err;	/* 0, nfserr, or nfserr_eof */
-};
-
-
-extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
-				nfsd_version4;
-extern u32			nfsd_supported_minorversion;
-extern struct mutex		nfsd_mutex;
-extern struct svc_serv		*nfsd_serv;
-extern spinlock_t		nfsd_drc_lock;
-extern unsigned int		nfsd_drc_max_mem;
-extern unsigned int		nfsd_drc_mem_used;
-
-extern const struct seq_operations nfs_exports_op;
-
-/*
- * Function prototypes.
- */
-int		nfsd_svc(unsigned short port, int nrservs);
-int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
-
-int		nfsd_nrthreads(void);
-int		nfsd_nrpools(void);
-int		nfsd_get_nrthreads(int n, int *);
-int		nfsd_set_nrthreads(int n, int *);
-
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-#ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
-#else
-#define nfsd_acl_version2 NULL
-#endif
-#ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
-#else
-#define nfsd_acl_version3 NULL
-#endif
-#endif
-
-enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-int nfsd_vers(int vers, enum vers_op change);
-int nfsd_minorversion(u32 minorversion, enum vers_op change);
-void nfsd_reset_versions(void);
-int nfsd_create_serv(void);
-
-extern int nfsd_max_blksize;
-
-/* 
- * NFSv4 State
- */
-#ifdef CONFIG_NFSD_V4
-extern unsigned int max_delegations;
-int nfs4_state_init(void);
-void nfsd4_free_slabs(void);
-int nfs4_state_start(void);
-void nfs4_state_shutdown(void);
-time_t nfs4_lease_time(void);
-void nfs4_reset_lease(time_t leasetime);
-int nfs4_reset_recoverydir(char *recdir);
-#else
-static inline int nfs4_state_init(void) { return 0; }
-static inline void nfsd4_free_slabs(void) { }
-static inline int nfs4_state_start(void) { return 0; }
-static inline void nfs4_state_shutdown(void) { }
-static inline time_t nfs4_lease_time(void) { return 0; }
-static inline void nfs4_reset_lease(time_t leasetime) { }
-static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
-#endif
-
-/*
- * lockd binding
- */
-void		nfsd_lockd_init(void);
-void		nfsd_lockd_shutdown(void);
-
-
-/*
- * These macros provide pre-xdr'ed values for faster operation.
- */
-#define	nfs_ok			cpu_to_be32(NFS_OK)
-#define	nfserr_perm		cpu_to_be32(NFSERR_PERM)
-#define	nfserr_noent		cpu_to_be32(NFSERR_NOENT)
-#define	nfserr_io		cpu_to_be32(NFSERR_IO)
-#define	nfserr_nxio		cpu_to_be32(NFSERR_NXIO)
-#define	nfserr_eagain		cpu_to_be32(NFSERR_EAGAIN)
-#define	nfserr_acces		cpu_to_be32(NFSERR_ACCES)
-#define	nfserr_exist		cpu_to_be32(NFSERR_EXIST)
-#define	nfserr_xdev		cpu_to_be32(NFSERR_XDEV)
-#define	nfserr_nodev		cpu_to_be32(NFSERR_NODEV)
-#define	nfserr_notdir		cpu_to_be32(NFSERR_NOTDIR)
-#define	nfserr_isdir		cpu_to_be32(NFSERR_ISDIR)
-#define	nfserr_inval		cpu_to_be32(NFSERR_INVAL)
-#define	nfserr_fbig		cpu_to_be32(NFSERR_FBIG)
-#define	nfserr_nospc		cpu_to_be32(NFSERR_NOSPC)
-#define	nfserr_rofs		cpu_to_be32(NFSERR_ROFS)
-#define	nfserr_mlink		cpu_to_be32(NFSERR_MLINK)
-#define	nfserr_opnotsupp	cpu_to_be32(NFSERR_OPNOTSUPP)
-#define	nfserr_nametoolong	cpu_to_be32(NFSERR_NAMETOOLONG)
-#define	nfserr_notempty		cpu_to_be32(NFSERR_NOTEMPTY)
-#define	nfserr_dquot		cpu_to_be32(NFSERR_DQUOT)
-#define	nfserr_stale		cpu_to_be32(NFSERR_STALE)
-#define	nfserr_remote		cpu_to_be32(NFSERR_REMOTE)
-#define	nfserr_wflush		cpu_to_be32(NFSERR_WFLUSH)
-#define	nfserr_badhandle	cpu_to_be32(NFSERR_BADHANDLE)
-#define	nfserr_notsync		cpu_to_be32(NFSERR_NOT_SYNC)
-#define	nfserr_badcookie	cpu_to_be32(NFSERR_BAD_COOKIE)
-#define	nfserr_notsupp		cpu_to_be32(NFSERR_NOTSUPP)
-#define	nfserr_toosmall		cpu_to_be32(NFSERR_TOOSMALL)
-#define	nfserr_serverfault	cpu_to_be32(NFSERR_SERVERFAULT)
-#define	nfserr_badtype		cpu_to_be32(NFSERR_BADTYPE)
-#define	nfserr_jukebox		cpu_to_be32(NFSERR_JUKEBOX)
-#define	nfserr_denied		cpu_to_be32(NFSERR_DENIED)
-#define	nfserr_deadlock		cpu_to_be32(NFSERR_DEADLOCK)
-#define nfserr_expired          cpu_to_be32(NFSERR_EXPIRED)
-#define	nfserr_bad_cookie	cpu_to_be32(NFSERR_BAD_COOKIE)
-#define	nfserr_same		cpu_to_be32(NFSERR_SAME)
-#define	nfserr_clid_inuse	cpu_to_be32(NFSERR_CLID_INUSE)
-#define	nfserr_stale_clientid	cpu_to_be32(NFSERR_STALE_CLIENTID)
-#define	nfserr_resource		cpu_to_be32(NFSERR_RESOURCE)
-#define	nfserr_moved		cpu_to_be32(NFSERR_MOVED)
-#define	nfserr_nofilehandle	cpu_to_be32(NFSERR_NOFILEHANDLE)
-#define	nfserr_minor_vers_mismatch	cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
-#define nfserr_share_denied	cpu_to_be32(NFSERR_SHARE_DENIED)
-#define nfserr_stale_stateid	cpu_to_be32(NFSERR_STALE_STATEID)
-#define nfserr_old_stateid	cpu_to_be32(NFSERR_OLD_STATEID)
-#define nfserr_bad_stateid	cpu_to_be32(NFSERR_BAD_STATEID)
-#define nfserr_bad_seqid	cpu_to_be32(NFSERR_BAD_SEQID)
-#define	nfserr_symlink		cpu_to_be32(NFSERR_SYMLINK)
-#define	nfserr_not_same		cpu_to_be32(NFSERR_NOT_SAME)
-#define	nfserr_restorefh	cpu_to_be32(NFSERR_RESTOREFH)
-#define	nfserr_attrnotsupp	cpu_to_be32(NFSERR_ATTRNOTSUPP)
-#define	nfserr_bad_xdr		cpu_to_be32(NFSERR_BAD_XDR)
-#define	nfserr_openmode		cpu_to_be32(NFSERR_OPENMODE)
-#define	nfserr_locks_held	cpu_to_be32(NFSERR_LOCKS_HELD)
-#define	nfserr_op_illegal	cpu_to_be32(NFSERR_OP_ILLEGAL)
-#define	nfserr_grace		cpu_to_be32(NFSERR_GRACE)
-#define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
-#define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
-#define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
-#define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
-#define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
-#define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
-#define nfserr_badiomode		cpu_to_be32(NFS4ERR_BADIOMODE)
-#define nfserr_badlayout		cpu_to_be32(NFS4ERR_BADLAYOUT)
-#define nfserr_bad_session_digest	cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
-#define nfserr_badsession		cpu_to_be32(NFS4ERR_BADSESSION)
-#define nfserr_badslot			cpu_to_be32(NFS4ERR_BADSLOT)
-#define nfserr_complete_already		cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
-#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
-#define nfserr_deleg_already_wanted	cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
-#define nfserr_back_chan_busy		cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
-#define nfserr_layouttrylater		cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
-#define nfserr_layoutunavailable	cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
-#define nfserr_nomatching_layout	cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
-#define nfserr_recallconflict		cpu_to_be32(NFS4ERR_RECALLCONFLICT)
-#define nfserr_unknown_layouttype	cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
-#define nfserr_seq_misordered		cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
-#define nfserr_sequence_pos		cpu_to_be32(NFS4ERR_SEQUENCE_POS)
-#define nfserr_req_too_big		cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
-#define nfserr_rep_too_big		cpu_to_be32(NFS4ERR_REP_TOO_BIG)
-#define nfserr_rep_too_big_to_cache	cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
-#define nfserr_retry_uncached_rep	cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
-#define nfserr_unsafe_compound		cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
-#define nfserr_too_many_ops		cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
-#define nfserr_op_not_in_session	cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
-#define nfserr_hash_alg_unsupp		cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
-#define nfserr_clientid_busy		cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
-#define nfserr_pnfs_io_hole		cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
-#define nfserr_seq_false_retry		cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
-#define nfserr_bad_high_slot		cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
-#define nfserr_deadsession		cpu_to_be32(NFS4ERR_DEADSESSION)
-#define nfserr_encr_alg_unsupp		cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
-#define nfserr_pnfs_no_layout		cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
-#define nfserr_not_only_op		cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
-#define nfserr_wrong_cred		cpu_to_be32(NFS4ERR_WRONG_CRED)
-#define nfserr_wrong_type		cpu_to_be32(NFS4ERR_WRONG_TYPE)
-#define nfserr_dirdeleg_unavail		cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
-#define nfserr_reject_deleg		cpu_to_be32(NFS4ERR_REJECT_DELEG)
-#define nfserr_returnconflict		cpu_to_be32(NFS4ERR_RETURNCONFLICT)
-#define nfserr_deleg_revoked		cpu_to_be32(NFS4ERR_DELEG_REVOKED)
-
-/* error codes for internal use */
-/* if a request fails due to kmalloc failure, it gets dropped.
- *  Client should resend eventually
- */
-#define	nfserr_dropit		cpu_to_be32(30000)
-/* end-of-file indicator in readdir */
-#define	nfserr_eof		cpu_to_be32(30001)
-/* replay detected */
-#define	nfserr_replay_me	cpu_to_be32(11001)
-/* nfs41 replay detected */
-#define	nfserr_replay_cache	cpu_to_be32(11002)
-
-/* Check for dir entries '.' and '..' */
-#define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
-
-/*
- * Time of server startup
- */
-extern struct timeval	nfssvc_boot;
-
-#ifdef CONFIG_NFSD_V4
-
-/* before processing a COMPOUND operation, we have to check that there
- * is enough space in the buffer for XDR encode to succeed.  otherwise,
- * we might process an operation with side effects, and be unable to
- * tell the client that the operation succeeded.
- *
- * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
- * needed to encode an "ordinary" _successful_ operation.  (GETATTR,
- * READ, READDIR, and READLINK have their own buffer checks.)  if we
- * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
- *
- * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
- * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
- * care is taken to ensure that we never fall below this level for any
- * reason.
- */
-#define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
-#define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
-
-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
-#define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
-
-/*
- * The following attributes are currently not supported by the NFSv4 server:
- *    ARCHIVE       (deprecated anyway)
- *    HIDDEN        (unlikely to be supported any time soon)
- *    MIMETYPE      (unlikely to be supported any time soon)
- *    QUOTA_*       (will be supported in a forthcoming patch)
- *    SYSTEM        (unlikely to be supported any time soon)
- *    TIME_BACKUP   (unlikely to be supported any time soon)
- *    TIME_CREATE   (unlikely to be supported any time soon)
- */
-#define NFSD4_SUPPORTED_ATTRS_WORD0                                                         \
-(FATTR4_WORD0_SUPPORTED_ATTRS   | FATTR4_WORD0_TYPE         | FATTR4_WORD0_FH_EXPIRE_TYPE   \
- | FATTR4_WORD0_CHANGE          | FATTR4_WORD0_SIZE         | FATTR4_WORD0_LINK_SUPPORT     \
- | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR   | FATTR4_WORD0_FSID             \
- | FATTR4_WORD0_UNIQUE_HANDLES  | FATTR4_WORD0_LEASE_TIME   | FATTR4_WORD0_RDATTR_ERROR     \
- | FATTR4_WORD0_ACLSUPPORT      | FATTR4_WORD0_CANSETTIME   | FATTR4_WORD0_CASE_INSENSITIVE \
- | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED                             \
- | FATTR4_WORD0_FILEHANDLE      | FATTR4_WORD0_FILEID       | FATTR4_WORD0_FILES_AVAIL      \
- | FATTR4_WORD0_FILES_FREE      | FATTR4_WORD0_FILES_TOTAL  | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS      \
- | FATTR4_WORD0_MAXFILESIZE     | FATTR4_WORD0_MAXLINK      | FATTR4_WORD0_MAXNAME          \
- | FATTR4_WORD0_MAXREAD         | FATTR4_WORD0_MAXWRITE     | FATTR4_WORD0_ACL)
-
-#define NFSD4_SUPPORTED_ATTRS_WORD1                                                         \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_NO_TRUNC     | FATTR4_WORD1_NUMLINKS         \
- | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
- | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
- | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
- | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
- | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
-
-#define NFSD4_SUPPORTED_ATTRS_WORD2 0
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
-	NFSD4_SUPPORTED_ATTRS_WORD0
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
-	NFSD4_SUPPORTED_ATTRS_WORD1
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
-
-static inline u32 nfsd_suppattrs0(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
-			    : NFSD4_SUPPORTED_ATTRS_WORD0;
-}
-
-static inline u32 nfsd_suppattrs1(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
-			    : NFSD4_SUPPORTED_ATTRS_WORD1;
-}
-
-static inline u32 nfsd_suppattrs2(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
-			    : NFSD4_SUPPORTED_ATTRS_WORD2;
-}
-
-/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
-#define NFSD_WRITEONLY_ATTRS_WORD1							    \
-(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
-
-/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
-#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
-(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
-#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
-#define NFSD_WRITEABLE_ATTRS_WORD2 0
-
-#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
-	NFSD_WRITEABLE_ATTRS_WORD0
-/*
- * we currently store the exclusive create verifier in the v_{a,m}time
- * attributes so the client can't set these at create time using EXCLUSIVE4_1
- */
-#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
-	(NFSD_WRITEABLE_ATTRS_WORD1 & \
-	 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
-#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
-	NFSD_WRITEABLE_ATTRS_WORD2
-
-#endif /* CONFIG_NFSD_V4 */
-
-#endif /* LINUX_NFSD_NFSD_H */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
deleted file mode 100644
index 2af75686e0d3..000000000000
--- a/include/linux/nfsd/state.h
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- *  linux/include/nfsd/state.h
- *
- *  Copyright (c) 2001 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Kendrick Smith <kmsmith@umich.edu>
- *  Andy Adamson <andros@umich.edu>
- *  
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *  
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _NFSD4_STATE_H
-#define _NFSD4_STATE_H
-
-#include <linux/nfsd/nfsfh.h>
-
-typedef struct {
-	u32             cl_boot;
-	u32             cl_id;
-} clientid_t;
-
-typedef struct {
-	u32             so_boot;
-	u32             so_stateownerid;
-	u32             so_fileid;
-} stateid_opaque_t;
-
-typedef struct {
-	u32                     si_generation;
-	stateid_opaque_t        si_opaque;
-} stateid_t;
-#define si_boot           si_opaque.so_boot
-#define si_stateownerid   si_opaque.so_stateownerid
-#define si_fileid         si_opaque.so_fileid
-
-#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
-#define STATEID_VAL(s) \
-	(s)->si_boot, \
-	(s)->si_stateownerid, \
-	(s)->si_fileid, \
-	(s)->si_generation
-
-struct nfsd4_cb_sequence {
-	/* args/res */
-	u32			cbs_minorversion;
-	struct nfs4_client	*cbs_clp;
-};
-
-struct nfs4_delegation {
-	struct list_head	dl_perfile;
-	struct list_head	dl_perclnt;
-	struct list_head	dl_recall_lru;  /* delegation recalled */
-	atomic_t		dl_count;       /* ref count */
-	struct nfs4_client	*dl_client;
-	struct nfs4_file	*dl_file;
-	struct file_lock	*dl_flock;
-	struct file		*dl_vfs_file;
-	u32			dl_type;
-	time_t			dl_time;
-/* For recall: */
-	u32			dl_ident;
-	stateid_t		dl_stateid;
-	struct knfsd_fh		dl_fh;
-	int			dl_retries;
-};
-
-/* client delegation callback info */
-struct nfs4_cb_conn {
-	/* SETCLIENTID info */
-	struct sockaddr_storage	cb_addr;
-	size_t			cb_addrlen;
-	u32                     cb_prog;
-	u32			cb_minorversion;
-	u32                     cb_ident;	/* minorversion 0 only */
-	/* RPC client info */
-	atomic_t		cb_set;     /* successful CB_NULL call */
-	struct rpc_clnt *       cb_client;
-};
-
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION     160
-/* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND	16
-/* Maximum  session per slot cache size */
-#define NFSD_SLOT_CACHE_SIZE		1024
-/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
-#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
-#define NFSD_MAX_MEM_PER_SESSION  \
-		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
-
-struct nfsd4_slot {
-	bool	sl_inuse;
-	bool	sl_cachethis;
-	u16	sl_opcnt;
-	u32	sl_seqid;
-	__be32	sl_status;
-	u32	sl_datalen;
-	char	sl_data[];
-};
-
-struct nfsd4_channel_attrs {
-	u32		headerpadsz;
-	u32		maxreq_sz;
-	u32		maxresp_sz;
-	u32		maxresp_cached;
-	u32		maxops;
-	u32		maxreqs;
-	u32		nr_rdma_attrs;
-	u32		rdma_attrs;
-};
-
-struct nfsd4_create_session {
-	clientid_t			clientid;
-	struct nfs4_sessionid		sessionid;
-	u32				seqid;
-	u32				flags;
-	struct nfsd4_channel_attrs	fore_channel;
-	struct nfsd4_channel_attrs	back_channel;
-	u32				callback_prog;
-	u32				uid;
-	u32				gid;
-};
-
-/* The single slot clientid cache structure */
-struct nfsd4_clid_slot {
-	u32				sl_seqid;
-	__be32				sl_status;
-	struct nfsd4_create_session	sl_cr_ses;
-};
-
-struct nfsd4_session {
-	struct kref		se_ref;
-	struct list_head	se_hash;	/* hash by sessionid */
-	struct list_head	se_perclnt;
-	u32			se_flags;
-	struct nfs4_client	*se_client;	/* for expire_client */
-	struct nfs4_sessionid	se_sessionid;
-	struct nfsd4_channel_attrs se_fchannel;
-	struct nfsd4_channel_attrs se_bchannel;
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
-};
-
-static inline void
-nfsd4_put_session(struct nfsd4_session *ses)
-{
-	extern void free_session(struct kref *kref);
-	kref_put(&ses->se_ref, free_session);
-}
-
-static inline void
-nfsd4_get_session(struct nfsd4_session *ses)
-{
-	kref_get(&ses->se_ref);
-}
-
-/* formatted contents of nfs4_sessionid */
-struct nfsd4_sessionid {
-	clientid_t	clientid;
-	u32		sequence;
-	u32		reserved;
-};
-
-#define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
-
-/*
- * struct nfs4_client - one per client.  Clientids live here.
- * 	o Each nfs4_client is hashed by clientid.
- *
- * 	o Each nfs4_clients is also hashed by name 
- * 	  (the opaque quantity initially sent by the client to identify itself).
- * 	  
- *	o cl_perclient list is used to ensure no dangling stateowner references
- *	  when we expire the nfs4_client
- */
-struct nfs4_client {
-	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
-	struct list_head	cl_strhash; 	/* hash by cl_name */
-	struct list_head	cl_openowners;
-	struct list_head	cl_delegations;
-	struct list_head        cl_lru;         /* tail queue */
-	struct xdr_netobj	cl_name; 	/* id generated by client */
-	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
-	nfs4_verifier		cl_verifier; 	/* generated by client */
-	time_t                  cl_time;        /* time of last lease renewal */
-	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
-	u32			cl_flavor;	/* setclientid pseudoflavor */
-	char			*cl_principal;	/* setclientid principal name */
-	struct svc_cred		cl_cred; 	/* setclientid principal */
-	clientid_t		cl_clientid;	/* generated by server */
-	nfs4_verifier		cl_confirm;	/* generated by server */
-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
-	atomic_t		cl_count;	/* ref count */
-	u32			cl_firststate;	/* recovery dir creation */
-
-	/* for nfs41 */
-	struct list_head	cl_sessions;
-	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
-	u32			cl_exchange_flags;
-	struct nfs4_sessionid	cl_sessionid;
-
-	/* for nfs41 callbacks */
-	/* We currently support a single back channel with a single slot */
-	unsigned long		cl_cb_slot_busy;
-	u32			cl_cb_seq_nr;
-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
-	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
-						/* wait here for slots */
-};
-
-/* struct nfs4_client_reset
- * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
- * upon lease reset, or from upcall to state_daemon (to read in state
- * from non-volitile storage) upon reboot.
- */
-struct nfs4_client_reclaim {
-	struct list_head	cr_strhash;	/* hash by cr_name */
-	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
-};
-
-static inline void
-update_stateid(stateid_t *stateid)
-{
-	stateid->si_generation++;
-}
-
-/* A reasonable value for REPLAY_ISIZE was estimated as follows:  
- * The OPEN response, typically the largest, requires 
- *   4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) +  8(verifier) + 
- *   4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + 
- *   20(deleg. space limit) + ~32(deleg. ace) = 112 bytes 
- */
-
-#define NFSD4_REPLAY_ISIZE       112 
-
-/*
- * Replay buffer, where the result of the last seqid-mutating operation 
- * is cached. 
- */
-struct nfs4_replay {
-	__be32			rp_status;
-	unsigned int		rp_buflen;
-	char			*rp_buf;
-	unsigned		intrp_allocated;
-	struct knfsd_fh		rp_openfh;
-	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
-};
-
-/*
-* nfs4_stateowner can either be an open_owner, or a lock_owner
-*
-*    so_idhash:  stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
-*         for lock_owner
-*    so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
-*         for lock_owner
-*    so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
-*         struct is reaped.
-*    so_perfilestate: heads the list of nfs4_stateid (either open or lock) 
-*         and is used to ensure no dangling nfs4_stateid references when we 
-*         release a stateowner.
-*    so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
-*         close is called to reap associated byte-range locks
-*    so_close_lru: (open) stateowner is placed on this list instead of being
-*         reaped (when so_perfilestate is empty) to hold the last close replay.
-*         reaped by laundramat thread after lease period.
-*/
-struct nfs4_stateowner {
-	struct kref		so_ref;
-	struct list_head        so_idhash;   /* hash by so_id */
-	struct list_head        so_strhash;   /* hash by op_name */
-	struct list_head        so_perclient;
-	struct list_head        so_stateids;
-	struct list_head        so_perstateid; /* for lockowners only */
-	struct list_head	so_close_lru; /* tail queue */
-	time_t			so_time; /* time of placement on so_close_lru */
-	int			so_is_open_owner; /* 1=openowner,0=lockowner */
-	u32                     so_id;
-	struct nfs4_client *    so_client;
-	/* after increment in ENCODE_SEQID_OP_TAIL, represents the next
-	 * sequence id expected from the client: */
-	u32                     so_seqid;
-	struct xdr_netobj       so_owner;     /* open owner name */
-	int                     so_confirmed; /* successful OPEN_CONFIRM? */
-	struct nfs4_replay	so_replay;
-};
-
-/*
-*  nfs4_file: a file opened by some number of (open) nfs4_stateowners.
-*    o fi_perfile list is used to search for conflicting 
-*      share_acces, share_deny on the file.
-*/
-struct nfs4_file {
-	atomic_t		fi_ref;
-	struct list_head        fi_hash;    /* hash by "struct inode *" */
-	struct list_head        fi_stateids;
-	struct list_head	fi_delegations;
-	struct inode		*fi_inode;
-	u32                     fi_id;      /* used with stateowner->so_id 
-					     * for stateid_hashtbl hash */
-	bool			fi_had_conflict;
-};
-
-/*
-* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
-*
-* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
-*
-* 	st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
-* 	st_perfile: file_hashtbl[] entry.
-* 	st_perfile_state: nfs4_stateowner->so_perfilestate
-*       st_perlockowner: (open stateid) list of lock nfs4_stateowners
-* 	st_access_bmap: used only for open stateid
-* 	st_deny_bmap: used only for open stateid
-*	st_openstp: open stateid lock stateid was derived from
-*
-* XXX: open stateids and lock stateids have diverged sufficiently that
-* we should consider defining separate structs for the two cases.
-*/
-
-struct nfs4_stateid {
-	struct list_head              st_hash; 
-	struct list_head              st_perfile;
-	struct list_head              st_perstateowner;
-	struct list_head              st_lockowners;
-	struct nfs4_stateowner      * st_stateowner;
-	struct nfs4_file            * st_file;
-	stateid_t                     st_stateid;
-	struct file                 * st_vfs_file;
-	unsigned long                 st_access_bmap;
-	unsigned long                 st_deny_bmap;
-	struct nfs4_stateid         * st_openstp;
-};
-
-/* flags for preprocess_seqid_op() */
-#define HAS_SESSION             0x00000001
-#define CONFIRM                 0x00000002
-#define OPEN_STATE              0x00000004
-#define LOCK_STATE              0x00000008
-#define RD_STATE	        0x00000010
-#define WR_STATE	        0x00000020
-#define CLOSE_STATE             0x00000040
-
-#define seqid_mutating_err(err)                       \
-	(((err) != nfserr_stale_clientid) &&    \
-	((err) != nfserr_bad_seqid) &&          \
-	((err) != nfserr_stale_stateid) &&      \
-	((err) != nfserr_bad_stateid))
-
-struct nfsd4_compound_state;
-
-extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
-		stateid_t *stateid, int flags, struct file **filp);
-extern void nfs4_lock_state(void);
-extern void nfs4_unlock_state(void);
-extern int nfs4_in_grace(void);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
-extern void put_nfs4_client(struct nfs4_client *clp);
-extern void nfs4_free_stateowner(struct kref *kref);
-extern int set_callback_cred(void);
-extern void nfsd4_probe_callback(struct nfs4_client *clp);
-extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
-extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern void nfsd4_init_recdir(char *recdir_name);
-extern int nfsd4_recdir_load(void);
-extern void nfsd4_shutdown_recdir(void);
-extern int nfs4_client_to_reclaim(const char *name);
-extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
-extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
-extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
-
-static inline void
-nfs4_put_stateowner(struct nfs4_stateowner *so)
-{
-	kref_put(&so->so_ref, nfs4_free_stateowner);
-}
-
-static inline void
-nfs4_get_stateowner(struct nfs4_stateowner *so)
-{
-	kref_get(&so->so_ref);
-}
-
-#endif   /* NFSD4_STATE_H */
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
deleted file mode 100644
index 58f824d854c2..000000000000
--- a/include/linux/nfsd/xdr.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * linux/include/linux/nfsd/xdr.h
- *
- * XDR types for nfsd. This is mainly a typing exercise.
- */
-
-#ifndef LINUX_NFSD_H
-#define LINUX_NFSD_H
-
-#include <linux/vfs.h>
-#include <linux/nfsd/nfsd.h>
-
-struct nfsd_fhandle {
-	struct svc_fh		fh;
-};
-
-struct nfsd_sattrargs {
-	struct svc_fh		fh;
-	struct iattr		attrs;
-};
-
-struct nfsd_diropargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-};
-
-struct nfsd_readargs {
-	struct svc_fh		fh;
-	__u32			offset;
-	__u32			count;
-	int			vlen;
-};
-
-struct nfsd_writeargs {
-	svc_fh			fh;
-	__u32			offset;
-	int			len;
-	int			vlen;
-};
-
-struct nfsd_createargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	struct iattr		attrs;
-};
-
-struct nfsd_renameargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd_readlinkargs {
-	struct svc_fh		fh;
-	char *			buffer;
-};
-	
-struct nfsd_linkargs {
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd_symlinkargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	char *			tname;
-	unsigned int		tlen;
-	struct iattr		attrs;
-};
-
-struct nfsd_readdirargs {
-	struct svc_fh		fh;
-	__u32			cookie;
-	__u32			count;
-	__be32 *		buffer;
-};
-
-struct nfsd_attrstat {
-	struct svc_fh		fh;
-	struct kstat		stat;
-};
-
-struct nfsd_diropres  {
-	struct svc_fh		fh;
-	struct kstat		stat;
-};
-
-struct nfsd_readlinkres {
-	int			len;
-};
-
-struct nfsd_readres {
-	struct svc_fh		fh;
-	unsigned long		count;
-	struct kstat		stat;
-};
-
-struct nfsd_readdirres {
-	int			count;
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-};
-
-struct nfsd_statfsres {
-	struct kstatfs		stats;
-};
-
-/*
- * Storage requirements for XDR arguments and results.
- */
-union nfsd_xdrstore {
-	struct nfsd_sattrargs	sattr;
-	struct nfsd_diropargs	dirop;
-	struct nfsd_readargs	read;
-	struct nfsd_writeargs	write;
-	struct nfsd_createargs	create;
-	struct nfsd_renameargs	rename;
-	struct nfsd_linkargs	link;
-	struct nfsd_symlinkargs	symlink;
-	struct nfsd_readdirargs	readdir;
-};
-
-#define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
-
-
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
-
-int nfssvc_encode_entry(void *, const char *name,
-			int namlen, loff_t offset, u64 ino, unsigned int);
-
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-
-/* Helper functions for NFSv2 ACL code */
-__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
-__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
-
-#endif /* LINUX_NFSD_H */
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h
deleted file mode 100644
index 421eddd65a25..000000000000
--- a/include/linux/nfsd/xdr3.h
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * linux/include/linux/nfsd/xdr3.h
- *
- * XDR types for NFSv3 in nfsd.
- *
- * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef _LINUX_NFSD_XDR3_H
-#define _LINUX_NFSD_XDR3_H
-
-#include <linux/nfsd/xdr.h>
-
-struct nfsd3_sattrargs {
-	struct svc_fh		fh;
-	struct iattr		attrs;
-	int			check_guard;
-	time_t			guardtime;
-};
-
-struct nfsd3_diropargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-};
-
-struct nfsd3_accessargs {
-	struct svc_fh		fh;
-	unsigned int		access;
-};
-
-struct nfsd3_readargs {
-	struct svc_fh		fh;
-	__u64			offset;
-	__u32			count;
-	int			vlen;
-};
-
-struct nfsd3_writeargs {
-	svc_fh			fh;
-	__u64			offset;
-	__u32			count;
-	int			stable;
-	__u32			len;
-	int			vlen;
-};
-
-struct nfsd3_createargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	int			createmode;
-	struct iattr		attrs;
-	__be32 *		verf;
-};
-
-struct nfsd3_mknodargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	__u32			ftype;
-	__u32			major, minor;
-	struct iattr		attrs;
-};
-
-struct nfsd3_renameargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd3_readlinkargs {
-	struct svc_fh		fh;
-	char *			buffer;
-};
-
-struct nfsd3_linkargs {
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd3_symlinkargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	char *			tname;
-	unsigned int		tlen;
-	struct iattr		attrs;
-};
-
-struct nfsd3_readdirargs {
-	struct svc_fh		fh;
-	__u64			cookie;
-	__u32			dircount;
-	__u32			count;
-	__be32 *		verf;
-	__be32 *		buffer;
-};
-
-struct nfsd3_commitargs {
-	struct svc_fh		fh;
-	__u64			offset;
-	__u32			count;
-};
-
-struct nfsd3_getaclargs {
-	struct svc_fh		fh;
-	int			mask;
-};
-
-struct posix_acl;
-struct nfsd3_setaclargs {
-	struct svc_fh		fh;
-	int			mask;
-	struct posix_acl	*acl_access;
-	struct posix_acl	*acl_default;
-};
-
-struct nfsd3_attrstat {
-	__be32			status;
-	struct svc_fh		fh;
-	struct kstat            stat;
-};
-
-/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
-struct nfsd3_diropres  {
-	__be32			status;
-	struct svc_fh		dirfh;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_accessres {
-	__be32			status;
-	struct svc_fh		fh;
-	__u32			access;
-};
-
-struct nfsd3_readlinkres {
-	__be32			status;
-	struct svc_fh		fh;
-	__u32			len;
-};
-
-struct nfsd3_readres {
-	__be32			status;
-	struct svc_fh		fh;
-	unsigned long		count;
-	int			eof;
-};
-
-struct nfsd3_writeres {
-	__be32			status;
-	struct svc_fh		fh;
-	unsigned long		count;
-	int			committed;
-};
-
-struct nfsd3_renameres {
-	__be32			status;
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-};
-
-struct nfsd3_linkres {
-	__be32			status;
-	struct svc_fh		tfh;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_readdirres {
-	__be32			status;
-	struct svc_fh		fh;
-	int			count;
-	__be32			verf[2];
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-	__be32 *		offset1;
-	struct svc_rqst *	rqstp;
-
-};
-
-struct nfsd3_fsstatres {
-	__be32			status;
-	struct kstatfs		stats;
-	__u32			invarsec;
-};
-
-struct nfsd3_fsinfores {
-	__be32			status;
-	__u32			f_rtmax;
-	__u32			f_rtpref;
-	__u32			f_rtmult;
-	__u32			f_wtmax;
-	__u32			f_wtpref;
-	__u32			f_wtmult;
-	__u32			f_dtpref;
-	__u64			f_maxfilesize;
-	__u32			f_properties;
-};
-
-struct nfsd3_pathconfres {
-	__be32			status;
-	__u32			p_link_max;
-	__u32			p_name_max;
-	__u32			p_no_trunc;
-	__u32			p_chown_restricted;
-	__u32			p_case_insensitive;
-	__u32			p_case_preserving;
-};
-
-struct nfsd3_commitres {
-	__be32			status;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_getaclres {
-	__be32			status;
-	struct svc_fh		fh;
-	int			mask;
-	struct posix_acl	*acl_access;
-	struct posix_acl	*acl_default;
-};
-
-/* dummy type for release */
-struct nfsd3_fhandle_pair {
-	__u32			dummy;
-	struct svc_fh		fh1;
-	struct svc_fh		fh2;
-};
-
-/*
- * Storage requirements for XDR arguments and results.
- */
-union nfsd3_xdrstore {
-	struct nfsd3_sattrargs		sattrargs;
-	struct nfsd3_diropargs		diropargs;
-	struct nfsd3_readargs		readargs;
-	struct nfsd3_writeargs		writeargs;
-	struct nfsd3_createargs		createargs;
-	struct nfsd3_renameargs		renameargs;
-	struct nfsd3_linkargs		linkargs;
-	struct nfsd3_symlinkargs	symlinkargs;
-	struct nfsd3_readdirargs	readdirargs;
-	struct nfsd3_diropres 		diropres;
-	struct nfsd3_accessres		accessres;
-	struct nfsd3_readlinkres	readlinkres;
-	struct nfsd3_readres		readres;
-	struct nfsd3_writeres		writeres;
-	struct nfsd3_renameres		renameres;
-	struct nfsd3_linkres		linkres;
-	struct nfsd3_readdirres		readdirres;
-	struct nfsd3_fsstatres		fsstatres;
-	struct nfsd3_fsinfores		fsinfores;
-	struct nfsd3_pathconfres	pathconfres;
-	struct nfsd3_commitres		commitres;
-	struct nfsd3_getaclres		getaclres;
-};
-
-#define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
-
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
-
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
-int nfs3svc_encode_entry(void *, const char *name,
-				int namlen, loff_t offset, u64 ino,
-				unsigned int);
-int nfs3svc_encode_entry_plus(void *, const char *name,
-				int namlen, loff_t offset, u64 ino,
-				unsigned int);
-/* Helper functions for NFSv3 ACL code */
-__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
-				struct svc_fh *fhp);
-__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
-
-
-#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
deleted file mode 100644
index 1bf266239c7e..000000000000
--- a/include/linux/nfsd/xdr4.h
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- *  include/linux/nfsd/xdr4.h
- *
- *  Server-side types for NFSv4.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Kendrick Smith <kmsmith@umich.edu>
- *  Andy Adamson   <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _LINUX_NFSD_XDR4_H
-#define _LINUX_NFSD_XDR4_H
-
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/nfsd.h>
-
-#define NFSD4_MAX_TAGLEN	128
-#define XDR_LEN(n)                     (((n) + 3) & ~3)
-
-struct nfsd4_compound_state {
-	struct svc_fh		current_fh;
-	struct svc_fh		save_fh;
-	struct nfs4_stateowner	*replay_owner;
-	/* For sessions DRC */
-	struct nfsd4_session	*session;
-	struct nfsd4_slot	*slot;
-	__be32			*datap;
-	size_t			iovlen;
-	u32			minorversion;
-	u32			status;
-};
-
-static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
-{
-	return cs->slot != NULL;
-}
-
-struct nfsd4_change_info {
-	u32		atomic;
-	bool		change_supported;
-	u32		before_ctime_sec;
-	u32		before_ctime_nsec;
-	u64		before_change;
-	u32		after_ctime_sec;
-	u32		after_ctime_nsec;
-	u64		after_change;
-};
-
-struct nfsd4_access {
-	u32		ac_req_access;      /* request */
-	u32		ac_supported;       /* response */
-	u32		ac_resp_access;     /* response */
-};
-
-struct nfsd4_close {
-	u32		cl_seqid;           /* request */
-	stateid_t	cl_stateid;         /* request+response */
-	struct nfs4_stateowner * cl_stateowner;	/* response */
-};
-
-struct nfsd4_commit {
-	u64		co_offset;          /* request */
-	u32		co_count;           /* request */
-	nfs4_verifier	co_verf;            /* response */
-};
-
-struct nfsd4_create {
-	u32		cr_namelen;         /* request */
-	char *		cr_name;            /* request */
-	u32		cr_type;            /* request */
-	union {                             /* request */
-		struct {
-			u32 namelen;
-			char *name;
-		} link;   /* NF4LNK */
-		struct {
-			u32 specdata1;
-			u32 specdata2;
-		} dev;    /* NF4BLK, NF4CHR */
-	} u;
-	u32		cr_bmval[3];        /* request */
-	struct iattr	cr_iattr;           /* request */
-	struct nfsd4_change_info  cr_cinfo; /* response */
-	struct nfs4_acl *cr_acl;
-};
-#define cr_linklen	u.link.namelen
-#define cr_linkname	u.link.name
-#define cr_specdata1	u.dev.specdata1
-#define cr_specdata2	u.dev.specdata2
-
-struct nfsd4_delegreturn {
-	stateid_t	dr_stateid;
-};
-
-struct nfsd4_getattr {
-	u32		ga_bmval[3];        /* request */
-	struct svc_fh	*ga_fhp;            /* response */
-};
-
-struct nfsd4_link {
-	u32		li_namelen;         /* request */
-	char *		li_name;            /* request */
-	struct nfsd4_change_info  li_cinfo; /* response */
-};
-
-struct nfsd4_lock_denied {
-	clientid_t	ld_clientid;
-	struct nfs4_stateowner   *ld_sop;
-	u64             ld_start;
-	u64             ld_length;
-	u32             ld_type;
-};
-
-struct nfsd4_lock {
-	/* request */
-	u32             lk_type;
-	u32             lk_reclaim;         /* boolean */
-	u64             lk_offset;
-	u64             lk_length;
-	u32             lk_is_new;
-	union {
-		struct {
-			u32             open_seqid;
-			stateid_t       open_stateid;
-			u32             lock_seqid;
-			clientid_t      clientid;
-			struct xdr_netobj owner;
-		} new;
-		struct {
-			stateid_t       lock_stateid;
-			u32             lock_seqid;
-		} old;
-	} v;
-
-	/* response */
-	union {
-		struct {
-			stateid_t               stateid;
-		} ok;
-		struct nfsd4_lock_denied        denied;
-	} u;
-	/* The lk_replay_owner is the open owner in the open_to_lock_owner
-	 * case and the lock owner otherwise: */
-	struct nfs4_stateowner *lk_replay_owner;
-};
-#define lk_new_open_seqid       v.new.open_seqid
-#define lk_new_open_stateid     v.new.open_stateid
-#define lk_new_lock_seqid       v.new.lock_seqid
-#define lk_new_clientid         v.new.clientid
-#define lk_new_owner            v.new.owner
-#define lk_old_lock_stateid     v.old.lock_stateid
-#define lk_old_lock_seqid       v.old.lock_seqid
-
-#define lk_rflags       u.ok.rflags
-#define lk_resp_stateid u.ok.stateid
-#define lk_denied       u.denied
-
-
-struct nfsd4_lockt {
-	u32				lt_type;
-	clientid_t			lt_clientid;
-	struct xdr_netobj		lt_owner;
-	u64				lt_offset;
-	u64				lt_length;
-	struct nfs4_stateowner * 	lt_stateowner;
-	struct nfsd4_lock_denied  	lt_denied;
-};
-
- 
-struct nfsd4_locku {
-	u32             lu_type;
-	u32             lu_seqid;
-	stateid_t       lu_stateid;
-	u64             lu_offset;
-	u64             lu_length;
-	struct nfs4_stateowner  *lu_stateowner;
-};
-
-
-struct nfsd4_lookup {
-	u32		lo_len;             /* request */
-	char *		lo_name;            /* request */
-};
-
-struct nfsd4_putfh {
-	u32		pf_fhlen;           /* request */
-	char		*pf_fhval;          /* request */
-};
-
-struct nfsd4_open {
-	u32		op_claim_type;      /* request */
-	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
-	u32		op_delegate_type;   /* request - CLAIM_PREV only */
-	stateid_t       op_delegate_stateid; /* request - response */
-	u32		op_create;     	    /* request */
-	u32		op_createmode;      /* request */
-	u32		op_bmval[3];        /* request */
-	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
-	nfs4_verifier	verf;               /* EXCLUSIVE4 */
-	clientid_t	op_clientid;        /* request */
-	struct xdr_netobj op_owner;           /* request */
-	u32		op_seqid;           /* request */
-	u32		op_share_access;    /* request */
-	u32		op_share_deny;      /* request */
-	stateid_t	op_stateid;         /* response */
-	u32		op_recall;          /* recall */
-	struct nfsd4_change_info  op_cinfo; /* response */
-	u32		op_rflags;          /* response */
-	int		op_truncate;        /* used during processing */
-	struct nfs4_stateowner *op_stateowner; /* used during processing */
-	struct nfs4_acl *op_acl;
-};
-#define op_iattr	iattr
-#define op_verf		verf
-
-struct nfsd4_open_confirm {
-	stateid_t	oc_req_stateid		/* request */;
-	u32		oc_seqid    		/* request */;
-	stateid_t	oc_resp_stateid		/* response */;
-	struct nfs4_stateowner * oc_stateowner;	/* response */
-};
-
-struct nfsd4_open_downgrade {
-	stateid_t       od_stateid;
-	u32             od_seqid;
-	u32             od_share_access;
-	u32             od_share_deny;
-	struct nfs4_stateowner *od_stateowner;
-};
-
-
-struct nfsd4_read {
-	stateid_t	rd_stateid;         /* request */
-	u64		rd_offset;          /* request */
-	u32		rd_length;          /* request */
-	int		rd_vlen;
-	struct file     *rd_filp;
-	
-	struct svc_rqst *rd_rqstp;          /* response */
-	struct svc_fh * rd_fhp;             /* response */
-};
-
-struct nfsd4_readdir {
-	u64		rd_cookie;          /* request */
-	nfs4_verifier	rd_verf;            /* request */
-	u32		rd_dircount;        /* request */
-	u32		rd_maxcount;        /* request */
-	u32		rd_bmval[3];        /* request */
-	struct svc_rqst *rd_rqstp;          /* response */
-	struct svc_fh * rd_fhp;             /* response */
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-};
-
-struct nfsd4_release_lockowner {
-	clientid_t        rl_clientid;
-	struct xdr_netobj rl_owner;
-};
-struct nfsd4_readlink {
-	struct svc_rqst *rl_rqstp;          /* request */
-	struct svc_fh *	rl_fhp;             /* request */
-};
-
-struct nfsd4_remove {
-	u32		rm_namelen;         /* request */
-	char *		rm_name;            /* request */
-	struct nfsd4_change_info  rm_cinfo; /* response */
-};
-
-struct nfsd4_rename {
-	u32		rn_snamelen;        /* request */
-	char *		rn_sname;           /* request */
-	u32		rn_tnamelen;        /* request */
-	char *		rn_tname;           /* request */
-	struct nfsd4_change_info  rn_sinfo; /* response */
-	struct nfsd4_change_info  rn_tinfo; /* response */
-};
-
-struct nfsd4_secinfo {
-	u32 si_namelen;					/* request */
-	char *si_name;					/* request */
-	struct svc_export *si_exp;			/* response */
-};
-
-struct nfsd4_setattr {
-	stateid_t	sa_stateid;         /* request */
-	u32		sa_bmval[3];        /* request */
-	struct iattr	sa_iattr;           /* request */
-	struct nfs4_acl *sa_acl;
-};
-
-struct nfsd4_setclientid {
-	nfs4_verifier	se_verf;            /* request */
-	u32		se_namelen;         /* request */
-	char *		se_name;            /* request */
-	u32		se_callback_prog;   /* request */
-	u32		se_callback_netid_len;  /* request */
-	char *		se_callback_netid_val;  /* request */
-	u32		se_callback_addr_len;   /* request */
-	char *		se_callback_addr_val;   /* request */
-	u32		se_callback_ident;  /* request */
-	clientid_t	se_clientid;        /* response */
-	nfs4_verifier	se_confirm;         /* response */
-};
-
-struct nfsd4_setclientid_confirm {
-	clientid_t	sc_clientid;
-	nfs4_verifier	sc_confirm;
-};
-
-/* also used for NVERIFY */
-struct nfsd4_verify {
-	u32		ve_bmval[3];        /* request */
-	u32		ve_attrlen;         /* request */
-	char *		ve_attrval;         /* request */
-};
-
-struct nfsd4_write {
-	stateid_t	wr_stateid;         /* request */
-	u64		wr_offset;          /* request */
-	u32		wr_stable_how;      /* request */
-	u32		wr_buflen;          /* request */
-	int		wr_vlen;
-
-	u32		wr_bytes_written;   /* response */
-	u32		wr_how_written;     /* response */
-	nfs4_verifier	wr_verifier;        /* response */
-};
-
-struct nfsd4_exchange_id {
-	nfs4_verifier	verifier;
-	struct xdr_netobj clname;
-	u32		flags;
-	clientid_t	clientid;
-	u32		seqid;
-	int		spa_how;
-};
-
-struct nfsd4_sequence {
-	struct nfs4_sessionid	sessionid;		/* request/response */
-	u32			seqid;			/* request/response */
-	u32			slotid;			/* request/response */
-	u32			maxslots;		/* request/response */
-	u32			cachethis;		/* request */
-#if 0
-	u32			target_maxslots;	/* response */
-	u32			status_flags;		/* response */
-#endif /* not yet */
-};
-
-struct nfsd4_destroy_session {
-	struct nfs4_sessionid	sessionid;
-};
-
-struct nfsd4_op {
-	int					opnum;
-	__be32					status;
-	union {
-		struct nfsd4_access		access;
-		struct nfsd4_close		close;
-		struct nfsd4_commit		commit;
-		struct nfsd4_create		create;
-		struct nfsd4_delegreturn	delegreturn;
-		struct nfsd4_getattr		getattr;
-		struct svc_fh *			getfh;
-		struct nfsd4_link		link;
-		struct nfsd4_lock		lock;
-		struct nfsd4_lockt		lockt;
-		struct nfsd4_locku		locku;
-		struct nfsd4_lookup		lookup;
-		struct nfsd4_verify		nverify;
-		struct nfsd4_open		open;
-		struct nfsd4_open_confirm	open_confirm;
-		struct nfsd4_open_downgrade	open_downgrade;
-		struct nfsd4_putfh		putfh;
-		struct nfsd4_read		read;
-		struct nfsd4_readdir		readdir;
-		struct nfsd4_readlink		readlink;
-		struct nfsd4_remove		remove;
-		struct nfsd4_rename		rename;
-		clientid_t			renew;
-		struct nfsd4_secinfo		secinfo;
-		struct nfsd4_setattr		setattr;
-		struct nfsd4_setclientid	setclientid;
-		struct nfsd4_setclientid_confirm setclientid_confirm;
-		struct nfsd4_verify		verify;
-		struct nfsd4_write		write;
-		struct nfsd4_release_lockowner	release_lockowner;
-
-		/* NFSv4.1 */
-		struct nfsd4_exchange_id	exchange_id;
-		struct nfsd4_create_session	create_session;
-		struct nfsd4_destroy_session	destroy_session;
-		struct nfsd4_sequence		sequence;
-	} u;
-	struct nfs4_replay *			replay;
-};
-
-struct nfsd4_compoundargs {
-	/* scratch variables for XDR decode */
-	__be32 *			p;
-	__be32 *			end;
-	struct page **			pagelist;
-	int				pagelen;
-	__be32				tmp[8];
-	__be32 *			tmpp;
-	struct tmpbuf {
-		struct tmpbuf *next;
-		void (*release)(const void *);
-		void *buf;
-	}				*to_free;
-
-	struct svc_rqst			*rqstp;
-
-	u32				taglen;
-	char *				tag;
-	u32				minorversion;
-	u32				opcnt;
-	struct nfsd4_op			*ops;
-	struct nfsd4_op			iops[8];
-};
-
-struct nfsd4_compoundres {
-	/* scratch variables for XDR encode */
-	__be32 *			p;
-	__be32 *			end;
-	struct xdr_buf *		xbuf;
-	struct svc_rqst *		rqstp;
-
-	u32				taglen;
-	char *				tag;
-	u32				opcnt;
-	__be32 *			tagp; /* tag, opcount encode location */
-	struct nfsd4_compound_state	cstate;
-};
-
-static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
-{
-	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
-	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
-}
-
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
-{
-	return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
-}
-
-#define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
-
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
-	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
-	cinfo->atomic = 1;
-	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
-	if (cinfo->change_supported) {
-		cinfo->before_change = fhp->fh_pre_change;
-		cinfo->after_change = fhp->fh_post_change;
-	} else {
-		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
-		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
-		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
-		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-	}
-}
-
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
-void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
-void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
-__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-		       struct dentry *dentry, __be32 *buffer, int *countp,
-		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
-extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid *setclid);
-extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid_confirm *setclientid_confirm);
-extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
-extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
-		struct nfsd4_sequence *seq);
-extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-struct nfsd4_exchange_id *);
-		extern __be32 nfsd4_create_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_create_session *);
-extern __be32 nfsd4_sequence(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_sequence *);
-extern __be32 nfsd4_destroy_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_destroy_session *);
-extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
-		struct nfsd4_open *open);
-extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
-		struct svc_fh *current_fh, struct nfsd4_open *open);
-extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
-extern __be32 nfsd4_close(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_close *close);
-extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_open_downgrade *od);
-extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
-		struct nfsd4_lock *lock);
-extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_lockt *lockt);
-extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_locku *locku);
-extern __be32
-nfsd4_release_lockowner(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_release_lockowner *rlockowner);
-extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
-extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
-extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
-			  struct nfsd4_compound_state *, clientid_t *clid);
-#endif
-
-/*
- * Local variables:
- *  c-basic-offset: 8
- * End:
- */
-- 
cgit v1.2.3


From e8e8753f7a32ce4f636771126fc8eba0dc4ad817 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 14 Dec 2009 12:53:32 -0500
Subject: nfsd: new interface to advertise export features

Soon we will add the new V4ROOT flag, and allow the INSECURE flag to
vary by pseudoflavor.  It would be useful for nfs-utils (for example,
for improved exportfs error reporting) to be able to know when this
happens.  Use this new interface for that purpose.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c            | 21 +++++++++++++++++++++
 include/linux/nfsd/export.h |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0415680d3f58..e7051ac4dc73 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -31,6 +31,7 @@ enum {
 	NFSD_Getfd,
 	NFSD_Getfs,
 	NFSD_List,
+	NFSD_Export_features,
 	NFSD_Fh,
 	NFSD_FO_UnlockIP,
 	NFSD_FO_UnlockFS,
@@ -149,6 +150,24 @@ static const struct file_operations exports_operations = {
 	.owner		= THIS_MODULE,
 };
 
+static int export_features_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
+	return 0;
+}
+
+static int export_features_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, export_features_show, NULL);
+}
+
+static struct file_operations export_features_operations = {
+	.open		= export_features_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
 extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
 
@@ -1306,6 +1325,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+		[NFSD_Export_features] = {"export_features",
+					&export_features_operations, S_IRUGO},
 		[NFSD_FO_UnlockIP] = {"unlock_ip",
 					&transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index ef3d416fcf67..4f1df1d7312c 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -39,7 +39,8 @@
 #define NFSEXP_FSID		0x2000
 #define	NFSEXP_CROSSMOUNT	0x4000
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
-#define NFSEXP_ALLFLAGS		0xFE3F
+/* All flags that we claim to support.  (Note we don't support NOACL.) */
+#define NFSEXP_ALLFLAGS		0x7E3F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3


From 12045a6ee9908b38b6d286530c7d816e39071346 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 8 Dec 2009 18:15:52 -0500
Subject: nfsd: let "insecure" flag vary by pseudoflavor

This was an oversight; it should be among the export flags that can be
allowed to vary by pseudoflavor.  This allows an administrator to (for
example) allow auth_sys mounts only from low ports, but allow auth_krb5
mounts to use any port.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsfh.c             | 4 +++-
 include/linux/nfsd/export.h | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0eb1c59f5ab8..951938d6c495 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -88,8 +88,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
 static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
 					  struct svc_export *exp)
 {
+	int flags = nfsexp_flags(rqstp, exp);
+
 	/* Check if the request originated from a secure port. */
-	if (!rqstp->rq_secure && EX_SECURE(exp)) {
+	if (!rqstp->rq_secure && (flags & NFSEXP_INSECURE_PORT)) {
 		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 		dprintk(KERN_WARNING
 		       "nfsd: request from insecure port %s!\n",
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 4f1df1d7312c..4cafbe1255f0 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -44,7 +44,8 @@
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-					| NFSEXP_ALLSQUASH)
+					| NFSEXP_ALLSQUASH \
+					| NFSEXP_INSECURE_PORT)
 
 #ifdef __KERNEL__
 
@@ -109,7 +110,6 @@ struct svc_expkey {
 	struct path		ek_path;
 };
 
-#define EX_SECURE(exp)		(!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
 #define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-- 
cgit v1.2.3


From 9e1b9b80721661bd63b3662453767b22cd614fe7 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Sat, 7 Nov 2009 21:03:54 +0000
Subject: module: make MODULE_SYMBOL_PREFIX into a CONFIG option

The next commit will require the use of MODULE_SYMBOL_PREFIX in
.tmp_exports-asm.S.  Currently it is mixed in with C structure
definitions in "asm/module.h".  Move the definition of this arch option
into Kconfig, so it can be easily accessed by any code.

This also lets modpost.c use the same definition.  Previously modpost
relied on a hardcoded list of architectures in mk_elfconfig.c.

A build test for blackfin, one of the two MODULE_SYMBOL_PREFIX archs,
showed the generated code was unchanged.  vmlinux was identical save
for build ids, and an apparently randomized suffix on a single "__key"
symbol in the kallsyms data).

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Acked-by: Mike Frysinger <vapier@gentoo.org> (blackfin)
CC: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/blackfin/Kconfig              | 4 ++++
 arch/blackfin/include/asm/module.h | 2 --
 arch/blackfin/kernel/vmlinux.lds.S | 2 --
 arch/h8300/Kconfig                 | 4 ++++
 arch/h8300/include/asm/module.h    | 2 --
 arch/h8300/kernel/vmlinux.lds.S    | 1 -
 include/asm-generic/vmlinux.lds.h  | 8 ++++++--
 include/linux/module.h             | 6 ++++--
 scripts/Makefile.lib               | 5 +++++
 scripts/mod/Makefile               | 2 +-
 scripts/mod/mk_elfconfig.c         | 9 ---------
 scripts/mod/modpost.c              | 9 +++++++++
 12 files changed, 33 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ae6a60f10120..2180433213b7 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -5,6 +5,10 @@
 
 mainmenu "Blackfin Kernel Configuration"
 
+config SYMBOL_PREFIX
+	string
+	default "_"
+
 config MMU
 	def_bool n
 
diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h
index 9c1cfffddd9b..4282b169ead9 100644
--- a/arch/blackfin/include/asm/module.h
+++ b/arch/blackfin/include/asm/module.h
@@ -7,8 +7,6 @@
 #ifndef _ASM_BFIN_MODULE_H
 #define _ASM_BFIN_MODULE_H
 
-#define MODULE_SYMBOL_PREFIX "_"
-
 #define Elf_Shdr        Elf32_Shdr
 #define Elf_Sym         Elf32_Sym
 #define Elf_Ehdr        Elf32_Ehdr
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 10e12539000e..f39707c6590d 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -4,8 +4,6 @@
  * Licensed under the GPL-2 or later
  */
 
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
-
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/mem_map.h>
 #include <asm/page.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 9420648352b8..53cc669e6d59 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -10,6 +10,10 @@ config H8300
 	default y
 	select HAVE_IDE
 
+config SYMBOL_PREFIX
+	string
+	default "_"
+
 config MMU
 	bool
 	default n
diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h
index de23231f3196..8e46724b7c09 100644
--- a/arch/h8300/include/asm/module.h
+++ b/arch/h8300/include/asm/module.h
@@ -8,6 +8,4 @@ struct mod_arch_specific { };
 #define Elf_Sym Elf32_Sym
 #define Elf_Ehdr Elf32_Ehdr
 
-#define MODULE_SYMBOL_PREFIX "_"
-
 #endif /* _ASM_H8/300_MODULE_H */
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index b9e24907e6ea..03d356d96e5d 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,4 +1,3 @@
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b6e818f4b247..67e652068e0e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -52,8 +52,12 @@
 #define LOAD_OFFSET 0
 #endif
 
-#ifndef VMLINUX_SYMBOL
-#define VMLINUX_SYMBOL(_sym_) _sym_
+#ifndef SYMBOL_PREFIX
+#define VMLINUX_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
 #endif
 
 /* Align . to a 8 byte boundary equals to maximum function alignment. */
diff --git a/include/linux/module.h b/include/linux/module.h
index 482efc865acf..6cb1a3cab5d3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -25,8 +25,10 @@
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
-/* some toolchains uses a `_' prefix for all user symbols */
-#ifndef MODULE_SYMBOL_PREFIX
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
 #define MODULE_SYMBOL_PREFIX ""
 #endif
 
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ffdafb26f539..224d85e72ef1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \
 		$(CFLAGS_GCOV))
 endif
 
+ifdef CONFIG_SYMBOL_PREFIX
+_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))
+endif
+
+
 # If building the kernel in a separate objtree expand all occurrences
 # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
 
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 11d69c35e5b4..ff954f8168c1 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -8,7 +8,7 @@ modpost-objs	:= modpost.o file2alias.o sumversion.o
 $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
 
 quiet_cmd_elfconfig = MKELF   $@
-      cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@
+      cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@
 
 $(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE
 	$(call if_changed,elfconfig)
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index 6a96d47bd1e6..639bca7ba559 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -9,9 +9,6 @@ main(int argc, char **argv)
 	unsigned char ei[EI_NIDENT];
 	union { short s; char c[2]; } endian_test;
 
-	if (argc != 2) {
-		fprintf(stderr, "Error: no arch\n");
-	}
 	if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) {
 		fprintf(stderr, "Error: input truncated\n");
 		return 1;
@@ -55,12 +52,6 @@ main(int argc, char **argv)
 	else
 		exit(1);
 
-	if ((strcmp(argv[1], "h8300") == 0)
-	    || (strcmp(argv[1], "blackfin") == 0))
-		printf("#define MODULE_SYMBOL_PREFIX \"_\"\n");
-	else
-		printf("#define MODULE_SYMBOL_PREFIX \"\"\n");
-
 	return 0;
 }
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 801a16a17545..fb0f9b711af3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -15,8 +15,17 @@
 #include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
+#include "../../include/linux/autoconf.h"
 #include "../../include/linux/license.h"
 
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define MODULE_SYMBOL_PREFIX ""
+#endif
+
+
 /* Are we using CONFIG_MODVERSIONS? */
 int modversions = 0;
 /* Warn about undefined symbols? (do so if we have vmlinux) */
-- 
cgit v1.2.3


From e36c54582c6f14adc9e10473e2aec2cc4f0acc03 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 14 Dec 2009 15:58:33 -0500
Subject: tracing: Fix return of trace_dump_stack()

The trace_dump_stack() returned a value for a void function.

Also, added the missing stub for trace_dump_stack() when tracing is
not configured.

Reported-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <20091214162713.GA31060@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 1 +
 kernel/trace/trace.c   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5ad4199fb073..f1dc752da0d2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -527,6 +527,7 @@ trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
+static inline void trace_dump_stack(void) { }
 static inline int
 trace_printk(const char *fmt, ...)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bd7b969a729a..ee61915935d5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1158,7 +1158,7 @@ void trace_dump_stack(void)
 	unsigned long flags;
 
 	if (tracing_disabled || tracing_selftest_running)
-		return 0;
+		return;
 
 	local_save_flags(flags);
 
-- 
cgit v1.2.3


From 4e7b8a6cef64a4c1f1194f9926f794c2b75ebdd7 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 14 Dec 2009 17:58:13 -0800
Subject: nodemask: make NODEMASK_ALLOC more general

This is a series of patches to provide control over the location of the
allocation and freeing of persistent huge pages on a NUMA platform.
Please consider for merging into mmotm.

This series uses two mechanisms to constrain the nodes from which
persistent huge pages are allocated: 1) the task NUMA mempolicy of the
task modifying a new sysctl "nr_hugepages_mempolicy", based on a
suggestion by Mel Gorman; and 2) a subset of the hugepages hstate sysfs
attributes have been added [in V4] to each node system device under:

	/sys/devices/node/node[0-9]*/hugepages

The per node attibutes allow direct assignment of a huge page count on a
specific node, regardless of the task's mempolicy or cpuset constraints.

This patch:

NODEMASK_ALLOC(x, m) assumes x is a type of struct, which is unnecessary.
It's perfectly reasonable to use this macro to allocate a nodemask_t,
which is anonymous, either dynamically or on the stack depending on
NODES_SHIFT.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index b359c4a9ec9e..ca9b489a27f8 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -481,14 +481,14 @@ static inline int num_node_state(enum node_states state)
 
 /*
  * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h)
+ * NODEMASK_ALLOC(x, m) allocates an object of type 'x' with the name 'm'.
  */
-
 #if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */
-#define NODEMASK_ALLOC(x, m) struct x *m = kmalloc(sizeof(*m), GFP_KERNEL)
-#define NODEMASK_FREE(m) kfree(m)
+#define NODEMASK_ALLOC(x, m)		x *m = kmalloc(sizeof(*m), GFP_KERNEL)
+#define NODEMASK_FREE(m)		kfree(m)
 #else
-#define NODEMASK_ALLOC(x, m) struct x _m, *m = &_m
-#define NODEMASK_FREE(m)
+#define NODEMASK_ALLOC(x, m)		x _m, *m = &_m
+#define NODEMASK_FREE(m)		do {} while (0)
 #endif
 
 /* A example struture for using NODEMASK_ALLOC, used in mempolicy. */
@@ -497,8 +497,9 @@ struct nodemask_scratch {
 	nodemask_t	mask2;
 };
 
-#define NODEMASK_SCRATCH(x) NODEMASK_ALLOC(nodemask_scratch, x)
-#define NODEMASK_SCRATCH_FREE(x)  NODEMASK_FREE(x)
+#define NODEMASK_SCRATCH(x)	\
+		NODEMASK_ALLOC(struct nodemask_scratch, x)
+#define NODEMASK_SCRATCH_FREE(x)	NODEMASK_FREE(x)
 
 
 #endif /* __LINUX_NODEMASK_H */
-- 
cgit v1.2.3


From c1e6c8d074ea3621106548654cc244d2edc12ead Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:17 -0800
Subject: hugetlb: factor init_nodemask_of_node()

Factor init_nodemask_of_node() out of the nodemask_of_node() macro.

This will be used to populate the huge pages "nodes_allowed" nodemask for
a single node when basing nodes_allowed on a preferred/local mempolicy or
when a persistent huge page pool page count is modified via a per node
sysfs attribute.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index ca9b489a27f8..cbd521a03127 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -245,14 +245,19 @@ static inline int __next_node(int n, const nodemask_t *srcp)
 	return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
 }
 
+static inline void init_nodemask_of_node(nodemask_t *mask, int node)
+{
+	nodes_clear(*mask);
+	node_set(node, *mask);
+}
+
 #define nodemask_of_node(node)						\
 ({									\
 	typeof(_unused_nodemask_arg_) m;				\
 	if (sizeof(m) == sizeof(unsigned long)) {			\
-		m.bits[0] = 1UL<<(node);				\
+		m.bits[0] = 1UL << (node);				\
 	} else {							\
-		nodes_clear(m);						\
-		node_set((node), m);					\
+		init_nodemask_of_node(&m, (node));			\
 	}								\
 	m;								\
 })
-- 
cgit v1.2.3


From 06808b0827e1cd14eedc96bac2655d5b37ac246c Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:21 -0800
Subject: hugetlb: derive huge pages nodes allowed from task mempolicy

This patch derives a "nodes_allowed" node mask from the numa mempolicy of
the task modifying the number of persistent huge pages to control the
allocation, freeing and adjusting of surplus huge pages when the pool page
count is modified via the new sysctl or sysfs attribute
"nr_hugepages_mempolicy".  The nodes_allowed mask is derived as follows:

* For "default" [NULL] task mempolicy, a NULL nodemask_t pointer
  is produced.  This will cause the hugetlb subsystem to use
  node_online_map as the "nodes_allowed".  This preserves the
  behavior before this patch.
* For "preferred" mempolicy, including explicit local allocation,
  a nodemask with the single preferred node will be produced.
  "local" policy will NOT track any internode migrations of the
  task adjusting nr_hugepages.
* For "bind" and "interleave" policy, the mempolicy's nodemask
  will be used.
* Other than to inform the construction of the nodes_allowed node
  mask, the actual mempolicy mode is ignored.  That is, all modes
  behave like interleave over the resulting nodes_allowed mask
  with no "fallback".

See the updated documentation [next patch] for more information
about the implications of this patch.

Examples:

Starting with:

	Node 0 HugePages_Total:     0
	Node 1 HugePages_Total:     0
	Node 2 HugePages_Total:     0
	Node 3 HugePages_Total:     0

Default behavior [with or without this patch] balances persistent
hugepage allocation across nodes [with sufficient contiguous memory]:

	sysctl vm.nr_hugepages[_mempolicy]=32

yields:

	Node 0 HugePages_Total:     8
	Node 1 HugePages_Total:     8
	Node 2 HugePages_Total:     8
	Node 3 HugePages_Total:     8

Of course, we only have nr_hugepages_mempolicy with the patch,
but with default mempolicy, nr_hugepages_mempolicy behaves the
same as nr_hugepages.

Applying mempolicy--e.g., with numactl [using '-m' a.k.a.
'--membind' because it allows multiple nodes to be specified
and it's easy to type]--we can allocate huge pages on
individual nodes or sets of nodes.  So, starting from the
condition above, with 8 huge pages per node, add 8 more to
node 2 using:

	numactl -m 2 sysctl vm.nr_hugepages_mempolicy=40

This yields:

	Node 0 HugePages_Total:     8
	Node 1 HugePages_Total:     8
	Node 2 HugePages_Total:    16
	Node 3 HugePages_Total:     8

The incremental 8 huge pages were restricted to node 2 by the
specified mempolicy.

Similarly, we can use mempolicy to free persistent huge pages
from specified nodes:

	numactl -m 0,1 sysctl vm.nr_hugepages_mempolicy=32

yields:

	Node 0 HugePages_Total:     4
	Node 1 HugePages_Total:     4
	Node 2 HugePages_Total:    16
	Node 3 HugePages_Total:     8

The 8 huge pages freed were balanced over nodes 0 and 1.

[rientjes@google.com: accomodate reworked NODEMASK_ALLOC]
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h   |  6 +++
 include/linux/mempolicy.h |  3 ++
 kernel/sysctl.c           | 15 +++++++-
 mm/hugetlb.c              | 97 ++++++++++++++++++++++++++++++++++++++++-------
 mm/mempolicy.c            | 47 +++++++++++++++++++++++
 5 files changed, 153 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 41a59afc70fa..78b4bc64c006 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,6 +23,12 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
+					void __user *, size_t *, loff_t *);
+#endif
+
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			struct page **, struct vm_area_struct **,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 085c903fe0f1..1cc966cd3e5f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -201,6 +201,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask);
+extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -328,6 +329,8 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 	return node_zonelist(0, gfp_flags);
 }
 
+static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
+
 static inline int do_migrate_pages(struct mm_struct *mm,
 			const nodemask_t *from_nodes,
 			const nodemask_t *to_nodes, int flags)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 554ac4894f0f..60fc93131095 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1051,7 +1051,7 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one_hundred,
 	},
 #ifdef CONFIG_HUGETLB_PAGE
-	 {
+	{
 		.procname	= "nr_hugepages",
 		.data		= NULL,
 		.maxlen		= sizeof(unsigned long),
@@ -1059,7 +1059,18 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= hugetlb_sysctl_handler,
 		.extra1		= (void *)&hugetlb_zero,
 		.extra2		= (void *)&hugetlb_infinity,
-	 },
+	},
+#ifdef CONFIG_NUMA
+	{
+		.procname       = "nr_hugepages_mempolicy",
+		.data           = NULL,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
+		.extra1		= (void *)&hugetlb_zero,
+		.extra2		= (void *)&hugetlb_infinity,
+	},
+#endif
 	 {
 		.procname	= "hugetlb_shm_group",
 		.data		= &sysctl_hugetlb_shm_group,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 324d1abae876..1125d818ea06 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1330,29 +1330,71 @@ static struct hstate *kobj_to_hstate(struct kobject *kobj)
 	return NULL;
 }
 
-static ssize_t nr_hugepages_show(struct kobject *kobj,
+static ssize_t nr_hugepages_show_common(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
 	struct hstate *h = kobj_to_hstate(kobj);
 	return sprintf(buf, "%lu\n", h->nr_huge_pages);
 }
-static ssize_t nr_hugepages_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+			struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t len)
 {
 	int err;
-	unsigned long input;
+	unsigned long count;
 	struct hstate *h = kobj_to_hstate(kobj);
+	NODEMASK_ALLOC(nodemask_t, nodes_allowed);
 
-	err = strict_strtoul(buf, 10, &input);
+	err = strict_strtoul(buf, 10, &count);
 	if (err)
 		return 0;
 
-	h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
+	if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
+		NODEMASK_FREE(nodes_allowed);
+		nodes_allowed = &node_online_map;
+	}
+	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
-	return count;
+	if (nodes_allowed != &node_online_map)
+		NODEMASK_FREE(nodes_allowed);
+
+	return len;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	return nr_hugepages_store_common(false, kobj, attr, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
+#ifdef CONFIG_NUMA
+
+/*
+ * hstate attribute for optionally mempolicy-based constraint on persistent
+ * huge page alloc/free.
+ */
+static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	return nr_hugepages_store_common(true, kobj, attr, buf, len);
+}
+HSTATE_ATTR(nr_hugepages_mempolicy);
+#endif
+
+
 static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
@@ -1408,6 +1450,9 @@ static struct attribute *hstate_attrs[] = {
 	&free_hugepages_attr.attr,
 	&resv_hugepages_attr.attr,
 	&surplus_hugepages_attr.attr,
+#ifdef CONFIG_NUMA
+	&nr_hugepages_mempolicy_attr.attr,
+#endif
 	NULL,
 };
 
@@ -1574,9 +1619,9 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 }
 
 #ifdef CONFIG_SYSCTL
-int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-			   void __user *buffer,
-			   size_t *length, loff_t *ppos)
+static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
+			 struct ctl_table *table, int write,
+			 void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
 	unsigned long tmp;
@@ -1588,13 +1633,39 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
-	if (write)
-		h->max_huge_pages = set_max_huge_pages(h, tmp,
-							&node_online_map);
+	if (write) {
+		NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+		if (!(obey_mempolicy &&
+			       init_nodemask_of_mempolicy(nodes_allowed))) {
+			NODEMASK_FREE(nodes_allowed);
+			nodes_allowed = &node_states[N_HIGH_MEMORY];
+		}
+		h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
+
+		if (nodes_allowed != &node_states[N_HIGH_MEMORY])
+			NODEMASK_FREE(nodes_allowed);
+	}
 
 	return 0;
 }
 
+int hugetlb_sysctl_handler(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *length, loff_t *ppos)
+{
+
+	return hugetlb_sysctl_handler_common(false, table, write,
+							buffer, length, ppos);
+}
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *length, loff_t *ppos)
+{
+	return hugetlb_sysctl_handler_common(true, table, write,
+							buffer, length, ppos);
+}
+#endif /* CONFIG_NUMA */
+
 int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
 			void __user *buffer,
 			size_t *length, loff_t *ppos)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0f89eabbaf3e..f11fdad06204 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1568,6 +1568,53 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 	}
 	return zl;
 }
+
+/*
+ * init_nodemask_of_mempolicy
+ *
+ * If the current task's mempolicy is "default" [NULL], return 'false'
+ * to indicate default policy.  Otherwise, extract the policy nodemask
+ * for 'bind' or 'interleave' policy into the argument nodemask, or
+ * initialize the argument nodemask to contain the single node for
+ * 'preferred' or 'local' policy and return 'true' to indicate presence
+ * of non-default mempolicy.
+ *
+ * We don't bother with reference counting the mempolicy [mpol_get/put]
+ * because the current task is examining it's own mempolicy and a task's
+ * mempolicy is only ever changed by the task itself.
+ *
+ * N.B., it is the caller's responsibility to free a returned nodemask.
+ */
+bool init_nodemask_of_mempolicy(nodemask_t *mask)
+{
+	struct mempolicy *mempolicy;
+	int nid;
+
+	if (!(mask && current->mempolicy))
+		return false;
+
+	mempolicy = current->mempolicy;
+	switch (mempolicy->mode) {
+	case MPOL_PREFERRED:
+		if (mempolicy->flags & MPOL_F_LOCAL)
+			nid = numa_node_id();
+		else
+			nid = mempolicy->v.preferred_node;
+		init_nodemask_of_node(mask, nid);
+		break;
+
+	case MPOL_BIND:
+		/* Fall through */
+	case MPOL_INTERLEAVE:
+		*mask =  mempolicy->v.nodes;
+		break;
+
+	default:
+		BUG();
+	}
+
+	return true;
+}
 #endif
 
 /* Allocate a page in interleaved policy.
-- 
cgit v1.2.3


From 4e25b2576efda24c02e2d6b9bcb5965a3f865f33 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:23 -0800
Subject: hugetlb: add generic definition of NUMA_NO_NODE

Move definition of NUMA_NO_NODE from ia64 and x86_64 arch specific headers
to generic header 'linux/numa.h' for use in generic code.  NUMA_NO_NODE
replaces bare '-1' where it's used in this series to indicate "no node id
specified".  Ultimately, it can be used to replace the -1 elsewhere where
it is used similarly.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/numa.h    | 2 --
 arch/x86/include/asm/topology.h | 9 +++++++--
 include/linux/numa.h            | 2 ++
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
index 3499ff57bf42..6a8a27cfae3e 100644
--- a/arch/ia64/include/asm/numa.h
+++ b/arch/ia64/include/asm/numa.h
@@ -22,8 +22,6 @@
 
 #include <asm/mmzone.h>
 
-#define NUMA_NO_NODE	-1
-
 extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
 extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
 extern pg_data_t *pgdat_list[MAX_NUMNODES];
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 40e37b10c6c0..c5087d796587 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -35,11 +35,16 @@
 # endif
 #endif
 
-/* Node not present */
-#define NUMA_NO_NODE	(-1)
+/*
+ * to preserve the visibility of NUMA_NO_NODE definition,
+ * moved to there from here.  May be used independent of
+ * CONFIG_NUMA.
+ */
+#include <linux/numa.h>
 
 #ifdef CONFIG_NUMA
 #include <linux/cpumask.h>
+
 #include <asm/mpspec.h>
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/numa.h b/include/linux/numa.h
index a31a7301b159..3aaa31603a86 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -10,4 +10,6 @@
 
 #define MAX_NUMNODES    (1 << NODES_SHIFT)
 
+#define	NUMA_NO_NODE	(-1)
+
 #endif /* _LINUX_NUMA_H */
-- 
cgit v1.2.3


From 9a30523066cde73c1442b76224bb540de9f9b0b0 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:25 -0800
Subject: hugetlb: add per node hstate attributes

Add the per huge page size control/query attributes to the per node
sysdevs:

/sys/devices/system/node/node<ID>/hugepages/hugepages-<size>/
	nr_hugepages       - r/w
	free_huge_pages    - r/o
	surplus_huge_pages - r/o

The patch attempts to re-use/share as much of the existing global hstate
attribute initialization and handling, and the "nodes_allowed" constraint
processing as possible.

Calling set_max_huge_pages() with no node indicates a change to global
hstate parameters.  In this case, any non-default task mempolicy will be
used to generate the nodes_allowed mask.  A valid node id indicates an
update to that node's hstate parameters, and the count argument specifies
the target count for the specified node.  From this info, we compute the
target global count for the hstate and construct a nodes_allowed node mask
contain only the specified node.

Setting the node specific nr_hugepages via the per node attribute
effectively ignores any task mempolicy or cpuset constraints.

With this patch:

(me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB
./  ../  free_hugepages  nr_hugepages  surplus_hugepages

Starting from:
Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:     0
Node 2 HugePages_Free:      0
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0
vm.nr_hugepages = 0

Allocate 16 persistent huge pages on node 2:
(me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages

[Note that this is equivalent to:
	numactl -m 2 hugeadmin --pool-pages-min 2M:+16
]

Yields:
Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:    16
Node 2 HugePages_Free:     16
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0
vm.nr_hugepages = 16

Global controls work as expected--reduce pool to 8 persistent huge pages:
(me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages

Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:     8
Node 2 HugePages_Free:      8
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  |  39 ++++++++
 include/linux/node.h |  11 +++
 mm/hugetlb.c         | 274 ++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 298 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 1fe5536d404f..f502711d28db 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev,
 }
 static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 
+#ifdef CONFIG_HUGETLBFS
+/*
+ * hugetlbfs per node attributes registration interface:
+ * When/if hugetlb[fs] subsystem initializes [sometime after this module],
+ * it will register its per node attributes for all nodes online at that
+ * time.  It will also call register_hugetlbfs_with_node(), below, to
+ * register its attribute registration functions with this node driver.
+ * Once these hooks have been initialized, the node driver will call into
+ * the hugetlb module to [un]register attributes for hot-plugged nodes.
+ */
+static node_registration_func_t __hugetlb_register_node;
+static node_registration_func_t __hugetlb_unregister_node;
+
+static inline void hugetlb_register_node(struct node *node)
+{
+	if (__hugetlb_register_node)
+		__hugetlb_register_node(node);
+}
+
+static inline void hugetlb_unregister_node(struct node *node)
+{
+	if (__hugetlb_unregister_node)
+		__hugetlb_unregister_node(node);
+}
+
+void register_hugetlbfs_with_node(node_registration_func_t doregister,
+				  node_registration_func_t unregister)
+{
+	__hugetlb_register_node   = doregister;
+	__hugetlb_unregister_node = unregister;
+}
+#else
+static inline void hugetlb_register_node(struct node *node) {}
+
+static inline void hugetlb_unregister_node(struct node *node) {}
+#endif
+
 
 /*
  * register_node - Setup a sysfs device for a node.
@@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent)
 		sysdev_create_file(&node->sysdev, &attr_distance);
 
 		scan_unevictable_register_node(node);
+		hugetlb_register_node(node);
 	}
 	return error;
 }
@@ -216,6 +254,7 @@ void unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_distance);
 
 	scan_unevictable_unregister_node(node);
+	hugetlb_unregister_node(node);
 
 	sysdev_unregister(&node->sysdev);
 }
diff --git a/include/linux/node.h b/include/linux/node.h
index 681a697b9a86..dae1521e1f05 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -28,6 +28,7 @@ struct node {
 
 struct memory_block;
 extern struct node node_devices[];
+typedef  void (*node_registration_func_t)(struct node *);
 
 extern int register_node(struct node *, int, struct node *);
 extern void unregister_node(struct node *node);
@@ -39,6 +40,11 @@ extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
 						int nid);
 extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
+
+#ifdef CONFIG_HUGETLBFS
+extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
+					 node_registration_func_t unregister);
+#endif
 #else
 static inline int register_one_node(int nid)
 {
@@ -65,6 +71,11 @@ static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
 {
 	return 0;
 }
+
+static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
+						node_registration_func_t unreg)
+{
+}
 #endif
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1125d818ea06..544f7bcb615e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,6 +24,7 @@
 #include <asm/io.h>
 
 #include <linux/hugetlb.h>
+#include <linux/node.h>
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
@@ -1320,39 +1321,71 @@ out:
 static struct kobject *hugepages_kobj;
 static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
 
-static struct hstate *kobj_to_hstate(struct kobject *kobj)
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
+
+static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
 {
 	int i;
+
 	for (i = 0; i < HUGE_MAX_HSTATE; i++)
-		if (hstate_kobjs[i] == kobj)
+		if (hstate_kobjs[i] == kobj) {
+			if (nidp)
+				*nidp = NUMA_NO_NODE;
 			return &hstates[i];
-	BUG();
-	return NULL;
+		}
+
+	return kobj_to_node_hstate(kobj, nidp);
 }
 
 static ssize_t nr_hugepages_show_common(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->nr_huge_pages);
+	struct hstate *h;
+	unsigned long nr_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		nr_huge_pages = h->nr_huge_pages;
+	else
+		nr_huge_pages = h->nr_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
 			struct kobject *kobj, struct kobj_attribute *attr,
 			const char *buf, size_t len)
 {
 	int err;
+	int nid;
 	unsigned long count;
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h;
 	NODEMASK_ALLOC(nodemask_t, nodes_allowed);
 
 	err = strict_strtoul(buf, 10, &count);
 	if (err)
 		return 0;
 
-	if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
-		NODEMASK_FREE(nodes_allowed);
-		nodes_allowed = &node_online_map;
-	}
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE) {
+		/*
+		 * global hstate attribute
+		 */
+		if (!(obey_mempolicy &&
+				init_nodemask_of_mempolicy(nodes_allowed))) {
+			NODEMASK_FREE(nodes_allowed);
+			nodes_allowed = &node_states[N_HIGH_MEMORY];
+		}
+	} else if (nodes_allowed) {
+		/*
+		 * per node hstate attribute: adjust count to global,
+		 * but restrict alloc/free to the specified node.
+		 */
+		count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
+		init_nodemask_of_node(nodes_allowed, nid);
+	} else
+		nodes_allowed = &node_states[N_HIGH_MEMORY];
+
 	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
 	if (nodes_allowed != &node_online_map)
@@ -1398,7 +1431,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy);
 static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
 }
 static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
@@ -1406,7 +1439,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
 {
 	int err;
 	unsigned long input;
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 
 	err = strict_strtoul(buf, 10, &input);
 	if (err)
@@ -1423,15 +1456,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
 static ssize_t free_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->free_huge_pages);
+	struct hstate *h;
+	unsigned long free_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		free_huge_pages = h->free_huge_pages;
+	else
+		free_huge_pages = h->free_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", free_huge_pages);
 }
 HSTATE_ATTR_RO(free_hugepages);
 
 static ssize_t resv_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 	return sprintf(buf, "%lu\n", h->resv_huge_pages);
 }
 HSTATE_ATTR_RO(resv_hugepages);
@@ -1439,8 +1481,17 @@ HSTATE_ATTR_RO(resv_hugepages);
 static ssize_t surplus_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
+	struct hstate *h;
+	unsigned long surplus_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		surplus_huge_pages = h->surplus_huge_pages;
+	else
+		surplus_huge_pages = h->surplus_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", surplus_huge_pages);
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
@@ -1460,19 +1511,21 @@ static struct attribute_group hstate_attr_group = {
 	.attrs = hstate_attrs,
 };
 
-static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
+static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
+				struct kobject *parent,
+				struct kobject **hstate_kobjs,
+				struct attribute_group *hstate_attr_group)
 {
 	int retval;
+	int hi = h - hstates;
 
-	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
-							hugepages_kobj);
-	if (!hstate_kobjs[h - hstates])
+	hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
+	if (!hstate_kobjs[hi])
 		return -ENOMEM;
 
-	retval = sysfs_create_group(hstate_kobjs[h - hstates],
-							&hstate_attr_group);
+	retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
 	if (retval)
-		kobject_put(hstate_kobjs[h - hstates]);
+		kobject_put(hstate_kobjs[hi]);
 
 	return retval;
 }
@@ -1487,17 +1540,184 @@ static void __init hugetlb_sysfs_init(void)
 		return;
 
 	for_each_hstate(h) {
-		err = hugetlb_sysfs_add_hstate(h);
+		err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
+					 hstate_kobjs, &hstate_attr_group);
 		if (err)
 			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
 								h->name);
 	}
 }
 
+#ifdef CONFIG_NUMA
+
+/*
+ * node_hstate/s - associate per node hstate attributes, via their kobjects,
+ * with node sysdevs in node_devices[] using a parallel array.  The array
+ * index of a node sysdev or _hstate == node id.
+ * This is here to avoid any static dependency of the node sysdev driver, in
+ * the base kernel, on the hugetlb module.
+ */
+struct node_hstate {
+	struct kobject		*hugepages_kobj;
+	struct kobject		*hstate_kobjs[HUGE_MAX_HSTATE];
+};
+struct node_hstate node_hstates[MAX_NUMNODES];
+
+/*
+ * A subset of global hstate attributes for node sysdevs
+ */
+static struct attribute *per_node_hstate_attrs[] = {
+	&nr_hugepages_attr.attr,
+	&free_hugepages_attr.attr,
+	&surplus_hugepages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group per_node_hstate_attr_group = {
+	.attrs = per_node_hstate_attrs,
+};
+
+/*
+ * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * Returns node id via non-NULL nidp.
+ */
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
+{
+	int nid;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		struct node_hstate *nhs = &node_hstates[nid];
+		int i;
+		for (i = 0; i < HUGE_MAX_HSTATE; i++)
+			if (nhs->hstate_kobjs[i] == kobj) {
+				if (nidp)
+					*nidp = nid;
+				return &hstates[i];
+			}
+	}
+
+	BUG();
+	return NULL;
+}
+
+/*
+ * Unregister hstate attributes from a single node sysdev.
+ * No-op if no hstate attributes attached.
+ */
+void hugetlb_unregister_node(struct node *node)
+{
+	struct hstate *h;
+	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+
+	if (!nhs->hugepages_kobj)
+		return;
+
+	for_each_hstate(h)
+		if (nhs->hstate_kobjs[h - hstates]) {
+			kobject_put(nhs->hstate_kobjs[h - hstates]);
+			nhs->hstate_kobjs[h - hstates] = NULL;
+		}
+
+	kobject_put(nhs->hugepages_kobj);
+	nhs->hugepages_kobj = NULL;
+}
+
+/*
+ * hugetlb module exit:  unregister hstate attributes from node sysdevs
+ * that have them.
+ */
+static void hugetlb_unregister_all_nodes(void)
+{
+	int nid;
+
+	/*
+	 * disable node sysdev registrations.
+	 */
+	register_hugetlbfs_with_node(NULL, NULL);
+
+	/*
+	 * remove hstate attributes from any nodes that have them.
+	 */
+	for (nid = 0; nid < nr_node_ids; nid++)
+		hugetlb_unregister_node(&node_devices[nid]);
+}
+
+/*
+ * Register hstate attributes for a single node sysdev.
+ * No-op if attributes already registered.
+ */
+void hugetlb_register_node(struct node *node)
+{
+	struct hstate *h;
+	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	int err;
+
+	if (nhs->hugepages_kobj)
+		return;		/* already allocated */
+
+	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
+							&node->sysdev.kobj);
+	if (!nhs->hugepages_kobj)
+		return;
+
+	for_each_hstate(h) {
+		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
+						nhs->hstate_kobjs,
+						&per_node_hstate_attr_group);
+		if (err) {
+			printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
+					" for node %d\n",
+						h->name, node->sysdev.id);
+			hugetlb_unregister_node(node);
+			break;
+		}
+	}
+}
+
+/*
+ * hugetlb init time:  register hstate attributes for all registered
+ * node sysdevs.  All on-line nodes should have registered their
+ * associated sysdev by the time the hugetlb module initializes.
+ */
+static void hugetlb_register_all_nodes(void)
+{
+	int nid;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		struct node *node = &node_devices[nid];
+		if (node->sysdev.id == nid)
+			hugetlb_register_node(node);
+	}
+
+	/*
+	 * Let the node sysdev driver know we're here so it can
+	 * [un]register hstate attributes on node hotplug.
+	 */
+	register_hugetlbfs_with_node(hugetlb_register_node,
+				     hugetlb_unregister_node);
+}
+#else	/* !CONFIG_NUMA */
+
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
+{
+	BUG();
+	if (nidp)
+		*nidp = -1;
+	return NULL;
+}
+
+static void hugetlb_unregister_all_nodes(void) { }
+
+static void hugetlb_register_all_nodes(void) { }
+
+#endif
+
 static void __exit hugetlb_exit(void)
 {
 	struct hstate *h;
 
+	hugetlb_unregister_all_nodes();
+
 	for_each_hstate(h) {
 		kobject_put(hstate_kobjs[h - hstates]);
 	}
@@ -1532,6 +1752,8 @@ static int __init hugetlb_init(void)
 
 	hugetlb_sysfs_init();
 
+	hugetlb_register_all_nodes();
+
 	return 0;
 }
 module_init(hugetlb_init);
-- 
cgit v1.2.3


From 8fe23e057172223fe2048768a4d87ab7de7477bc Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 14 Dec 2009 17:58:33 -0800
Subject: mm: clear node in N_HIGH_MEMORY and stop kswapd when all memory is
 offlined

When memory is hot-removed, its node must be cleared in N_HIGH_MEMORY if
there are no present pages left.

In such a situation, kswapd must also be stopped since it has nothing left
to do.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 +
 mm/memory_hotplug.c  |  4 ++++
 mm/vmscan.c          | 28 ++++++++++++++++++++++------
 3 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4ec90019c1a4..abce8a0b2507 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -273,6 +273,7 @@ extern int scan_unevictable_register_node(struct node *node);
 extern void scan_unevictable_unregister_node(struct node *node);
 
 extern int kswapd_run(int nid);
+extern void kswapd_stop(int nid);
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e8116f8bdffa..bc5a08138f1e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -853,6 +853,10 @@ repeat:
 
 	setup_per_zone_wmarks();
 	calculate_zone_inactive_ratio(zone);
+	if (!node_present_pages(node)) {
+		node_clear_state(node, N_HIGH_MEMORY);
+		kswapd_stop(node);
+	}
 
 	vm_total_pages = nr_free_pagecache_pages();
 	writeback_set_ratelimit();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 777af57fd8c8..d0a631a428a0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2173,6 +2173,7 @@ static int kswapd(void *p)
 	order = 0;
 	for ( ; ; ) {
 		unsigned long new_order;
+		int ret;
 
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 		new_order = pgdat->kswapd_max_order;
@@ -2184,19 +2185,23 @@ static int kswapd(void *p)
 			 */
 			order = new_order;
 		} else {
-			if (!freezing(current))
+			if (!freezing(current) && !kthread_should_stop())
 				schedule();
 
 			order = pgdat->kswapd_max_order;
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		if (!try_to_freeze()) {
-			/* We can speed up thawing tasks if we don't call
-			 * balance_pgdat after returning from the refrigerator
-			 */
+		ret = try_to_freeze();
+		if (kthread_should_stop())
+			break;
+
+		/*
+		 * We can speed up thawing tasks if we don't call balance_pgdat
+		 * after returning from the refrigerator
+		 */
+		if (!ret)
 			balance_pgdat(pgdat, order);
-		}
 	}
 	return 0;
 }
@@ -2451,6 +2456,17 @@ int kswapd_run(int nid)
 	return ret;
 }
 
+/*
+ * Called by memory hotplug when all memory in a node is offlined.
+ */
+void kswapd_stop(int nid)
+{
+	struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
+
+	if (kswapd)
+		kthread_stop(kswapd);
+}
+
 static int __init kswapd_init(void)
 {
 	int nid;
-- 
cgit v1.2.3


From 39da08cb074cf19cb249832a2a955dfb28837e65 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:36 -0800
Subject: hugetlb: offload per node attribute registrations

Offload the registration and unregistration of per node hstate sysfs
attributes to a worker thread rather than attempt the
allocation/attachment or detachment/freeing of the attributes in the
context of the memory hotplug handler.

I don't know that this is absolutely required, but the registration can
sleep in allocations and other mem hot plug handlers do it this way.  If
it turns out this is NOT required, we can drop this patch.

N.B.,  Only tested build, boot, libhugetlbfs regression.
       i.e., no memory hotplug testing.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 57 +++++++++++++++++++++++++++++++++++++++++++---------
 include/linux/node.h |  5 +++++
 2 files changed, 52 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 9e218a6d4a5b..54e5d8eaf70e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -186,11 +186,14 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 static node_registration_func_t __hugetlb_register_node;
 static node_registration_func_t __hugetlb_unregister_node;
 
-static inline void hugetlb_register_node(struct node *node)
+static inline bool hugetlb_register_node(struct node *node)
 {
 	if (__hugetlb_register_node &&
-			node_state(node->sysdev.id, N_HIGH_MEMORY))
+			node_state(node->sysdev.id, N_HIGH_MEMORY)) {
 		__hugetlb_register_node(node);
+		return true;
+	}
+	return false;
 }
 
 static inline void hugetlb_unregister_node(struct node *node)
@@ -387,10 +390,31 @@ static int link_mem_sections(int nid)
 	return err;
 }
 
+#ifdef CONFIG_HUGETLBFS
 /*
  * Handle per node hstate attribute [un]registration on transistions
  * to/from memoryless state.
  */
+static void node_hugetlb_work(struct work_struct *work)
+{
+	struct node *node = container_of(work, struct node, node_work);
+
+	/*
+	 * We only get here when a node transitions to/from memoryless state.
+	 * We can detect which transition occurred by examining whether the
+	 * node has memory now.  hugetlb_register_node() already check this
+	 * so we try to register the attributes.  If that fails, then the
+	 * node has transitioned to memoryless, try to unregister the
+	 * attributes.
+	 */
+	if (!hugetlb_register_node(node))
+		hugetlb_unregister_node(node);
+}
+
+static void init_node_hugetlb_work(int nid)
+{
+	INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work);
+}
 
 static int node_memory_callback(struct notifier_block *self,
 				unsigned long action, void *arg)
@@ -399,14 +423,16 @@ static int node_memory_callback(struct notifier_block *self,
 	int nid = mnb->status_change_nid;
 
 	switch (action) {
-	case MEM_ONLINE:    /* memory successfully brought online */
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		/*
+		 * offload per node hstate [un]registration to a work thread
+		 * when transitioning to/from memoryless state.
+		 */
 		if (nid != NUMA_NO_NODE)
-			hugetlb_register_node(&node_devices[nid]);
-		break;
-	case MEM_OFFLINE:   /* or offline */
-		if (nid != NUMA_NO_NODE)
-			hugetlb_unregister_node(&node_devices[nid]);
+			schedule_work(&node_devices[nid].node_work);
 		break;
+
 	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
 	case MEM_CANCEL_ONLINE:
@@ -417,15 +443,23 @@ static int node_memory_callback(struct notifier_block *self,
 
 	return NOTIFY_OK;
 }
-#else
+#endif	/* CONFIG_HUGETLBFS */
+#else	/* !CONFIG_MEMORY_HOTPLUG_SPARSE */
+
 static int link_mem_sections(int nid) { return 0; }
+#endif	/* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
+#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
+    !defined(CONFIG_HUGETLBFS)
 static inline int node_memory_callback(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
+static void init_node_hugetlb_work(int nid) { }
+
+#endif
 
 int register_one_node(int nid)
 {
@@ -449,6 +483,9 @@ int register_one_node(int nid)
 
 		/* link memory sections under this node */
 		error = link_mem_sections(nid);
+
+		/* initialize work queue for memory hot plug */
+		init_node_hugetlb_work(nid);
 	}
 
 	return error;
diff --git a/include/linux/node.h b/include/linux/node.h
index dae1521e1f05..06292dac3eab 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -21,9 +21,14 @@
 
 #include <linux/sysdev.h>
 #include <linux/cpumask.h>
+#include <linux/workqueue.h>
 
 struct node {
 	struct sys_device	sysdev;
+
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
+	struct work_struct	node_work;
+#endif
 };
 
 struct memory_block;
-- 
cgit v1.2.3


From bad44b5be84cf3bb1ff900bec02ee61e1993328c Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 14 Dec 2009 17:58:38 -0800
Subject: mm: add gfp flags for NODEMASK_ALLOC slab allocations

Objects passed to NODEMASK_ALLOC() are relatively small in size and are
backed by slab caches that are not of large order, traditionally never
greater than PAGE_ALLOC_COSTLY_ORDER.

Thus, using GFP_KERNEL for these allocations on large machines when
CONFIG_NODES_SHIFT > 8 will cause the page allocator to loop endlessly in
the allocation attempt, each time invoking both direct reclaim or the oom
killer.

This is of particular interest when using NODEMASK_ALLOC() from a
mempolicy context (either directly in mm/mempolicy.c or the mempolicy
constrained hugetlb allocations) since the oom killer always kills current
when allocations are constrained by mempolicies.  So for all present use
cases in the kernel, current would end up being oom killed when direct
reclaim fails.  That would allow the NODEMASK_ALLOC() to succeed but
current would have sacrificed itself upon returning.

This patch adds gfp flags to NODEMASK_ALLOC() to pass to kmalloc() on
CONFIG_NODES_SHIFT > 8; this parameter is a nop on other configurations.
All current use cases either directly from hugetlb code or indirectly via
NODEMASK_SCRATCH() union __GFP_NORETRY to avoid direct reclaim and the oom
killer when the slab allocator needs to allocate additional pages.

The side-effect of this change is that all current use cases of either
NODEMASK_ALLOC() or NODEMASK_SCRATCH() need appropriate -ENOMEM handling
when the allocation fails (never for CONFIG_NODES_SHIFT <= 8).  All
current use cases were audited and do have appropriate error handling at
this time.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 21 ++++++++++++---------
 mm/hugetlb.c             |  5 +++--
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index cbd521a03127..454997cccbd8 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -485,15 +485,17 @@ static inline int num_node_state(enum node_states state)
 #define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
 
 /*
- * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h)
- * NODEMASK_ALLOC(x, m) allocates an object of type 'x' with the name 'm'.
+ * For nodemask scrach area.
+ * NODEMASK_ALLOC(type, name) allocates an object with a specified type and
+ * name.
  */
-#if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */
-#define NODEMASK_ALLOC(x, m)		x *m = kmalloc(sizeof(*m), GFP_KERNEL)
-#define NODEMASK_FREE(m)		kfree(m)
+#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#define NODEMASK_ALLOC(type, name, gfp_flags)	\
+			type *name = kmalloc(sizeof(*name), gfp_flags)
+#define NODEMASK_FREE(m)			kfree(m)
 #else
-#define NODEMASK_ALLOC(x, m)		x _m, *m = &_m
-#define NODEMASK_FREE(m)		do {} while (0)
+#define NODEMASK_ALLOC(type, name, gfp_flags)	type _name, *name = &_name
+#define NODEMASK_FREE(m)			do {} while (0)
 #endif
 
 /* A example struture for using NODEMASK_ALLOC, used in mempolicy. */
@@ -502,8 +504,9 @@ struct nodemask_scratch {
 	nodemask_t	mask2;
 };
 
-#define NODEMASK_SCRATCH(x)	\
-		NODEMASK_ALLOC(struct nodemask_scratch, x)
+#define NODEMASK_SCRATCH(x)						\
+			NODEMASK_ALLOC(struct nodemask_scratch, x,	\
+					GFP_KERNEL | __GFP_NORETRY)
 #define NODEMASK_SCRATCH_FREE(x)	NODEMASK_FREE(x)
 
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b4a263512cb7..450493d25572 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1361,7 +1361,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
 	int nid;
 	unsigned long count;
 	struct hstate *h;
-	NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
 	err = strict_strtoul(buf, 10, &count);
 	if (err)
@@ -1857,7 +1857,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 	proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
 	if (write) {
-		NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+		NODEMASK_ALLOC(nodemask_t, nodes_allowed,
+						GFP_KERNEL | __GFP_NORETRY);
 		if (!(obey_mempolicy &&
 			       init_nodemask_of_mempolicy(nodes_allowed))) {
 			NODEMASK_FREE(nodes_allowed);
-- 
cgit v1.2.3


From f29ad6a99b596b8169744d107bf088e8be9e8d0d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:40 -0800
Subject: swap_info: private to swapfile.c

The swap_info_struct is mostly private to mm/swapfile.c, with only
one other in-tree user: get_swap_bio().  Adjust its interface to
map_swap_page(), so that we can then remove get_swap_info_struct().

But there is a popular user out-of-tree, TuxOnIce: so leave the
declaration of swap_info_struct in linux/swap.h.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Nigel Cunningham <ncunningham@crca.org.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  3 +--
 mm/page_io.c         | 19 +++++++------------
 mm/swapfile.c        | 29 +++++++++++++++++------------
 3 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index abce8a0b2507..82aa7e121c05 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -318,9 +318,8 @@ extern void swapcache_free(swp_entry_t, struct page *page);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
-extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
+extern sector_t map_swap_page(swp_entry_t, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
-extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
diff --git a/mm/page_io.c b/mm/page_io.c
index c6f3e5071de3..afeed89a0a5d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -19,20 +19,17 @@
 #include <linux/writeback.h>
 #include <asm/pgtable.h>
 
-static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
+static struct bio *get_swap_bio(gfp_t gfp_flags,
 				struct page *page, bio_end_io_t end_io)
 {
 	struct bio *bio;
 
 	bio = bio_alloc(gfp_flags, 1);
 	if (bio) {
-		struct swap_info_struct *sis;
-		swp_entry_t entry = { .val = index, };
-
-		sis = get_swap_info_struct(swp_type(entry));
-		bio->bi_sector = map_swap_page(sis, swp_offset(entry)) *
-					(PAGE_SIZE >> 9);
-		bio->bi_bdev = sis->bdev;
+		swp_entry_t entry;
+		entry.val = page_private(page);
+		bio->bi_sector = map_swap_page(entry, &bio->bi_bdev);
+		bio->bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_io_vec[0].bv_page = page;
 		bio->bi_io_vec[0].bv_len = PAGE_SIZE;
 		bio->bi_io_vec[0].bv_offset = 0;
@@ -102,8 +99,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		unlock_page(page);
 		goto out;
 	}
-	bio = get_swap_bio(GFP_NOIO, page_private(page), page,
-				end_swap_bio_write);
+	bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
 	if (bio == NULL) {
 		set_page_dirty(page);
 		unlock_page(page);
@@ -127,8 +123,7 @@ int swap_readpage(struct page *page)
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageUptodate(page));
-	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
-				end_swap_bio_read);
+	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
 	if (bio == NULL) {
 		unlock_page(page);
 		ret = -ENOMEM;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9c590eef7912..f83f1c6f6196 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1283,12 +1283,22 @@ static void drain_mmlist(void)
 
 /*
  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
- * corresponds to page offset `offset'.
+ * corresponds to page offset `offset'.  Note that the type of this function
+ * is sector_t, but it returns page offset into the bdev, not sector offset.
  */
-sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
+sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
 {
-	struct swap_extent *se = sis->curr_swap_extent;
-	struct swap_extent *start_se = se;
+	struct swap_info_struct *sis;
+	struct swap_extent *start_se;
+	struct swap_extent *se;
+	pgoff_t offset;
+
+	sis = swap_info + swp_type(entry);
+	*bdev = sis->bdev;
+
+	offset = swp_offset(entry);
+	start_se = sis->curr_swap_extent;
+	se = start_se;
 
 	for ( ; ; ) {
 		struct list_head *lh;
@@ -1314,12 +1324,14 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
 sector_t swapdev_block(int swap_type, pgoff_t offset)
 {
 	struct swap_info_struct *sis;
+	struct block_device *bdev;
 
 	if (swap_type >= nr_swapfiles)
 		return 0;
 
 	sis = swap_info + swap_type;
-	return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;
+	return (sis->flags & SWP_WRITEOK) ?
+		map_swap_page(swp_entry(swap_type, offset), &bdev) : 0;
 }
 #endif /* CONFIG_HIBERNATION */
 
@@ -2159,13 +2171,6 @@ int swapcache_prepare(swp_entry_t entry)
 	return __swap_duplicate(entry, SWAP_CACHE);
 }
 
-
-struct swap_info_struct *
-get_swap_info_struct(unsigned type)
-{
-	return &swap_info[type];
-}
-
 /*
  * swap_lock prevents swap_map being freed. Don't grab an extra
  * reference on the swaphandle, it doesn't matter if it becomes unused.
-- 
cgit v1.2.3


From efa90a981bbc891efad96db2a75b5487e00852ca Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:41 -0800
Subject: swap_info: change to array of pointers

The swap_info_struct is only 76 or 104 bytes, but it does seem wrong
to reserve an array of about 30 of them in bss, when most people will
want only one.  Change swap_info[] to an array of pointers.

That does need a "type" field in the structure: pack it as a char with
next type and short prio (aha, char is unsigned by default on PowerPC).
Use the (admittedly peculiar) name "type" throughout for this index.

/proc/swaps does not take swap_lock: I wouldn't want it to, but do take
care with barriers when adding a new item to the array (never removed).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |   7 +-
 mm/swapfile.c        | 204 ++++++++++++++++++++++++++++-----------------------
 2 files changed, 117 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 82aa7e121c05..f1c248796fb8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -159,9 +159,10 @@ enum {
  * The in-memory structure used to track swap areas.
  */
 struct swap_info_struct {
-	unsigned long flags;
-	int prio;			/* swap priority */
-	int next;			/* next entry on swap list */
+	unsigned long	flags;		/* SWP_USED etc: see above */
+	signed short	prio;		/* swap priority of this type */
+	signed char	type;		/* strange name for an index */
+	signed char	next;		/* next type on the swap list */
 	struct file *swap_file;
 	struct block_device *bdev;
 	struct list_head extent_list;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f83f1c6f6196..dc88a7e4257e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -49,7 +49,7 @@ static const char Unused_offset[] = "Unused swap offset entry ";
 
 static struct swap_list_t swap_list = {-1, -1};
 
-static struct swap_info_struct swap_info[MAX_SWAPFILES];
+static struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -79,12 +79,11 @@ static inline unsigned short encode_swapmap(int count, bool has_cache)
 	return ret;
 }
 
-/* returnes 1 if swap entry is freed */
+/* returns 1 if swap entry is freed */
 static int
 __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 {
-	int type = si - swap_info;
-	swp_entry_t entry = swp_entry(type, offset);
+	swp_entry_t entry = swp_entry(si->type, offset);
 	struct page *page;
 	int ret = 0;
 
@@ -120,7 +119,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 	down_read(&swap_unplug_sem);
 	entry.val = page_private(page);
 	if (PageSwapCache(page)) {
-		struct block_device *bdev = swap_info[swp_type(entry)].bdev;
+		struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
 		struct backing_dev_info *bdi;
 
 		/*
@@ -467,10 +466,10 @@ swp_entry_t get_swap_page(void)
 	nr_swap_pages--;
 
 	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
-		si = swap_info + type;
+		si = swap_info[type];
 		next = si->next;
 		if (next < 0 ||
-		    (!wrapped && si->prio != swap_info[next].prio)) {
+		    (!wrapped && si->prio != swap_info[next]->prio)) {
 			next = swap_list.head;
 			wrapped++;
 		}
@@ -503,8 +502,8 @@ swp_entry_t get_swap_page_of_type(int type)
 	pgoff_t offset;
 
 	spin_lock(&swap_lock);
-	si = swap_info + type;
-	if (si->flags & SWP_WRITEOK) {
+	si = swap_info[type];
+	if (si && (si->flags & SWP_WRITEOK)) {
 		nr_swap_pages--;
 		/* This is called for allocating swap entry, not cache */
 		offset = scan_swap_map(si, SWAP_MAP);
@@ -528,7 +527,7 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry)
 	type = swp_type(entry);
 	if (type >= nr_swapfiles)
 		goto bad_nofile;
-	p = & swap_info[type];
+	p = swap_info[type];
 	if (!(p->flags & SWP_USED))
 		goto bad_device;
 	offset = swp_offset(entry);
@@ -581,8 +580,9 @@ static int swap_entry_free(struct swap_info_struct *p,
 			p->lowest_bit = offset;
 		if (offset > p->highest_bit)
 			p->highest_bit = offset;
-		if (p->prio > swap_info[swap_list.next].prio)
-			swap_list.next = p - swap_info;
+		if (swap_list.next >= 0 &&
+		    p->prio > swap_info[swap_list.next]->prio)
+			swap_list.next = p->type;
 		nr_swap_pages++;
 		p->inuse_pages--;
 	}
@@ -741,14 +741,14 @@ int free_swap_and_cache(swp_entry_t entry)
 int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 {
 	struct block_device *bdev = NULL;
-	int i;
+	int type;
 
 	if (device)
 		bdev = bdget(device);
 
 	spin_lock(&swap_lock);
-	for (i = 0; i < nr_swapfiles; i++) {
-		struct swap_info_struct *sis = swap_info + i;
+	for (type = 0; type < nr_swapfiles; type++) {
+		struct swap_info_struct *sis = swap_info[type];
 
 		if (!(sis->flags & SWP_WRITEOK))
 			continue;
@@ -758,7 +758,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 				*bdev_p = bdgrab(sis->bdev);
 
 			spin_unlock(&swap_lock);
-			return i;
+			return type;
 		}
 		if (bdev == sis->bdev) {
 			struct swap_extent *se;
@@ -771,7 +771,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 
 				spin_unlock(&swap_lock);
 				bdput(bdev);
-				return i;
+				return type;
 			}
 		}
 	}
@@ -792,15 +792,17 @@ unsigned int count_swap_pages(int type, int free)
 {
 	unsigned int n = 0;
 
-	if (type < nr_swapfiles) {
-		spin_lock(&swap_lock);
-		if (swap_info[type].flags & SWP_WRITEOK) {
-			n = swap_info[type].pages;
+	spin_lock(&swap_lock);
+	if ((unsigned int)type < nr_swapfiles) {
+		struct swap_info_struct *sis = swap_info[type];
+
+		if (sis->flags & SWP_WRITEOK) {
+			n = sis->pages;
 			if (free)
-				n -= swap_info[type].inuse_pages;
+				n -= sis->inuse_pages;
 		}
-		spin_unlock(&swap_lock);
 	}
+	spin_unlock(&swap_lock);
 	return n;
 }
 #endif
@@ -1024,7 +1026,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
  */
 static int try_to_unuse(unsigned int type)
 {
-	struct swap_info_struct * si = &swap_info[type];
+	struct swap_info_struct *si = swap_info[type];
 	struct mm_struct *start_mm;
 	unsigned short *swap_map;
 	unsigned short swcount;
@@ -1270,10 +1272,10 @@ retry:
 static void drain_mmlist(void)
 {
 	struct list_head *p, *next;
-	unsigned int i;
+	unsigned int type;
 
-	for (i = 0; i < nr_swapfiles; i++)
-		if (swap_info[i].inuse_pages)
+	for (type = 0; type < nr_swapfiles; type++)
+		if (swap_info[type]->inuse_pages)
 			return;
 	spin_lock(&mmlist_lock);
 	list_for_each_safe(p, next, &init_mm.mmlist)
@@ -1293,7 +1295,7 @@ sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
 	struct swap_extent *se;
 	pgoff_t offset;
 
-	sis = swap_info + swp_type(entry);
+	sis = swap_info[swp_type(entry)];
 	*bdev = sis->bdev;
 
 	offset = swp_offset(entry);
@@ -1321,17 +1323,15 @@ sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
  * corresponding to given index in swap_info (swap type).
  */
-sector_t swapdev_block(int swap_type, pgoff_t offset)
+sector_t swapdev_block(int type, pgoff_t offset)
 {
-	struct swap_info_struct *sis;
 	struct block_device *bdev;
 
-	if (swap_type >= nr_swapfiles)
+	if ((unsigned int)type >= nr_swapfiles)
 		return 0;
-
-	sis = swap_info + swap_type;
-	return (sis->flags & SWP_WRITEOK) ?
-		map_swap_page(swp_entry(swap_type, offset), &bdev) : 0;
+	if (!(swap_info[type]->flags & SWP_WRITEOK))
+		return 0;
+	return map_swap_page(swp_entry(type, offset), &bdev);
 }
 #endif /* CONFIG_HIBERNATION */
 
@@ -1547,8 +1547,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	mapping = victim->f_mapping;
 	prev = -1;
 	spin_lock(&swap_lock);
-	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
-		p = swap_info + type;
+	for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
+		p = swap_info[type];
 		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping)
 				break;
@@ -1567,18 +1567,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
-	if (prev < 0) {
+	if (prev < 0)
 		swap_list.head = p->next;
-	} else {
-		swap_info[prev].next = p->next;
-	}
+	else
+		swap_info[prev]->next = p->next;
 	if (type == swap_list.next) {
 		/* just pick something that's safe... */
 		swap_list.next = swap_list.head;
 	}
 	if (p->prio < 0) {
-		for (i = p->next; i >= 0; i = swap_info[i].next)
-			swap_info[i].prio = p->prio--;
+		for (i = p->next; i >= 0; i = swap_info[i]->next)
+			swap_info[i]->prio = p->prio--;
 		least_priority++;
 	}
 	nr_swap_pages -= p->pages;
@@ -1596,16 +1595,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		if (p->prio < 0)
 			p->prio = --least_priority;
 		prev = -1;
-		for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
-			if (p->prio >= swap_info[i].prio)
+		for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
+			if (p->prio >= swap_info[i]->prio)
 				break;
 			prev = i;
 		}
 		p->next = i;
 		if (prev < 0)
-			swap_list.head = swap_list.next = p - swap_info;
+			swap_list.head = swap_list.next = type;
 		else
-			swap_info[prev].next = p - swap_info;
+			swap_info[prev]->next = type;
 		nr_swap_pages += p->pages;
 		total_swap_pages += p->pages;
 		p->flags |= SWP_WRITEOK;
@@ -1665,8 +1664,8 @@ out:
 /* iterator */
 static void *swap_start(struct seq_file *swap, loff_t *pos)
 {
-	struct swap_info_struct *ptr = swap_info;
-	int i;
+	struct swap_info_struct *si;
+	int type;
 	loff_t l = *pos;
 
 	mutex_lock(&swapon_mutex);
@@ -1674,11 +1673,13 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
 	if (!l)
 		return SEQ_START_TOKEN;
 
-	for (i = 0; i < nr_swapfiles; i++, ptr++) {
-		if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
+	for (type = 0; type < nr_swapfiles; type++) {
+		smp_rmb();	/* read nr_swapfiles before swap_info[type] */
+		si = swap_info[type];
+		if (!(si->flags & SWP_USED) || !si->swap_map)
 			continue;
 		if (!--l)
-			return ptr;
+			return si;
 	}
 
 	return NULL;
@@ -1686,21 +1687,21 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
 
 static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
 {
-	struct swap_info_struct *ptr;
-	struct swap_info_struct *endptr = swap_info + nr_swapfiles;
+	struct swap_info_struct *si = v;
+	int type;
 
 	if (v == SEQ_START_TOKEN)
-		ptr = swap_info;
-	else {
-		ptr = v;
-		ptr++;
-	}
+		type = 0;
+	else
+		type = si->type + 1;
 
-	for (; ptr < endptr; ptr++) {
-		if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
+	for (; type < nr_swapfiles; type++) {
+		smp_rmb();	/* read nr_swapfiles before swap_info[type] */
+		si = swap_info[type];
+		if (!(si->flags & SWP_USED) || !si->swap_map)
 			continue;
 		++*pos;
-		return ptr;
+		return si;
 	}
 
 	return NULL;
@@ -1713,24 +1714,24 @@ static void swap_stop(struct seq_file *swap, void *v)
 
 static int swap_show(struct seq_file *swap, void *v)
 {
-	struct swap_info_struct *ptr = v;
+	struct swap_info_struct *si = v;
 	struct file *file;
 	int len;
 
-	if (ptr == SEQ_START_TOKEN) {
+	if (si == SEQ_START_TOKEN) {
 		seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
 		return 0;
 	}
 
-	file = ptr->swap_file;
+	file = si->swap_file;
 	len = seq_path(swap, &file->f_path, " \t\n\\");
 	seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
 			len < 40 ? 40 - len : 1, " ",
 			S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
 				"partition" : "file\t",
-			ptr->pages << (PAGE_SHIFT - 10),
-			ptr->inuse_pages << (PAGE_SHIFT - 10),
-			ptr->prio);
+			si->pages << (PAGE_SHIFT - 10),
+			si->inuse_pages << (PAGE_SHIFT - 10),
+			si->prio);
 	return 0;
 }
 
@@ -1798,23 +1799,45 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
 	spin_lock(&swap_lock);
-	p = swap_info;
-	for (type = 0 ; type < nr_swapfiles ; type++,p++)
-		if (!(p->flags & SWP_USED))
+	for (type = 0; type < nr_swapfiles; type++) {
+		if (!(swap_info[type]->flags & SWP_USED))
 			break;
+	}
 	error = -EPERM;
 	if (type >= MAX_SWAPFILES) {
 		spin_unlock(&swap_lock);
+		kfree(p);
 		goto out;
 	}
-	if (type >= nr_swapfiles)
-		nr_swapfiles = type+1;
-	memset(p, 0, sizeof(*p));
 	INIT_LIST_HEAD(&p->extent_list);
+	if (type >= nr_swapfiles) {
+		p->type = type;
+		swap_info[type] = p;
+		/*
+		 * Write swap_info[type] before nr_swapfiles, in case a
+		 * racing procfs swap_start() or swap_next() is reading them.
+		 * (We never shrink nr_swapfiles, we never free this entry.)
+		 */
+		smp_wmb();
+		nr_swapfiles++;
+	} else {
+		kfree(p);
+		p = swap_info[type];
+		/*
+		 * Do not memset this entry: a racing procfs swap_next()
+		 * would be relying on p->type to remain valid.
+		 */
+	}
 	p->flags = SWP_USED;
 	p->next = -1;
 	spin_unlock(&swap_lock);
+
 	name = getname(specialfile);
 	error = PTR_ERR(name);
 	if (IS_ERR(name)) {
@@ -1834,7 +1857,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	error = -EBUSY;
 	for (i = 0; i < nr_swapfiles; i++) {
-		struct swap_info_struct *q = &swap_info[i];
+		struct swap_info_struct *q = swap_info[i];
 
 		if (i == type || !q->swap_file)
 			continue;
@@ -1909,6 +1932,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	p->lowest_bit  = 1;
 	p->cluster_next = 1;
+	p->cluster_nr = 0;
 
 	/*
 	 * Find out how many pages are allowed for a single swap
@@ -2015,18 +2039,16 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	/* insert swap space into swap_list: */
 	prev = -1;
-	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
-		if (p->prio >= swap_info[i].prio) {
+	for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
+		if (p->prio >= swap_info[i]->prio)
 			break;
-		}
 		prev = i;
 	}
 	p->next = i;
-	if (prev < 0) {
-		swap_list.head = swap_list.next = p - swap_info;
-	} else {
-		swap_info[prev].next = p - swap_info;
-	}
+	if (prev < 0)
+		swap_list.head = swap_list.next = type;
+	else
+		swap_info[prev]->next = type;
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	error = 0;
@@ -2063,15 +2085,15 @@ out:
 
 void si_swapinfo(struct sysinfo *val)
 {
-	unsigned int i;
+	unsigned int type;
 	unsigned long nr_to_be_unused = 0;
 
 	spin_lock(&swap_lock);
-	for (i = 0; i < nr_swapfiles; i++) {
-		if (!(swap_info[i].flags & SWP_USED) ||
-		     (swap_info[i].flags & SWP_WRITEOK))
-			continue;
-		nr_to_be_unused += swap_info[i].inuse_pages;
+	for (type = 0; type < nr_swapfiles; type++) {
+		struct swap_info_struct *si = swap_info[type];
+
+		if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
+			nr_to_be_unused += si->inuse_pages;
 	}
 	val->freeswap = nr_swap_pages + nr_to_be_unused;
 	val->totalswap = total_swap_pages + nr_to_be_unused;
@@ -2104,7 +2126,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
 	type = swp_type(entry);
 	if (type >= nr_swapfiles)
 		goto bad_file;
-	p = type + swap_info;
+	p = swap_info[type];
 	offset = swp_offset(entry);
 
 	spin_lock(&swap_lock);
@@ -2186,7 +2208,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 	if (!our_page_cluster)	/* no readahead */
 		return 0;
 
-	si = &swap_info[swp_type(entry)];
+	si = swap_info[swp_type(entry)];
 	target = swp_offset(entry);
 	base = (target >> our_page_cluster) << our_page_cluster;
 	end = base + (1 << our_page_cluster);
-- 
cgit v1.2.3


From 9625a5f289f7c3c100b59c317e2bcc3c7e2e51fb Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:42 -0800
Subject: swap_info: include first_swap_extent

Make better use of the space by folding first swap_extent into its
swap_info_struct, instead of just the list_head: swap partitions need
only that one, and for others it's used as a circular list anyway.

[jirislaby@gmail.com: fix crash on double swapon]
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 +-
 mm/swapfile.c        | 70 +++++++++++++++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f1c248796fb8..109dfe794237 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -165,7 +165,7 @@ struct swap_info_struct {
 	signed char	next;		/* next type on the swap list */
 	struct file *swap_file;
 	struct block_device *bdev;
-	struct list_head extent_list;
+	struct swap_extent first_swap_extent;
 	struct swap_extent *curr_swap_extent;
 	unsigned short *swap_map;
 	unsigned int lowest_bit;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dc88a7e4257e..16de84b56644 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -145,23 +145,28 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 static int discard_swap(struct swap_info_struct *si)
 {
 	struct swap_extent *se;
+	sector_t start_block;
+	sector_t nr_blocks;
 	int err = 0;
 
-	list_for_each_entry(se, &si->extent_list, list) {
-		sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
-		sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
+	/* Do not discard the swap header page! */
+	se = &si->first_swap_extent;
+	start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
+	nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
+	if (nr_blocks) {
+		err = blkdev_issue_discard(si->bdev, start_block,
+				nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+		if (err)
+			return err;
+		cond_resched();
+	}
 
-		if (se->start_page == 0) {
-			/* Do not discard the swap header page! */
-			start_block += 1 << (PAGE_SHIFT - 9);
-			nr_blocks -= 1 << (PAGE_SHIFT - 9);
-			if (!nr_blocks)
-				continue;
-		}
+	list_for_each_entry(se, &si->first_swap_extent.list, list) {
+		start_block = se->start_block << (PAGE_SHIFT - 9);
+		nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
 		err = blkdev_issue_discard(si->bdev, start_block,
-						nr_blocks, GFP_KERNEL,
-						DISCARD_FL_BARRIER);
+				nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
 		if (err)
 			break;
 
@@ -200,14 +205,11 @@ static void discard_swap_cluster(struct swap_info_struct *si,
 			start_block <<= PAGE_SHIFT - 9;
 			nr_blocks <<= PAGE_SHIFT - 9;
 			if (blkdev_issue_discard(si->bdev, start_block,
-							nr_blocks, GFP_NOIO,
-							DISCARD_FL_BARRIER))
+				    nr_blocks, GFP_NOIO, DISCARD_FL_BARRIER))
 				break;
 		}
 
 		lh = se->list.next;
-		if (lh == &si->extent_list)
-			lh = lh->next;
 		se = list_entry(lh, struct swap_extent, list);
 	}
 }
@@ -761,10 +763,8 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 			return type;
 		}
 		if (bdev == sis->bdev) {
-			struct swap_extent *se;
+			struct swap_extent *se = &sis->first_swap_extent;
 
-			se = list_entry(sis->extent_list.next,
-					struct swap_extent, list);
 			if (se->start_block == offset) {
 				if (bdev_p)
 					*bdev_p = bdgrab(sis->bdev);
@@ -1310,8 +1310,6 @@ sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
 			return se->start_block + (offset - se->start_page);
 		}
 		lh = se->list.next;
-		if (lh == &sis->extent_list)
-			lh = lh->next;
 		se = list_entry(lh, struct swap_extent, list);
 		sis->curr_swap_extent = se;
 		BUG_ON(se == start_se);		/* It *must* be present */
@@ -1340,10 +1338,10 @@ sector_t swapdev_block(int type, pgoff_t offset)
  */
 static void destroy_swap_extents(struct swap_info_struct *sis)
 {
-	while (!list_empty(&sis->extent_list)) {
+	while (!list_empty(&sis->first_swap_extent.list)) {
 		struct swap_extent *se;
 
-		se = list_entry(sis->extent_list.next,
+		se = list_entry(sis->first_swap_extent.list.next,
 				struct swap_extent, list);
 		list_del(&se->list);
 		kfree(se);
@@ -1364,8 +1362,15 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
 	struct swap_extent *new_se;
 	struct list_head *lh;
 
-	lh = sis->extent_list.prev;	/* The highest page extent */
-	if (lh != &sis->extent_list) {
+	if (start_page == 0) {
+		se = &sis->first_swap_extent;
+		sis->curr_swap_extent = se;
+		se->start_page = 0;
+		se->nr_pages = nr_pages;
+		se->start_block = start_block;
+		return 1;
+	} else {
+		lh = sis->first_swap_extent.list.prev;	/* Highest extent */
 		se = list_entry(lh, struct swap_extent, list);
 		BUG_ON(se->start_page + se->nr_pages != start_page);
 		if (se->start_block + se->nr_pages == start_block) {
@@ -1385,7 +1390,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
 	new_se->nr_pages = nr_pages;
 	new_se->start_block = start_block;
 
-	list_add_tail(&new_se->list, &sis->extent_list);
+	list_add_tail(&new_se->list, &sis->first_swap_extent.list);
 	return 1;
 }
 
@@ -1437,7 +1442,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 	if (S_ISBLK(inode->i_mode)) {
 		ret = add_swap_extent(sis, 0, sis->max, 0);
 		*span = sis->pages;
-		goto done;
+		goto out;
 	}
 
 	blkbits = inode->i_blkbits;
@@ -1508,15 +1513,12 @@ reprobe:
 	sis->max = page_no;
 	sis->pages = page_no - 1;
 	sis->highest_bit = page_no - 1;
-done:
-	sis->curr_swap_extent = list_entry(sis->extent_list.prev,
-					struct swap_extent, list);
-	goto out;
+out:
+	return ret;
 bad_bmap:
 	printk(KERN_ERR "swapon: swapfile has holes\n");
 	ret = -EINVAL;
-out:
-	return ret;
+	goto out;
 }
 
 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
@@ -1815,7 +1817,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		kfree(p);
 		goto out;
 	}
-	INIT_LIST_HEAD(&p->extent_list);
 	if (type >= nr_swapfiles) {
 		p->type = type;
 		swap_info[type] = p;
@@ -1834,6 +1835,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		 * would be relying on p->type to remain valid.
 		 */
 	}
+	INIT_LIST_HEAD(&p->first_swap_extent.list);
 	p->flags = SWP_USED;
 	p->next = -1;
 	spin_unlock(&swap_lock);
-- 
cgit v1.2.3


From 253d553ba75ab26b3e9e2f70cbf6fbf0813f7e86 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:44 -0800
Subject: swap_info: SWAP_HAS_CACHE cleanups

Though swap_count() is useful, I'm finding that swap_has_cache() and
encode_swapmap() obscure what happens in the swap_map entry, just at
those points where I need to understand it.  Remove them, and pass
more usable "usage" values to scan_swap_map(), swap_entry_free() and
__swap_duplicate(), instead of the SWAP_MAP and SWAP_CACHE enum.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |   2 +-
 mm/swapfile.c        | 155 +++++++++++++++++++++------------------------------
 2 files changed, 65 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 109dfe794237..c9d8870892b8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -154,7 +154,7 @@ enum {
 #define SWAP_MAP_MAX	0x7ffe
 #define SWAP_MAP_BAD	0x7fff
 #define SWAP_HAS_CACHE  0x8000		/* There is a swap cache of entry. */
-#define SWAP_COUNT_MASK (~SWAP_HAS_CACHE)
+
 /*
  * The in-memory structure used to track swap areas.
  */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fa5f10b9c28b..52497490a7ca 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -53,30 +53,9 @@ static struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
-/* For reference count accounting in swap_map */
-/* enum for swap_map[] handling. internal use only */
-enum {
-	SWAP_MAP = 0,	/* ops for reference from swap users */
-	SWAP_CACHE,	/* ops for reference from swap cache */
-};
-
 static inline int swap_count(unsigned short ent)
 {
-	return ent & SWAP_COUNT_MASK;
-}
-
-static inline bool swap_has_cache(unsigned short ent)
-{
-	return !!(ent & SWAP_HAS_CACHE);
-}
-
-static inline unsigned short encode_swapmap(int count, bool has_cache)
-{
-	unsigned short ret = count;
-
-	if (has_cache)
-		return SWAP_HAS_CACHE | ret;
-	return ret;
+	return ent & ~SWAP_HAS_CACHE;
 }
 
 /* returns 1 if swap entry is freed */
@@ -224,7 +203,7 @@ static int wait_for_discard(void *word)
 #define LATENCY_LIMIT		256
 
 static inline unsigned long scan_swap_map(struct swap_info_struct *si,
-					  int cache)
+					  unsigned short usage)
 {
 	unsigned long offset;
 	unsigned long scan_base;
@@ -355,10 +334,7 @@ checks:
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
 	}
-	if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
-		si->swap_map[offset] = encode_swapmap(0, true);
-	else /* at suspend */
-		si->swap_map[offset] = encode_swapmap(1, false);
+	si->swap_map[offset] = usage;
 	si->cluster_next = offset + 1;
 	si->flags -= SWP_SCANNING;
 
@@ -483,7 +459,7 @@ swp_entry_t get_swap_page(void)
 
 		swap_list.next = next;
 		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_CACHE);
+		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		if (offset) {
 			spin_unlock(&swap_lock);
 			return swp_entry(type, offset);
@@ -508,7 +484,7 @@ swp_entry_t get_swap_page_of_type(int type)
 	if (si && (si->flags & SWP_WRITEOK)) {
 		nr_swap_pages--;
 		/* This is called for allocating swap entry, not cache */
-		offset = scan_swap_map(si, SWAP_MAP);
+		offset = scan_swap_map(si, 1);
 		if (offset) {
 			spin_unlock(&swap_lock);
 			return swp_entry(type, offset);
@@ -555,29 +531,31 @@ out:
 	return NULL;
 }
 
-static int swap_entry_free(struct swap_info_struct *p,
-			   swp_entry_t ent, int cache)
+static unsigned short swap_entry_free(struct swap_info_struct *p,
+			   swp_entry_t entry, unsigned short usage)
 {
-	unsigned long offset = swp_offset(ent);
-	int count = swap_count(p->swap_map[offset]);
-	bool has_cache;
+	unsigned long offset = swp_offset(entry);
+	unsigned short count;
+	unsigned short has_cache;
 
-	has_cache = swap_has_cache(p->swap_map[offset]);
+	count = p->swap_map[offset];
+	has_cache = count & SWAP_HAS_CACHE;
+	count &= ~SWAP_HAS_CACHE;
 
-	if (cache == SWAP_MAP) { /* dropping usage count of swap */
-		if (count < SWAP_MAP_MAX) {
-			count--;
-			p->swap_map[offset] = encode_swapmap(count, has_cache);
-		}
-	} else { /* dropping swap cache flag */
+	if (usage == SWAP_HAS_CACHE) {
 		VM_BUG_ON(!has_cache);
-		p->swap_map[offset] = encode_swapmap(count, false);
+		has_cache = 0;
+	} else if (count < SWAP_MAP_MAX)
+		count--;
+
+	if (!count)
+		mem_cgroup_uncharge_swap(entry);
+
+	usage = count | has_cache;
+	p->swap_map[offset] = usage;
 
-	}
-	/* return code. */
-	count = p->swap_map[offset];
 	/* free if no reference */
-	if (!count) {
+	if (!usage) {
 		if (offset < p->lowest_bit)
 			p->lowest_bit = offset;
 		if (offset > p->highest_bit)
@@ -588,9 +566,8 @@ static int swap_entry_free(struct swap_info_struct *p,
 		nr_swap_pages++;
 		p->inuse_pages--;
 	}
-	if (!swap_count(count))
-		mem_cgroup_uncharge_swap(ent);
-	return count;
+
+	return usage;
 }
 
 /*
@@ -603,7 +580,7 @@ void swap_free(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		swap_entry_free(p, entry, SWAP_MAP);
+		swap_entry_free(p, entry, 1);
 		spin_unlock(&swap_lock);
 	}
 }
@@ -614,19 +591,13 @@ void swap_free(swp_entry_t entry)
 void swapcache_free(swp_entry_t entry, struct page *page)
 {
 	struct swap_info_struct *p;
-	int ret;
+	unsigned short count;
 
 	p = swap_info_get(entry);
 	if (p) {
-		ret = swap_entry_free(p, entry, SWAP_CACHE);
-		if (page) {
-			bool swapout;
-			if (ret)
-				swapout = true; /* the end of swap out */
-			else
-				swapout = false; /* no more swap users! */
-			mem_cgroup_uncharge_swapcache(page, entry, swapout);
-		}
+		count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
+		if (page)
+			mem_cgroup_uncharge_swapcache(page, entry, count != 0);
 		spin_unlock(&swap_lock);
 	}
 }
@@ -705,7 +676,7 @@ int free_swap_and_cache(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) {
+		if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
 			page = find_get_page(&swapper_space, entry.val);
 			if (page && !trylock_page(page)) {
 				page_cache_release(page);
@@ -1212,7 +1183,7 @@ static int try_to_unuse(unsigned int type)
 
 		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
 			spin_lock(&swap_lock);
-			*swap_map = encode_swapmap(0, true);
+			*swap_map = SWAP_HAS_CACHE;
 			spin_unlock(&swap_lock);
 			reset_overflow = 1;
 		}
@@ -2111,16 +2082,16 @@ void si_swapinfo(struct sysinfo *val)
  * - swap-cache reference is requested but there is already one. -> EEXIST
  * - swap-cache reference is requested but the entry is not used. -> ENOENT
  */
-static int __swap_duplicate(swp_entry_t entry, bool cache)
+static int __swap_duplicate(swp_entry_t entry, unsigned short usage)
 {
 	struct swap_info_struct *p;
 	unsigned long offset, type;
-	int result = -EINVAL;
-	int count;
-	bool has_cache;
+	unsigned short count;
+	unsigned short has_cache;
+	int err = -EINVAL;
 
 	if (non_swap_entry(entry))
-		return -EINVAL;
+		goto out;
 
 	type = swp_type(entry);
 	if (type >= nr_swapfiles)
@@ -2129,54 +2100,56 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
 	offset = swp_offset(entry);
 
 	spin_lock(&swap_lock);
-
 	if (unlikely(offset >= p->max))
 		goto unlock_out;
 
-	count = swap_count(p->swap_map[offset]);
-	has_cache = swap_has_cache(p->swap_map[offset]);
+	count = p->swap_map[offset];
+	has_cache = count & SWAP_HAS_CACHE;
+	count &= ~SWAP_HAS_CACHE;
+	err = 0;
 
-	if (cache == SWAP_CACHE) { /* called for swapcache/swapin-readahead */
+	if (usage == SWAP_HAS_CACHE) {
 
 		/* set SWAP_HAS_CACHE if there is no cache and entry is used */
-		if (!has_cache && count) {
-			p->swap_map[offset] = encode_swapmap(count, true);
-			result = 0;
-		} else if (has_cache) /* someone added cache */
-			result = -EEXIST;
-		else if (!count) /* no users */
-			result = -ENOENT;
+		if (!has_cache && count)
+			has_cache = SWAP_HAS_CACHE;
+		else if (has_cache)		/* someone else added cache */
+			err = -EEXIST;
+		else				/* no users remaining */
+			err = -ENOENT;
 
 	} else if (count || has_cache) {
-		if (count < SWAP_MAP_MAX - 1) {
-			p->swap_map[offset] = encode_swapmap(count + 1,
-							     has_cache);
-			result = 0;
-		} else if (count <= SWAP_MAP_MAX) {
+
+		if (count < SWAP_MAP_MAX - 1)
+			count++;
+		else if (count <= SWAP_MAP_MAX) {
 			if (swap_overflow++ < 5)
 				printk(KERN_WARNING
 				       "swap_dup: swap entry overflow\n");
-			p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
-							      has_cache);
-			result = 0;
-		}
+			count = SWAP_MAP_MAX;
+		} else
+			err = -EINVAL;
 	} else
-		result = -ENOENT; /* unused swap entry */
+		err = -ENOENT;			/* unused swap entry */
+
+	p->swap_map[offset] = count | has_cache;
+
 unlock_out:
 	spin_unlock(&swap_lock);
 out:
-	return result;
+	return err;
 
 bad_file:
 	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 	goto out;
 }
+
 /*
  * increase reference count of swap entry by 1.
  */
 void swap_duplicate(swp_entry_t entry)
 {
-	__swap_duplicate(entry, SWAP_MAP);
+	__swap_duplicate(entry, 1);
 }
 
 /*
@@ -2189,7 +2162,7 @@ void swap_duplicate(swp_entry_t entry)
  */
 int swapcache_prepare(swp_entry_t entry)
 {
-	return __swap_duplicate(entry, SWAP_CACHE);
+	return __swap_duplicate(entry, SWAP_HAS_CACHE);
 }
 
 /*
-- 
cgit v1.2.3


From 8d69aaee80c123b460918816cbfa2e83224c3646 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:45 -0800
Subject: swap_info: swap_map of chars not shorts

Halve the vmalloc'ed swap_map array from unsigned shorts to unsigned
chars: it's still very unusual to reach a swap count of 126, and the
next patch allows it to be extended indefinitely.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  8 ++++----
 mm/swapfile.c        | 40 +++++++++++++++++++++++-----------------
 2 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c9d8870892b8..f733deb10748 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -151,9 +151,9 @@ enum {
 
 #define SWAP_CLUSTER_MAX 32
 
-#define SWAP_MAP_MAX	0x7ffe
-#define SWAP_MAP_BAD	0x7fff
-#define SWAP_HAS_CACHE  0x8000		/* There is a swap cache of entry. */
+#define SWAP_MAP_MAX	0x7e
+#define SWAP_MAP_BAD	0x7f
+#define SWAP_HAS_CACHE	0x80		/* There is a swap cache of entry. */
 
 /*
  * The in-memory structure used to track swap areas.
@@ -167,7 +167,7 @@ struct swap_info_struct {
 	struct block_device *bdev;
 	struct swap_extent first_swap_extent;
 	struct swap_extent *curr_swap_extent;
-	unsigned short *swap_map;
+	unsigned char *swap_map;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
 	unsigned int lowest_alloc;	/* while preparing discard cluster */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 52497490a7ca..c0d7b9ed0c16 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -53,7 +53,7 @@ static struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
-static inline int swap_count(unsigned short ent)
+static inline unsigned char swap_count(unsigned char ent)
 {
 	return ent & ~SWAP_HAS_CACHE;
 }
@@ -203,7 +203,7 @@ static int wait_for_discard(void *word)
 #define LATENCY_LIMIT		256
 
 static inline unsigned long scan_swap_map(struct swap_info_struct *si,
-					  unsigned short usage)
+					  unsigned char usage)
 {
 	unsigned long offset;
 	unsigned long scan_base;
@@ -531,12 +531,12 @@ out:
 	return NULL;
 }
 
-static unsigned short swap_entry_free(struct swap_info_struct *p,
-			   swp_entry_t entry, unsigned short usage)
+static unsigned char swap_entry_free(struct swap_info_struct *p,
+				     swp_entry_t entry, unsigned char usage)
 {
 	unsigned long offset = swp_offset(entry);
-	unsigned short count;
-	unsigned short has_cache;
+	unsigned char count;
+	unsigned char has_cache;
 
 	count = p->swap_map[offset];
 	has_cache = count & SWAP_HAS_CACHE;
@@ -591,7 +591,7 @@ void swap_free(swp_entry_t entry)
 void swapcache_free(swp_entry_t entry, struct page *page)
 {
 	struct swap_info_struct *p;
-	unsigned short count;
+	unsigned char count;
 
 	p = swap_info_get(entry);
 	if (p) {
@@ -975,7 +975,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 {
 	unsigned int max = si->max;
 	unsigned int i = prev;
-	int count;
+	unsigned char count;
 
 	/*
 	 * No need for swap_lock here: we're just looking
@@ -1013,8 +1013,8 @@ static int try_to_unuse(unsigned int type)
 {
 	struct swap_info_struct *si = swap_info[type];
 	struct mm_struct *start_mm;
-	unsigned short *swap_map;
-	unsigned short swcount;
+	unsigned char *swap_map;
+	unsigned char swcount;
 	struct page *page;
 	swp_entry_t entry;
 	unsigned int i = 0;
@@ -1174,6 +1174,12 @@ static int try_to_unuse(unsigned int type)
 		 * If that's wrong, then we should worry more about
 		 * exit_mmap() and do_munmap() cases described above:
 		 * we might be resetting SWAP_MAP_MAX too early here.
+		 *
+		 * Yes, that's wrong: though very unlikely, swap count 0x7ffe
+		 * could surely occur if pid_max raised from PID_MAX_DEFAULT;
+		 * and we are now lowering SWAP_MAP_MAX to 0x7e, making it
+		 * much easier to reach.  But the next patch will fix that.
+		 *
 		 * We know "Undead"s can happen, they're okay, so don't
 		 * report them; but do report if we reset SWAP_MAP_MAX.
 		 */
@@ -1492,7 +1498,7 @@ bad_bmap:
 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
 	struct swap_info_struct *p = NULL;
-	unsigned short *swap_map;
+	unsigned char *swap_map;
 	struct file *swap_file, *victim;
 	struct address_space *mapping;
 	struct inode *inode;
@@ -1762,7 +1768,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	sector_t span;
 	unsigned long maxpages = 1;
 	unsigned long swapfilepages;
-	unsigned short *swap_map = NULL;
+	unsigned char *swap_map = NULL;
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 	int did_down = 0;
@@ -1938,13 +1944,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap;
 
 	/* OK, set up the swap map and apply the bad block list */
-	swap_map = vmalloc(maxpages * sizeof(short));
+	swap_map = vmalloc(maxpages);
 	if (!swap_map) {
 		error = -ENOMEM;
 		goto bad_swap;
 	}
 
-	memset(swap_map, 0, maxpages * sizeof(short));
+	memset(swap_map, 0, maxpages);
 	for (i = 0; i < swap_header->info.nr_badpages; i++) {
 		int page_nr = swap_header->info.badpages[i];
 		if (page_nr <= 0 || page_nr >= swap_header->info.last_page) {
@@ -2082,12 +2088,12 @@ void si_swapinfo(struct sysinfo *val)
  * - swap-cache reference is requested but there is already one. -> EEXIST
  * - swap-cache reference is requested but the entry is not used. -> ENOENT
  */
-static int __swap_duplicate(swp_entry_t entry, unsigned short usage)
+static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 {
 	struct swap_info_struct *p;
 	unsigned long offset, type;
-	unsigned short count;
-	unsigned short has_cache;
+	unsigned char count;
+	unsigned char has_cache;
 	int err = -EINVAL;
 
 	if (non_swap_entry(entry))
-- 
cgit v1.2.3


From 570a335b8e22579e2a51a68136d2b1f907a20eec Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:46 -0800
Subject: swap_info: swap count continuations

Swap is duplicated (reference count incremented by one) whenever the same
swap page is inserted into another mm (when forking finds a swap entry in
place of a pte, or when reclaim unmaps a pte to insert the swap entry).

swap_info_struct's vmalloc'ed swap_map is the array of these reference
counts: but what happens when the unsigned short (or unsigned char since
the preceding patch) is full? (and its high bit is kept for a cache flag)

We then lose track of it, never freeing, leaving it in use until swapoff:
at which point we _hope_ that a single pass will have found all instances,
assume there are no more, and will lose user data if we're wrong.

Swapping of KSM pages has not yet been enabled; but it is implemented,
and makes it very easy for a user to overflow the maximum swap count:
possible with ordinary process pages, but unlikely, even when pid_max
has been raised from PID_MAX_DEFAULT.

This patch implements swap count continuations: when the count overflows,
a continuation page is allocated and linked to the original vmalloc'ed
map page, and this used to hold the continuation counts for that entry
and its neighbours.  These continuation pages are seldom referenced:
the common paths all work on the original swap_map, only referring to
a continuation page when the low "digit" of a count is incremented or
decremented through SWAP_MAP_MAX.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  22 +++-
 mm/memory.c          |  19 +++-
 mm/rmap.c            |   6 +-
 mm/swapfile.c        | 304 ++++++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 287 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f733deb10748..389e7bd92cca 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -145,15 +145,18 @@ enum {
 	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
 	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
+	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32
 
-#define SWAP_MAP_MAX	0x7e
-#define SWAP_MAP_BAD	0x7f
-#define SWAP_HAS_CACHE	0x80		/* There is a swap cache of entry. */
+#define SWAP_MAP_MAX	0x3e	/* Max duplication count, in first swap_map */
+#define SWAP_MAP_BAD	0x3f	/* Note pageblock is bad, in first swap_map */
+#define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
+#define SWAP_CONT_MAX	0x7f	/* Max count, in each swap_map continuation */
+#define COUNT_CONTINUED	0x80	/* See swap_map continuation for full count */
 
 /*
  * The in-memory structure used to track swap areas.
@@ -311,9 +314,10 @@ extern long total_swap_pages;
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
-extern void swap_duplicate(swp_entry_t);
-extern int swapcache_prepare(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
+extern int add_swap_count_continuation(swp_entry_t, gfp_t);
+extern int swap_duplicate(swp_entry_t);
+extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
 extern void swapcache_free(swp_entry_t, struct page *page);
 extern int free_swap_and_cache(swp_entry_t);
@@ -385,8 +389,14 @@ static inline void show_swap_cache_info(void)
 #define free_swap_and_cache(swp)	is_migration_entry(swp)
 #define swapcache_prepare(swp)		is_migration_entry(swp)
 
-static inline void swap_duplicate(swp_entry_t swp)
+static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
 {
+	return 0;
+}
+
+static inline int swap_duplicate(swp_entry_t swp)
+{
+	return 0;
 }
 
 static inline void swap_free(swp_entry_t swp)
diff --git a/mm/memory.c b/mm/memory.c
index 6ab19dd4a199..543c446bf4ed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -572,7 +572,7 @@ out:
  * covered by this vma.
  */
 
-static inline void
+static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr, int *rss)
@@ -586,7 +586,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		if (!pte_file(pte)) {
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
-			swap_duplicate(entry);
+			if (swap_duplicate(entry) < 0)
+				return entry.val;
+
 			/* make sure dst_mm is on swapoff's mmlist. */
 			if (unlikely(list_empty(&dst_mm->mmlist))) {
 				spin_lock(&mmlist_lock);
@@ -635,6 +637,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -646,6 +649,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
 	int rss[2];
+	swp_entry_t entry = (swp_entry_t){0};
 
 again:
 	rss[1] = rss[0] = 0;
@@ -674,7 +678,10 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+							vma, addr, rss);
+		if (entry.val)
+			break;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
@@ -684,6 +691,12 @@ again:
 	add_mm_rss(dst_mm, rss[0], rss[1]);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
+
+	if (entry.val) {
+		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
+			return -ENOMEM;
+		progress = 0;
+	}
 	if (addr != end)
 		goto again;
 	return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index dd43373a483f..710bb4b2adf1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -822,7 +822,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * Store the swap location in the pte.
 			 * See handle_pte_fault() ...
 			 */
-			swap_duplicate(entry);
+			if (swap_duplicate(entry) < 0) {
+				set_pte_at(mm, address, pte, pteval);
+				ret = SWAP_FAIL;
+				goto out_unmap;
+			}
 			if (list_empty(&mm->mmlist)) {
 				spin_lock(&mmlist_lock);
 				if (list_empty(&mm->mmlist))
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c0d7b9ed0c16..cc5e7ebf2d2c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -35,11 +35,14 @@
 #include <linux/swapops.h>
 #include <linux/page_cgroup.h>
 
+static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
+				 unsigned char);
+static void free_swap_count_continuations(struct swap_info_struct *);
+
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long nr_swap_pages;
 long total_swap_pages;
-static int swap_overflow;
 static int least_priority;
 
 static const char Bad_file[] = "Bad swap file entry ";
@@ -55,7 +58,7 @@ static DEFINE_MUTEX(swapon_mutex);
 
 static inline unsigned char swap_count(unsigned char ent)
 {
-	return ent & ~SWAP_HAS_CACHE;
+	return ent & ~SWAP_HAS_CACHE;	/* may include SWAP_HAS_CONT flag */
 }
 
 /* returns 1 if swap entry is freed */
@@ -545,8 +548,15 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	if (usage == SWAP_HAS_CACHE) {
 		VM_BUG_ON(!has_cache);
 		has_cache = 0;
-	} else if (count < SWAP_MAP_MAX)
-		count--;
+	} else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
+		if (count == COUNT_CONTINUED) {
+			if (swap_count_continued(p, offset, count))
+				count = SWAP_MAP_MAX | COUNT_CONTINUED;
+			else
+				count = SWAP_MAP_MAX;
+		} else
+			count--;
+	}
 
 	if (!count)
 		mem_cgroup_uncharge_swap(entry);
@@ -604,6 +614,8 @@ void swapcache_free(swp_entry_t entry, struct page *page)
 
 /*
  * How many references to page are currently swapped out?
+ * This does not give an exact answer when swap count is continued,
+ * but does include the high COUNT_CONTINUED flag to allow for that.
  */
 static inline int page_swapcount(struct page *page)
 {
@@ -1019,7 +1031,6 @@ static int try_to_unuse(unsigned int type)
 	swp_entry_t entry;
 	unsigned int i = 0;
 	int retval = 0;
-	int reset_overflow = 0;
 	int shmem;
 
 	/*
@@ -1034,8 +1045,7 @@ static int try_to_unuse(unsigned int type)
 	 * together, child after parent.  If we race with dup_mmap(), we
 	 * prefer to resolve parent before child, lest we miss entries
 	 * duplicated after we scanned child: using last mm would invert
-	 * that.  Though it's only a serious concern when an overflowed
-	 * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
+	 * that.
 	 */
 	start_mm = &init_mm;
 	atomic_inc(&init_mm.mm_users);
@@ -1164,36 +1174,6 @@ static int try_to_unuse(unsigned int type)
 			break;
 		}
 
-		/*
-		 * How could swap count reach 0x7ffe ?
-		 * There's no way to repeat a swap page within an mm
-		 * (except in shmem, where it's the shared object which takes
-		 * the reference count)?
-		 * We believe SWAP_MAP_MAX cannot occur.(if occur, unsigned
-		 * short is too small....)
-		 * If that's wrong, then we should worry more about
-		 * exit_mmap() and do_munmap() cases described above:
-		 * we might be resetting SWAP_MAP_MAX too early here.
-		 *
-		 * Yes, that's wrong: though very unlikely, swap count 0x7ffe
-		 * could surely occur if pid_max raised from PID_MAX_DEFAULT;
-		 * and we are now lowering SWAP_MAP_MAX to 0x7e, making it
-		 * much easier to reach.  But the next patch will fix that.
-		 *
-		 * We know "Undead"s can happen, they're okay, so don't
-		 * report them; but do report if we reset SWAP_MAP_MAX.
-		 */
-		/* We might release the lock_page() in unuse_mm(). */
-		if (!PageSwapCache(page) || page_private(page) != entry.val)
-			goto retry;
-
-		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
-			spin_lock(&swap_lock);
-			*swap_map = SWAP_HAS_CACHE;
-			spin_unlock(&swap_lock);
-			reset_overflow = 1;
-		}
-
 		/*
 		 * If a reference remains (rare), we would like to leave
 		 * the page in the swap cache; but try_to_unmap could
@@ -1235,7 +1215,6 @@ static int try_to_unuse(unsigned int type)
 		 * mark page dirty so shrink_page_list will preserve it.
 		 */
 		SetPageDirty(page);
-retry:
 		unlock_page(page);
 		page_cache_release(page);
 
@@ -1247,10 +1226,6 @@ retry:
 	}
 
 	mmput(start_mm);
-	if (reset_overflow) {
-		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
-		swap_overflow = 0;
-	}
 	return retval;
 }
 
@@ -1593,6 +1568,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	up_write(&swap_unplug_sem);
 
 	destroy_swap_extents(p);
+	if (p->flags & SWP_CONTINUED)
+		free_swap_count_continuations(p);
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	drain_mmlist();
@@ -2079,14 +2057,13 @@ void si_swapinfo(struct sysinfo *val)
 /*
  * Verify that a swap entry is valid and increment its swap map count.
  *
- * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
- * "permanent", but will be reclaimed by the next swapoff.
  * Returns error code in following case.
  * - success -> 0
  * - swp_entry is invalid -> EINVAL
  * - swp_entry is migration entry -> EINVAL
  * - swap-cache reference is requested but there is already one. -> EEXIST
  * - swap-cache reference is requested but the entry is not used. -> ENOENT
+ * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
  */
 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 {
@@ -2126,15 +2103,14 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 
 	} else if (count || has_cache) {
 
-		if (count < SWAP_MAP_MAX - 1)
-			count++;
-		else if (count <= SWAP_MAP_MAX) {
-			if (swap_overflow++ < 5)
-				printk(KERN_WARNING
-				       "swap_dup: swap entry overflow\n");
-			count = SWAP_MAP_MAX;
-		} else
+		if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
+			count += usage;
+		else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
 			err = -EINVAL;
+		else if (swap_count_continued(p, offset, count))
+			count = COUNT_CONTINUED;
+		else
+			err = -ENOMEM;
 	} else
 		err = -ENOENT;			/* unused swap entry */
 
@@ -2153,9 +2129,13 @@ bad_file:
 /*
  * increase reference count of swap entry by 1.
  */
-void swap_duplicate(swp_entry_t entry)
+int swap_duplicate(swp_entry_t entry)
 {
-	__swap_duplicate(entry, 1);
+	int err = 0;
+
+	while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
+		err = add_swap_count_continuation(entry, GFP_ATOMIC);
+	return err;
 }
 
 /*
@@ -2222,3 +2202,219 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 	*offset = ++toff;
 	return nr_pages? ++nr_pages: 0;
 }
+
+/*
+ * add_swap_count_continuation - called when a swap count is duplicated
+ * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
+ * page of the original vmalloc'ed swap_map, to hold the continuation count
+ * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
+ * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
+ *
+ * These continuation pages are seldom referenced: the common paths all work
+ * on the original swap_map, only referring to a continuation page when the
+ * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
+ *
+ * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
+ * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
+ * can be called after dropping locks.
+ */
+int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
+{
+	struct swap_info_struct *si;
+	struct page *head;
+	struct page *page;
+	struct page *list_page;
+	pgoff_t offset;
+	unsigned char count;
+
+	/*
+	 * When debugging, it's easier to use __GFP_ZERO here; but it's better
+	 * for latency not to zero a page while GFP_ATOMIC and holding locks.
+	 */
+	page = alloc_page(gfp_mask | __GFP_HIGHMEM);
+
+	si = swap_info_get(entry);
+	if (!si) {
+		/*
+		 * An acceptable race has occurred since the failing
+		 * __swap_duplicate(): the swap entry has been freed,
+		 * perhaps even the whole swap_map cleared for swapoff.
+		 */
+		goto outer;
+	}
+
+	offset = swp_offset(entry);
+	count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
+
+	if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
+		/*
+		 * The higher the swap count, the more likely it is that tasks
+		 * will race to add swap count continuation: we need to avoid
+		 * over-provisioning.
+		 */
+		goto out;
+	}
+
+	if (!page) {
+		spin_unlock(&swap_lock);
+		return -ENOMEM;
+	}
+
+	/*
+	 * We are fortunate that although vmalloc_to_page uses pte_offset_map,
+	 * no architecture is using highmem pages for kernel pagetables: so it
+	 * will not corrupt the GFP_ATOMIC caller's atomic pagetable kmaps.
+	 */
+	head = vmalloc_to_page(si->swap_map + offset);
+	offset &= ~PAGE_MASK;
+
+	/*
+	 * Page allocation does not initialize the page's lru field,
+	 * but it does always reset its private field.
+	 */
+	if (!page_private(head)) {
+		BUG_ON(count & COUNT_CONTINUED);
+		INIT_LIST_HEAD(&head->lru);
+		set_page_private(head, SWP_CONTINUED);
+		si->flags |= SWP_CONTINUED;
+	}
+
+	list_for_each_entry(list_page, &head->lru, lru) {
+		unsigned char *map;
+
+		/*
+		 * If the previous map said no continuation, but we've found
+		 * a continuation page, free our allocation and use this one.
+		 */
+		if (!(count & COUNT_CONTINUED))
+			goto out;
+
+		map = kmap_atomic(list_page, KM_USER0) + offset;
+		count = *map;
+		kunmap_atomic(map, KM_USER0);
+
+		/*
+		 * If this continuation count now has some space in it,
+		 * free our allocation and use this one.
+		 */
+		if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
+			goto out;
+	}
+
+	list_add_tail(&page->lru, &head->lru);
+	page = NULL;			/* now it's attached, don't free it */
+out:
+	spin_unlock(&swap_lock);
+outer:
+	if (page)
+		__free_page(page);
+	return 0;
+}
+
+/*
+ * swap_count_continued - when the original swap_map count is incremented
+ * from SWAP_MAP_MAX, check if there is already a continuation page to carry
+ * into, carry if so, or else fail until a new continuation page is allocated;
+ * when the original swap_map count is decremented from 0 with continuation,
+ * borrow from the continuation and report whether it still holds more.
+ * Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
+ */
+static bool swap_count_continued(struct swap_info_struct *si,
+				 pgoff_t offset, unsigned char count)
+{
+	struct page *head;
+	struct page *page;
+	unsigned char *map;
+
+	head = vmalloc_to_page(si->swap_map + offset);
+	if (page_private(head) != SWP_CONTINUED) {
+		BUG_ON(count & COUNT_CONTINUED);
+		return false;		/* need to add count continuation */
+	}
+
+	offset &= ~PAGE_MASK;
+	page = list_entry(head->lru.next, struct page, lru);
+	map = kmap_atomic(page, KM_USER0) + offset;
+
+	if (count == SWAP_MAP_MAX)	/* initial increment from swap_map */
+		goto init_map;		/* jump over SWAP_CONT_MAX checks */
+
+	if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
+		/*
+		 * Think of how you add 1 to 999
+		 */
+		while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			BUG_ON(page == head);
+			map = kmap_atomic(page, KM_USER0) + offset;
+		}
+		if (*map == SWAP_CONT_MAX) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			if (page == head)
+				return false;	/* add count continuation */
+			map = kmap_atomic(page, KM_USER0) + offset;
+init_map:		*map = 0;		/* we didn't zero the page */
+		}
+		*map += 1;
+		kunmap_atomic(map, KM_USER0);
+		page = list_entry(page->lru.prev, struct page, lru);
+		while (page != head) {
+			map = kmap_atomic(page, KM_USER0) + offset;
+			*map = COUNT_CONTINUED;
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.prev, struct page, lru);
+		}
+		return true;			/* incremented */
+
+	} else {				/* decrementing */
+		/*
+		 * Think of how you subtract 1 from 1000
+		 */
+		BUG_ON(count != COUNT_CONTINUED);
+		while (*map == COUNT_CONTINUED) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			BUG_ON(page == head);
+			map = kmap_atomic(page, KM_USER0) + offset;
+		}
+		BUG_ON(*map == 0);
+		*map -= 1;
+		if (*map == 0)
+			count = 0;
+		kunmap_atomic(map, KM_USER0);
+		page = list_entry(page->lru.prev, struct page, lru);
+		while (page != head) {
+			map = kmap_atomic(page, KM_USER0) + offset;
+			*map = SWAP_CONT_MAX | count;
+			count = COUNT_CONTINUED;
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.prev, struct page, lru);
+		}
+		return count == COUNT_CONTINUED;
+	}
+}
+
+/*
+ * free_swap_count_continuations - swapoff free all the continuation pages
+ * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
+ */
+static void free_swap_count_continuations(struct swap_info_struct *si)
+{
+	pgoff_t offset;
+
+	for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
+		struct page *head;
+		head = vmalloc_to_page(si->swap_map + offset);
+		if (page_private(head)) {
+			struct list_head *this, *next;
+			list_for_each_safe(this, next, &head->lru) {
+				struct page *page;
+				page = list_entry(this, struct page, lru);
+				list_del(this);
+				__free_page(page);
+			}
+		}
+	}
+}
-- 
cgit v1.2.3


From aaa468653b4a0d11c603c48d716f765177a5a9e4 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:47 -0800
Subject: swap_info: note SWAP_MAP_SHMEM

While we're fiddling with the swap_map values, let's assign a particular
value to shmem/tmpfs swap pages: their swap counts are never incremented,
and it helps swapoff's try_to_unuse() a little if it can immediately
distinguish those pages from process pages.

Since we've no use for SWAP_MAP_BAD | COUNT_CONTINUED,
we might as well use that 0xbf value for SWAP_MAP_SHMEM.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  6 ++++++
 mm/shmem.c           | 11 +++++++++--
 mm/swapfile.c        | 47 +++++++++++++++++++++++++++--------------------
 3 files changed, 42 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 389e7bd92cca..ac43d87b89b0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -157,6 +157,7 @@ enum {
 #define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
 #define SWAP_CONT_MAX	0x7f	/* Max count, in each swap_map continuation */
 #define COUNT_CONTINUED	0x80	/* See swap_map continuation for full count */
+#define SWAP_MAP_SHMEM	0xbf	/* Owned by shmem/tmpfs, in first swap_map */
 
 /*
  * The in-memory structure used to track swap areas.
@@ -316,6 +317,7 @@ extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
+extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
@@ -394,6 +396,10 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
 	return 0;
 }
 
+static inline void swap_shmem_alloc(swp_entry_t swp)
+{
+}
+
 static inline int swap_duplicate(swp_entry_t swp)
 {
 	return 0;
diff --git a/mm/shmem.c b/mm/shmem.c
index 356dd99566ec..4fb41c83daca 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1017,7 +1017,14 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 			goto out;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-out:	return found;	/* 0 or 1 or -ENOMEM */
+	/*
+	 * Can some race bring us here?  We've been holding page lock,
+	 * so I think not; but would rather try again later than BUG()
+	 */
+	unlock_page(page);
+	page_cache_release(page);
+out:
+	return (found < 0) ? found : 0;
 }
 
 /*
@@ -1080,7 +1087,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		else
 			inode = NULL;
 		spin_unlock(&info->lock);
-		swap_duplicate(swap);
+		swap_shmem_alloc(swap);
 		BUG_ON(page_mapped(page));
 		page_cache_release(page);	/* pagecache ref */
 		swap_writepage(page, wbc);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index cc5e7ebf2d2c..58bec6600167 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -548,6 +548,12 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	if (usage == SWAP_HAS_CACHE) {
 		VM_BUG_ON(!has_cache);
 		has_cache = 0;
+	} else if (count == SWAP_MAP_SHMEM) {
+		/*
+		 * Or we could insist on shmem.c using a special
+		 * swap_shmem_free() and free_shmem_swap_and_cache()...
+		 */
+		count = 0;
 	} else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
 		if (count == COUNT_CONTINUED) {
 			if (swap_count_continued(p, offset, count))
@@ -1031,7 +1037,6 @@ static int try_to_unuse(unsigned int type)
 	swp_entry_t entry;
 	unsigned int i = 0;
 	int retval = 0;
-	int shmem;
 
 	/*
 	 * When searching mms for an entry, a good strategy is to
@@ -1107,17 +1112,18 @@ static int try_to_unuse(unsigned int type)
 
 		/*
 		 * Remove all references to entry.
-		 * Whenever we reach init_mm, there's no address space
-		 * to search, but use it as a reminder to search shmem.
 		 */
-		shmem = 0;
 		swcount = *swap_map;
-		if (swap_count(swcount)) {
-			if (start_mm == &init_mm)
-				shmem = shmem_unuse(entry, page);
-			else
-				retval = unuse_mm(start_mm, entry, page);
+		if (swap_count(swcount) == SWAP_MAP_SHMEM) {
+			retval = shmem_unuse(entry, page);
+			/* page has already been unlocked and released */
+			if (retval < 0)
+				break;
+			continue;
 		}
+		if (swap_count(swcount) && start_mm != &init_mm)
+			retval = unuse_mm(start_mm, entry, page);
+
 		if (swap_count(*swap_map)) {
 			int set_start_mm = (*swap_map >= swcount);
 			struct list_head *p = &start_mm->mmlist;
@@ -1128,7 +1134,7 @@ static int try_to_unuse(unsigned int type)
 			atomic_inc(&new_start_mm->mm_users);
 			atomic_inc(&prev_mm->mm_users);
 			spin_lock(&mmlist_lock);
-			while (swap_count(*swap_map) && !retval && !shmem &&
+			while (swap_count(*swap_map) && !retval &&
 					(p = p->next) != &start_mm->mmlist) {
 				mm = list_entry(p, struct mm_struct, mmlist);
 				if (!atomic_inc_not_zero(&mm->mm_users))
@@ -1142,10 +1148,9 @@ static int try_to_unuse(unsigned int type)
 				swcount = *swap_map;
 				if (!swap_count(swcount)) /* any usage ? */
 					;
-				else if (mm == &init_mm) {
+				else if (mm == &init_mm)
 					set_start_mm = 1;
-					shmem = shmem_unuse(entry, page);
-				} else
+				else
 					retval = unuse_mm(mm, entry, page);
 
 				if (set_start_mm && *swap_map < swcount) {
@@ -1161,13 +1166,6 @@ static int try_to_unuse(unsigned int type)
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
-		if (shmem) {
-			/* page has already been unlocked and released */
-			if (shmem > 0)
-				continue;
-			retval = shmem;
-			break;
-		}
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -2126,6 +2124,15 @@ bad_file:
 	goto out;
 }
 
+/*
+ * Help swapoff by noting that swap entry belongs to shmem/tmpfs
+ * (in which case its reference count is never incremented).
+ */
+void swap_shmem_alloc(swp_entry_t entry)
+{
+	__swap_duplicate(entry, SWAP_MAP_SHMEM);
+}
+
 /*
  * increase reference count of swap entry by 1.
  */
-- 
cgit v1.2.3


From 7509765a29cfb1a4c506c09b304aaf3b4111c653 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:48 -0800
Subject: swap_info: reorder its fields

Reorder (and comment) the fields of swap_info_struct, to make better
use of its cachelines: it's good for swap_duplicate() in particular
if unsigned int max and swap_map are near the start.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ac43d87b89b0..9f0ca325e30d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,21 +167,21 @@ struct swap_info_struct {
 	signed short	prio;		/* swap priority of this type */
 	signed char	type;		/* strange name for an index */
 	signed char	next;		/* next type on the swap list */
-	struct file *swap_file;
-	struct block_device *bdev;
-	struct swap_extent first_swap_extent;
-	struct swap_extent *curr_swap_extent;
-	unsigned char *swap_map;
-	unsigned int lowest_bit;
-	unsigned int highest_bit;
+	unsigned int	max;		/* extent of the swap_map */
+	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
+	unsigned int lowest_bit;	/* index of first free in swap_map */
+	unsigned int highest_bit;	/* index of last free in swap_map */
+	unsigned int pages;		/* total of usable pages of swap */
+	unsigned int inuse_pages;	/* number of those currently in use */
+	unsigned int cluster_next;	/* likely index for next allocation */
+	unsigned int cluster_nr;	/* countdown to next cluster search */
 	unsigned int lowest_alloc;	/* while preparing discard cluster */
 	unsigned int highest_alloc;	/* while preparing discard cluster */
-	unsigned int cluster_next;
-	unsigned int cluster_nr;
-	unsigned int pages;
-	unsigned int max;
-	unsigned int inuse_pages;
-	unsigned int old_block_size;
+	struct swap_extent *curr_swap_extent;
+	struct swap_extent first_swap_extent;
+	struct block_device *bdev;	/* swap device or bdev of swap file */
+	struct file *swap_file;		/* seldom referenced */
+	unsigned int old_block_size;	/* seldom referenced */
 };
 
 struct swap_list_t {
-- 
cgit v1.2.3


From d4906e1aa516cc965292b43b5a26122dd4344e7e Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Mon, 14 Dec 2009 17:58:49 -0800
Subject: swap: rework map_swap_page() again

Seems that page_io.c doesn't really need to know that page_private(page)
is the swp_entry 'val'.  Rework map_swap_page() to do what its name says
and map a page to a page offset in the swap space.

The only other caller of map_swap_page() is internal to mm/swapfile.c and
it does want to map a swap entry to the 'sector'.  So rename
map_swap_page() to map_swap_entry(), make it 'static' and and implement
map_swap_page() as a wrapper around that.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 +-
 mm/page_io.c         |  4 +---
 mm/swapfile.c        | 20 ++++++++++++++++----
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 9f0ca325e30d..a2602a8207a6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -325,7 +325,7 @@ extern void swapcache_free(swp_entry_t, struct page *page);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
-extern sector_t map_swap_page(swp_entry_t, struct block_device **);
+extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
diff --git a/mm/page_io.c b/mm/page_io.c
index afeed89a0a5d..a19af956ee1b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -26,9 +26,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
 
 	bio = bio_alloc(gfp_flags, 1);
 	if (bio) {
-		swp_entry_t entry;
-		entry.val = page_private(page);
-		bio->bi_sector = map_swap_page(entry, &bio->bi_bdev);
+		bio->bi_sector = map_swap_page(page, &bio->bi_bdev);
 		bio->bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_io_vec[0].bv_page = page;
 		bio->bi_io_vec[0].bv_len = PAGE_SIZE;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 58bec6600167..d5eb2e85600b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -38,6 +38,7 @@
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 				 unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
+static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -782,7 +783,7 @@ sector_t swapdev_block(int type, pgoff_t offset)
 		return 0;
 	if (!(swap_info[type]->flags & SWP_WRITEOK))
 		return 0;
-	return map_swap_page(swp_entry(type, offset), &bdev);
+	return map_swap_entry(swp_entry(type, offset), &bdev);
 }
 
 /*
@@ -1249,10 +1250,11 @@ static void drain_mmlist(void)
 
 /*
  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
- * corresponds to page offset `offset'.  Note that the type of this function
- * is sector_t, but it returns page offset into the bdev, not sector offset.
+ * corresponds to page offset for the specified swap entry.
+ * Note that the type of this function is sector_t, but it returns page offset
+ * into the bdev, not sector offset.
  */
-sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
+static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
 	struct swap_info_struct *sis;
 	struct swap_extent *start_se;
@@ -1280,6 +1282,16 @@ sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
 	}
 }
 
+/*
+ * Returns the page offset into bdev for the specified page's swap entry.
+ */
+sector_t map_swap_page(struct page *page, struct block_device **bdev)
+{
+	swp_entry_t entry;
+	entry.val = page_private(page);
+	return map_swap_entry(entry, bdev);
+}
+
 /*
  * Free all of a swapdev's extent information
  */
-- 
cgit v1.2.3


From f50de2d3811081957156b5d736778799379c29de Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 14 Dec 2009 17:58:53 -0800
Subject: vmscan: have kswapd sleep for a short interval and double check it
 should be asleep

After kswapd balances all zones in a pgdat, it goes to sleep.  In the
event of no IO congestion, kswapd can go to sleep very shortly after the
high watermark was reached.  If there are a constant stream of allocations
from parallel processes, it can mean that kswapd went to sleep too quickly
and the high watermark is not being maintained for sufficient length time.

This patch makes kswapd go to sleep as a two-stage process.  It first
tries to sleep for HZ/10.  If it is woken up by another process or the
high watermark is no longer met, it's considered a premature sleep and
kswapd continues work.  Otherwise it goes fully to sleep.

This adds more counters to distinguish between fast and slow breaches of
watermarks.  A "fast" premature sleep is one where the low watermark was
hit in a very short time after kswapd going to sleep.  A "slow" premature
sleep indicates that the high watermark was breached after a very short
interval.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Frans Pop <elendil@planet.nl>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h |  1 +
 mm/vmscan.c            | 44 ++++++++++++++++++++++++++++++++++++++++++--
 mm/vmstat.c            |  2 ++
 3 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d85889710f9b..fd5be240c0b7 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -40,6 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGSCAN_ZONE_RECLAIM_FAILED,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+		KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 61d3a9a0d96f..e176bd3936da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1904,6 +1904,24 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+/* is kswapd sleeping prematurely? */
+static int sleeping_prematurely(int order, long remaining)
+{
+	struct zone *zone;
+
+	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
+	if (remaining)
+		return 1;
+
+	/* If after HZ/10, a zone is below the high mark, it's premature */
+	for_each_populated_zone(zone)
+		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
+								0, 0))
+			return 1;
+
+	return 0;
+}
+
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
@@ -2185,8 +2203,30 @@ static int kswapd(void *p)
 			 */
 			order = new_order;
 		} else {
-			if (!freezing(current) && !kthread_should_stop())
-				schedule();
+			if (!freezing(current) && !kthread_should_stop()) {
+				long remaining = 0;
+
+				/* Try to sleep for a short interval */
+				if (!sleeping_prematurely(order, remaining)) {
+					remaining = schedule_timeout(HZ/10);
+					finish_wait(&pgdat->kswapd_wait, &wait);
+					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+				}
+
+				/*
+				 * After a short sleep, check if it was a
+				 * premature sleep. If not, then go fully
+				 * to sleep until explicitly woken up
+				 */
+				if (!sleeping_prematurely(order, remaining))
+					schedule();
+				else {
+					if (remaining)
+						count_vm_event(KSWAPD_PREMATURE_FAST);
+					else
+						count_vm_event(KSWAPD_PREMATURE_SLOW);
+				}
+			}
 
 			order = pgdat->kswapd_max_order;
 		}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dad2327e4580..63ab71455c5b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -683,6 +683,8 @@ static const char * const vmstat_text[] = {
 	"slabs_scanned",
 	"kswapd_steal",
 	"kswapd_inodesteal",
+	"kswapd_slept_prematurely_fast",
+	"kswapd_slept_prematurely_slow",
 	"pageoutrun",
 	"allocstall",
 
-- 
cgit v1.2.3


From bb3ab596832b920c703d1aea1ce76d69c0f71fb7 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 14 Dec 2009 17:58:55 -0800
Subject: vmscan: stop kswapd waiting on congestion when the min watermark is
 not being met

If reclaim fails to make sufficient progress, the priority is raised.
Once the priority is higher, kswapd starts waiting on congestion.
However, if the zone is below the min watermark then kswapd needs to
continue working without delay as there is a danger of an increased rate
of GFP_ATOMIC allocation failure.

This patch changes the conditions under which kswapd waits on congestion
by only going to sleep if the min watermarks are being met.

[mel@csn.ul.ie: add stats to track how relevant the logic is]
[mel@csn.ul.ie: make kswapd only check its own zones and rename the relevant counters]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h |  3 ++-
 mm/vmscan.c            | 38 +++++++++++++++++++++++++++++---------
 mm/vmstat.c            |  5 +++--
 3 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fd5be240c0b7..ee03bba9c5df 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -40,7 +40,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGSCAN_ZONE_RECLAIM_FAILED,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
-		KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW,
+		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
+		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e176bd3936da..cb69f717799f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1905,19 +1905,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 #endif
 
 /* is kswapd sleeping prematurely? */
-static int sleeping_prematurely(int order, long remaining)
+static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 {
-	struct zone *zone;
+	int i;
 
 	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
 	if (remaining)
 		return 1;
 
 	/* If after HZ/10, a zone is below the high mark, it's premature */
-	for_each_populated_zone(zone)
+	for (i = 0; i < pgdat->nr_zones; i++) {
+		struct zone *zone = pgdat->node_zones + i;
+
+		if (!populated_zone(zone))
+			continue;
+
 		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
 								0, 0))
 			return 1;
+	}
 
 	return 0;
 }
@@ -1979,6 +1985,7 @@ loop_again:
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+		int has_under_min_watermark_zone = 0;
 
 		/* The swap token gets in the way of swapout... */
 		if (!priority)
@@ -2085,6 +2092,15 @@ loop_again:
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
+
+			/*
+			 * We are still under min water mark. it mean we have
+			 * GFP_ATOMIC allocation failure risk. Hurry up!
+			 */
+			if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
+					      end_zone, 0))
+				has_under_min_watermark_zone = 1;
+
 		}
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
@@ -2092,8 +2108,12 @@ loop_again:
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+			if (has_under_min_watermark_zone)
+				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
+			else
+				congestion_wait(BLK_RW_ASYNC, HZ/10);
+		}
 
 		/*
 		 * We do this so kswapd doesn't build up large priorities for
@@ -2207,7 +2227,7 @@ static int kswapd(void *p)
 				long remaining = 0;
 
 				/* Try to sleep for a short interval */
-				if (!sleeping_prematurely(order, remaining)) {
+				if (!sleeping_prematurely(pgdat, order, remaining)) {
 					remaining = schedule_timeout(HZ/10);
 					finish_wait(&pgdat->kswapd_wait, &wait);
 					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2218,13 +2238,13 @@ static int kswapd(void *p)
 				 * premature sleep. If not, then go fully
 				 * to sleep until explicitly woken up
 				 */
-				if (!sleeping_prematurely(order, remaining))
+				if (!sleeping_prematurely(pgdat, order, remaining))
 					schedule();
 				else {
 					if (remaining)
-						count_vm_event(KSWAPD_PREMATURE_FAST);
+						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
 					else
-						count_vm_event(KSWAPD_PREMATURE_SLOW);
+						count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
 				}
 			}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 63ab71455c5b..6051fbab67ba 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -683,8 +683,9 @@ static const char * const vmstat_text[] = {
 	"slabs_scanned",
 	"kswapd_steal",
 	"kswapd_inodesteal",
-	"kswapd_slept_prematurely_fast",
-	"kswapd_slept_prematurely_slow",
+	"kswapd_low_wmark_hit_quickly",
+	"kswapd_high_wmark_hit_quickly",
+	"kswapd_skip_congestion_wait",
 	"pageoutrun",
 	"allocstall",
 
-- 
cgit v1.2.3


From 3ca7b3c5b64d35fe02c35b5d44c2c58b49499fee Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:57 -0800
Subject: mm: define PAGE_MAPPING_FLAGS

At present we define PageAnon(page) by the low PAGE_MAPPING_ANON bit set
in page->mapping, with the higher bits a pointer to the anon_vma; and have
defined PageKsm(page) as that with NULL anon_vma.

But KSM swapping will need to store a pointer there: so in preparation for
that, now define PAGE_MAPPING_FLAGS as the low two bits, including
PAGE_MAPPING_KSM (always set along with PAGE_MAPPING_ANON, until some
other use for the bit emerges).

Declare page_rmapping(page) to return the pointer part of page->mapping,
and page_anon_vma(page) to return the anon_vma pointer when that's what it
is.  Use these in a few appropriate places: notably, unuse_vma() has been
testing page->mapping, but is better to be testing page_anon_vma() (cases
may be added in which flag bits are set without any pointer).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h  |  5 +++--
 include/linux/mm.h   | 17 ++++++++++++++++-
 include/linux/rmap.h |  8 ++++++++
 mm/migrate.c         | 11 ++++-------
 mm/rmap.c            |  7 +++----
 mm/swapfile.c        |  2 +-
 6 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index a485c14ecd5d..1401a313fa77 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -38,7 +38,8 @@ static inline void ksm_exit(struct mm_struct *mm)
  */
 static inline int PageKsm(struct page *page)
 {
-	return ((unsigned long)page->mapping == PAGE_MAPPING_ANON);
+	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 }
 
 /*
@@ -47,7 +48,7 @@ static inline int PageKsm(struct page *page)
 static inline void page_add_ksm_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount)) {
-		page->mapping = (void *) PAGE_MAPPING_ANON;
+		page->mapping = (void *) (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 		__inc_zone_page_state(page, NR_ANON_PAGES);
 	}
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c395694f4d..1202cd3121e1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -620,13 +620,22 @@ void page_address_init(void);
 /*
  * On an anonymous page mapped into a user virtual memory area,
  * page->mapping points to its anon_vma, not to a struct address_space;
- * with the PAGE_MAPPING_ANON bit set to distinguish it.
+ * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
+ *
+ * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
+ * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit;
+ * and then page->mapping points, not to an anon_vma, but to a private
+ * structure which KSM associates with that merged page.  See ksm.h.
+ *
+ * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used.
  *
  * Please note that, confusingly, "page_mapping" refers to the inode
  * address_space which maps the page from disk; whereas "page_mapped"
  * refers to user virtual address space into which the page is mapped.
  */
 #define PAGE_MAPPING_ANON	1
+#define PAGE_MAPPING_KSM	2
+#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
 
 extern struct address_space swapper_space;
 static inline struct address_space *page_mapping(struct page *page)
@@ -644,6 +653,12 @@ static inline struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/* Neutral page->mapping pointer to address_space or anon_vma or other */
+static inline void *page_rmapping(struct page *page)
+{
+	return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
+}
+
 static inline int PageAnon(struct page *page)
 {
 	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index cb0ba7032609..1f65af44c6d2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -39,6 +39,14 @@ struct anon_vma {
 
 #ifdef CONFIG_MMU
 
+static inline struct anon_vma *page_anon_vma(struct page *page)
+{
+	if (((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) !=
+					    PAGE_MAPPING_ANON)
+		return NULL;
+	return page_rmapping(page);
+}
+
 static inline void anon_vma_lock(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
diff --git a/mm/migrate.c b/mm/migrate.c
index 576c25eeb1ca..367272d04423 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -172,17 +172,14 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
-	unsigned long mapping;
-
-	mapping = (unsigned long)new->mapping;
-
-	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
-		return;
 
 	/*
 	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
 	 */
-	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+	anon_vma = page_anon_vma(new);
+	if (!anon_vma)
+		return;
+
 	spin_lock(&anon_vma->lock);
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
diff --git a/mm/rmap.c b/mm/rmap.c
index 1a0ee6e634c2..f06cee48eca7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -203,7 +203,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	rcu_read_lock();
 	anon_mapping = (unsigned long) page->mapping;
-	if (!(anon_mapping & PAGE_MAPPING_ANON))
+	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
 		goto out;
 	if (!page_mapped(page))
 		goto out;
@@ -248,8 +248,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
 	if (PageAnon(page)) {
-		if ((void *)vma->anon_vma !=
-		    (void *)page->mapping - PAGE_MAPPING_ANON)
+		if (vma->anon_vma != page_anon_vma(page))
 			return -EFAULT;
 	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
 		if (!vma->vm_file ||
@@ -513,7 +512,7 @@ int page_referenced(struct page *page,
 		referenced++;
 
 	*vm_flags = 0;
-	if (page_mapped(page) && page->mapping) {
+	if (page_mapped(page) && page_rmapping(page)) {
 		if (PageAnon(page))
 			referenced += page_referenced_anon(page, mem_cont,
 								vm_flags);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d5eb2e85600b..e74112e8e5f4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -938,7 +938,7 @@ static int unuse_vma(struct vm_area_struct *vma,
 	unsigned long addr, end, next;
 	int ret;
 
-	if (page->mapping) {
+	if (page_anon_vma(page)) {
 		addr = page_address_in_vma(page, vma);
 		if (addr == -EFAULT)
 			return 0;
-- 
cgit v1.2.3


From af8e3354b4bbd1ee5a3a55d11a5e1fe37e77f0ba Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:58:59 -0800
Subject: mm: CONFIG_MMU for PG_mlocked

Remove three degrees of obfuscation, left over from when we had
CONFIG_UNEVICTABLE_LRU.  MLOCK_PAGES is CONFIG_HAVE_MLOCKED_PAGE_BIT is
CONFIG_HAVE_MLOCK is CONFIG_MMU.  rmap.o (and memory-failure.o) are only
built when CONFIG_MMU, so don't need such conditions at all.

Somehow, I feel no compulsion to remove the CONFIG_HAVE_MLOCK* lines from
169 defconfigs: leave those to evolve in due course.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  8 +++-----
 mm/Kconfig                 |  8 --------
 mm/internal.h              | 26 ++++++++++++--------------
 mm/memory-failure.c        |  2 --
 mm/page_alloc.c            |  4 ----
 mm/rmap.c                  | 15 ++++-----------
 6 files changed, 19 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6b202b173955..49e907bd067f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -99,7 +99,7 @@ enum pageflags {
 	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
 	PG_unevictable,		/* Page is "unevictable"  */
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+#ifdef CONFIG_MMU
 	PG_mlocked,		/* Page is vma mlocked */
 #endif
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
@@ -259,12 +259,10 @@ PAGEFLAG_FALSE(SwapCache)
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 	TESTCLEARFLAG(Unevictable, unevictable)
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
-#define MLOCK_PAGES 1
+#ifdef CONFIG_MMU
 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
 	TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
 #else
-#define MLOCK_PAGES 0
 PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
 	TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
 #endif
@@ -393,7 +391,7 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+#ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
 #define __PG_MLOCKED		0
diff --git a/mm/Kconfig b/mm/Kconfig
index 44cf6f0a3a6d..77b4980d6143 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -200,14 +200,6 @@ config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
 
-config HAVE_MLOCK
-	bool
-	default y if MMU=y
-
-config HAVE_MLOCKED_PAGE_BIT
-	bool
-	default y if HAVE_MLOCK=y
-
 config MMU_NOTIFIER
 	bool
 
diff --git a/mm/internal.h b/mm/internal.h
index 22ec8d2b0fb8..cb7d92d0a46d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -63,17 +63,6 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
-#ifdef CONFIG_HAVE_MLOCK
-extern long mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end);
-extern void munlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end);
-static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
-{
-	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
-}
-#endif
-
 /*
  * unevictable_migrate_page() called only from migrate_page_copy() to
  * migrate unevictable flag to new page.
@@ -86,7 +75,16 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old)
 		SetPageUnevictable(new);
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+#ifdef CONFIG_MMU
+extern long mlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+extern void munlock_vma_pages_range(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
+
 /*
  * Called only in fault path via page_evictable() for a new page
  * to determine if it's being mapped into a LOCKED vma.
@@ -144,7 +142,7 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 	}
 }
 
-#else /* CONFIG_HAVE_MLOCKED_PAGE_BIT */
+#else /* !CONFIG_MMU */
 static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
 {
 	return 0;
@@ -153,7 +151,7 @@ static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
 static inline void mlock_migrate_page(struct page *new, struct page *old) { }
 
-#endif /* CONFIG_HAVE_MLOCKED_PAGE_BIT */
+#endif /* !CONFIG_MMU */
 
 /*
  * Return the mem_map entry representing the 'offset' subpage within
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 1ac49fef95ab..50d4f8d7024a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -582,10 +582,8 @@ static struct page_state {
 	{ unevict|dirty, unevict|dirty,	"unevictable LRU", me_pagecache_dirty},
 	{ unevict,	unevict,	"unevictable LRU", me_pagecache_clean},
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 	{ mlock|dirty,	mlock|dirty,	"mlocked LRU",	me_pagecache_dirty },
 	{ mlock,	mlock,		"mlocked LRU",	me_pagecache_clean },
-#endif
 
 	{ lru|dirty,	lru|dirty,	"LRU",		me_pagecache_dirty },
 	{ lru|dirty,	lru,		"clean LRU",	me_pagecache_clean },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..59d2e88fb47c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -486,7 +486,6 @@ static inline void __free_one_page(struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +496,6 @@ static inline void free_page_mlock(struct page *page)
 	__dec_zone_page_state(page, NR_MLOCK);
 	__count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
diff --git a/mm/rmap.c b/mm/rmap.c
index c3d6dc4223a4..eb3dfc8355ea 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -788,7 +788,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			ret = SWAP_MLOCK;
 			goto out_unmap;
 		}
-		if (MLOCK_PAGES && TTU_ACTION(flags) == TTU_MUNLOCK)
+		if (TTU_ACTION(flags) == TTU_MUNLOCK)
 			goto out_unmap;
 	}
 	if (!(flags & TTU_IGNORE_ACCESS)) {
@@ -861,7 +861,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 
-	if (MLOCK_PAGES && ret == SWAP_MLOCK) {
+	if (ret == SWAP_MLOCK) {
 		ret = SWAP_AGAIN;
 		if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
 			if (vma->vm_flags & VM_LOCKED) {
@@ -938,11 +938,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 		return ret;
 
 	/*
-	 * MLOCK_PAGES => feature is configured.
-	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
 	 * keep the sem while scanning the cluster for mlocking pages.
 	 */
-	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
 		locked_vma = (vma->vm_flags & VM_LOCKED);
 		if (!locked_vma)
 			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
@@ -1075,9 +1074,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-			(vma->vm_flags & VM_LOCKED))
-			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
 			max_nl_cursor = cursor;
@@ -1110,9 +1106,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-			    (vma->vm_flags & VM_LOCKED))
-				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-- 
cgit v1.2.3


From 08beca44dfb0ab008e365163df70dbd302ae1508 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:59:21 -0800
Subject: ksm: stable_node point to page and back

Add a pointer to the ksm page into struct stable_node, holding a reference
to the page while the node exists.  Put a pointer to the stable_node into
the ksm page's ->mapping.

Then we don't need get_ksm_page() while traversing the stable tree: the
page to compare against is sure to be present and correct, even if it's no
longer visible through any of its existing rmap_items.

And we can handle the forked ksm page case more efficiently: no need to
memcmp our way through the tree to find its match.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h | 24 +++++++++----
 mm/ksm.c            | 99 ++++++++++++++++++-----------------------------------
 2 files changed, 51 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 1401a313fa77..ef55ce14a2ce 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -12,6 +12,8 @@
 #include <linux/sched.h>
 #include <linux/vmstat.h>
 
+struct stable_node;
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -34,7 +36,8 @@ static inline void ksm_exit(struct mm_struct *mm)
 /*
  * A KSM page is one of those write-protected "shared pages" or "merged pages"
  * which KSM maps into multiple mms, wherever identical anonymous page content
- * is found in VM_MERGEABLE vmas.  It's a PageAnon page, with NULL anon_vma.
+ * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
+ * anon_vma, but to that page's node of the stable tree.
  */
 static inline int PageKsm(struct page *page)
 {
@@ -42,15 +45,22 @@ static inline int PageKsm(struct page *page)
 				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 }
 
-/*
- * But we have to avoid the checking which page_add_anon_rmap() performs.
- */
+static inline struct stable_node *page_stable_node(struct page *page)
+{
+	return PageKsm(page) ? page_rmapping(page) : NULL;
+}
+
+static inline void set_page_stable_node(struct page *page,
+					struct stable_node *stable_node)
+{
+	page->mapping = (void *)stable_node +
+				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+}
+
 static inline void page_add_ksm_rmap(struct page *page)
 {
-	if (atomic_inc_and_test(&page->_mapcount)) {
-		page->mapping = (void *) (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_zone_page_state(page, NR_ANON_PAGES);
-	}
 }
 #else  /* !CONFIG_KSM */
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 9b7af2eb4280..748785683399 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -107,10 +107,12 @@ struct ksm_scan {
 
 /**
  * struct stable_node - node of the stable rbtree
+ * @page: pointer to struct page of the ksm page
  * @node: rb node of this ksm page in the stable tree
  * @hlist: hlist head of rmap_items using this ksm page
  */
 struct stable_node {
+	struct page *page;
 	struct rb_node node;
 	struct hlist_head hlist;
 };
@@ -434,23 +436,6 @@ out:		page = NULL;
 	return page;
 }
 
-/*
- * get_ksm_page: checks if the page at the virtual address in rmap_item
- * is still PageKsm, in which case we can trust the content of the page,
- * and it returns the gotten page; but NULL if the page has been zapped.
- */
-static struct page *get_ksm_page(struct rmap_item *rmap_item)
-{
-	struct page *page;
-
-	page = get_mergeable_page(rmap_item);
-	if (page && !PageKsm(page)) {
-		put_page(page);
-		page = NULL;
-	}
-	return page;
-}
-
 /*
  * Removing rmap_item from stable or unstable tree.
  * This function will clean the information from the stable/unstable tree.
@@ -465,6 +450,9 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		if (stable_node->hlist.first)
 			ksm_pages_sharing--;
 		else {
+			set_page_stable_node(stable_node->page, NULL);
+			put_page(stable_node->page);
+
 			rb_erase(&stable_node->node, &root_stable_tree);
 			free_stable_node(stable_node);
 			ksm_pages_shared--;
@@ -740,8 +728,7 @@ out:
  * try_to_merge_one_page - take two pages and merge them into one
  * @vma: the vma that holds the pte pointing to page
  * @page: the PageAnon page that we want to replace with kpage
- * @kpage: the PageKsm page (or newly allocated page which page_add_ksm_rmap
- *         will make PageKsm) that we want to map instead of page
+ * @kpage: the PageKsm page that we want to map instead of page
  *
  * This function returns 0 if the pages were merged, -EFAULT otherwise.
  */
@@ -793,6 +780,9 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
 	struct vm_area_struct *vma;
 	int err = -EFAULT;
 
+	if (page == kpage)			/* ksm page forked */
+		return 0;
+
 	down_read(&mm->mmap_sem);
 	if (ksm_test_exit(mm))
 		goto out;
@@ -846,6 +836,9 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 		goto up;
 
 	copy_user_highpage(kpage, page, rmap_item->address, vma);
+
+	set_page_stable_node(kpage, NULL);	/* mark it PageKsm */
+
 	err = try_to_merge_one_page(vma, page, kpage);
 up:
 	up_read(&mm->mmap_sem);
@@ -876,41 +869,31 @@ up:
  * This function returns the stable tree node of identical content if found,
  * NULL otherwise.
  */
-static struct stable_node *stable_tree_search(struct page *page,
-					      struct page **tree_pagep)
+static struct stable_node *stable_tree_search(struct page *page)
 {
 	struct rb_node *node = root_stable_tree.rb_node;
 	struct stable_node *stable_node;
 
+	stable_node = page_stable_node(page);
+	if (stable_node) {			/* ksm page forked */
+		get_page(page);
+		return stable_node;
+	}
+
 	while (node) {
-		struct hlist_node *hlist, *hnext;
-		struct rmap_item *tree_rmap_item;
-		struct page *tree_page;
 		int ret;
 
+		cond_resched();
 		stable_node = rb_entry(node, struct stable_node, node);
-		hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
-					&stable_node->hlist, hlist) {
-			BUG_ON(!in_stable_tree(tree_rmap_item));
-			cond_resched();
-			tree_page = get_ksm_page(tree_rmap_item);
-			if (tree_page)
-				break;
-			remove_rmap_item_from_tree(tree_rmap_item);
-		}
-		if (!hlist)
-			return NULL;
 
-		ret = memcmp_pages(page, tree_page);
+		ret = memcmp_pages(page, stable_node->page);
 
-		if (ret < 0) {
-			put_page(tree_page);
+		if (ret < 0)
 			node = node->rb_left;
-		} else if (ret > 0) {
-			put_page(tree_page);
+		else if (ret > 0)
 			node = node->rb_right;
-		} else {
-			*tree_pagep = tree_page;
+		else {
+			get_page(stable_node->page);
 			return stable_node;
 		}
 	}
@@ -932,26 +915,12 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 	struct stable_node *stable_node;
 
 	while (*new) {
-		struct hlist_node *hlist, *hnext;
-		struct rmap_item *tree_rmap_item;
-		struct page *tree_page;
 		int ret;
 
+		cond_resched();
 		stable_node = rb_entry(*new, struct stable_node, node);
-		hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
-					&stable_node->hlist, hlist) {
-			BUG_ON(!in_stable_tree(tree_rmap_item));
-			cond_resched();
-			tree_page = get_ksm_page(tree_rmap_item);
-			if (tree_page)
-				break;
-			remove_rmap_item_from_tree(tree_rmap_item);
-		}
-		if (!hlist)
-			return NULL;
 
-		ret = memcmp_pages(kpage, tree_page);
-		put_page(tree_page);
+		ret = memcmp_pages(kpage, stable_node->page);
 
 		parent = *new;
 		if (ret < 0)
@@ -977,6 +946,10 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 
 	INIT_HLIST_HEAD(&stable_node->hlist);
 
+	get_page(kpage);
+	stable_node->page = kpage;
+	set_page_stable_node(kpage, stable_node);
+
 	return stable_node;
 }
 
@@ -1085,14 +1058,10 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	remove_rmap_item_from_tree(rmap_item);
 
 	/* We first start with searching the page inside the stable tree */
-	stable_node = stable_tree_search(page, &tree_page);
+	stable_node = stable_tree_search(page);
 	if (stable_node) {
-		kpage = tree_page;
-		if (page == kpage)			/* forked */
-			err = 0;
-		else
-			err = try_to_merge_with_ksm_page(rmap_item,
-							 page, kpage);
+		kpage = stable_node->page;
+		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
 		if (!err) {
 			/*
 			 * The page was successfully merged:
-- 
cgit v1.2.3


From 5ad6468801d28c4d4ac9f48ec19297817c915f6a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:59:24 -0800
Subject: ksm: let shared pages be swappable

Initial implementation for swapping out KSM's shared pages: add
page_referenced_ksm() and try_to_unmap_ksm(), which rmap.c calls when
faced with a PageKsm page.

Most of what's needed can be got from the rmap_items listed from the
stable_node of the ksm page, without discovering the actual vma: so in
this patch just fake up a struct vma for page_referenced_one() or
try_to_unmap_one(), then refine that in the next patch.

Add VM_NONLINEAR to ksm_madvise()'s list of exclusions: it has always been
implicit there (being only set with VM_SHARED, already excluded), but
let's make it explicit, to help justify the lack of nonlinear unmap.

Rely on the page lock to protect against concurrent modifications to that
page's node of the stable tree.

The awkward part is not swapout but swapin: do_swap_page() and
page_add_anon_rmap() now have to allow for new possibilities - perhaps a
ksm page still in swapcache, perhaps a swapcache page associated with one
location in one anon_vma now needed for another location or anon_vma.
(And the vma might even be no longer VM_MERGEABLE when that happens.)

ksm_might_need_to_copy() checks for that case, and supplies a duplicate
page when necessary, simply leaving it to a subsequent pass of ksmd to
rediscover the identity and merge them back into one ksm page.
Disappointingly primitive: but the alternative would have to accumulate
unswappable info about the swapped out ksm pages, limiting swappability.

Remove page_add_ksm_rmap(): page_add_anon_rmap() now has to allow for the
particular case it was handling, so just use it instead.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h  |  54 ++++++++++++++--
 include/linux/rmap.h |   5 ++
 mm/ksm.c             | 172 ++++++++++++++++++++++++++++++++++++++++++++++-----
 mm/memory.c          |   6 ++
 mm/rmap.c            |  65 +++++++++++--------
 mm/swapfile.c        |  11 +++-
 6 files changed, 264 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index ef55ce14a2ce..157d83dbaef8 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -9,10 +9,12 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
 #include <linux/sched.h>
-#include <linux/vmstat.h>
 
 struct stable_node;
+struct mem_cgroup;
 
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -57,11 +59,36 @@ static inline void set_page_stable_node(struct page *page,
 				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 }
 
-static inline void page_add_ksm_rmap(struct page *page)
+/*
+ * When do_swap_page() first faults in from swap what used to be a KSM page,
+ * no problem, it will be assigned to this vma's anon_vma; but thereafter,
+ * it might be faulted into a different anon_vma (or perhaps to a different
+ * offset in the same anon_vma).  do_swap_page() cannot do all the locking
+ * needed to reconstitute a cross-anon_vma KSM page: for now it has to make
+ * a copy, and leave remerging the pages to a later pass of ksmd.
+ *
+ * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
+ * but what if the vma was unmerged while the page was swapped out?
+ */
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
+static inline struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
 {
-	if (atomic_inc_and_test(&page->_mapcount))
-		__inc_zone_page_state(page, NR_ANON_PAGES);
+	struct anon_vma *anon_vma = page_anon_vma(page);
+
+	if (!anon_vma ||
+	    (anon_vma == vma->anon_vma &&
+	     page->index == linear_page_index(vma, address)))
+		return page;
+
+	return ksm_does_need_to_copy(page, vma, address);
 }
+
+int page_referenced_ksm(struct page *page,
+			struct mem_cgroup *memcg, unsigned long *vm_flags);
+int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
+
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -84,7 +111,22 @@ static inline int PageKsm(struct page *page)
 	return 0;
 }
 
-/* No stub required for page_add_ksm_rmap(page) */
+static inline struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	return page;
+}
+
+static inline int page_referenced_ksm(struct page *page,
+			struct mem_cgroup *memcg, unsigned long *vm_flags)
+{
+	return 0;
+}
+
+static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
+{
+	return 0;
+}
 #endif /* !CONFIG_KSM */
 
-#endif
+#endif /* __LINUX_KSM_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1f65af44c6d2..0b4913a4a344 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -89,6 +89,9 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *cnt, unsigned long *vm_flags);
+int page_referenced_one(struct page *, struct vm_area_struct *,
+	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+
 enum ttu_flags {
 	TTU_UNMAP = 0,			/* unmap mode */
 	TTU_MIGRATION = 1,		/* migration mode */
@@ -102,6 +105,8 @@ enum ttu_flags {
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
+int try_to_unmap_one(struct page *, struct vm_area_struct *,
+			unsigned long address, enum ttu_flags flags);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/mm/ksm.c b/mm/ksm.c
index af5f571185d5..2f58ceebfe8f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -196,6 +196,13 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
 static DEFINE_SPINLOCK(ksm_mmlist_lock);
 
+/*
+ * Temporary hack for page_referenced_ksm() and try_to_unmap_ksm(),
+ * later we rework things a little to get the right vma to them.
+ */
+static DEFINE_SPINLOCK(ksm_fallback_vma_lock);
+static struct vm_area_struct ksm_fallback_vma;
+
 #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL)
@@ -445,14 +452,20 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 {
 	if (rmap_item->address & STABLE_FLAG) {
 		struct stable_node *stable_node;
+		struct page *page;
 
 		stable_node = rmap_item->head;
+		page = stable_node->page;
+		lock_page(page);
+
 		hlist_del(&rmap_item->hlist);
-		if (stable_node->hlist.first)
+		if (stable_node->hlist.first) {
+			unlock_page(page);
 			ksm_pages_sharing--;
-		else {
-			set_page_stable_node(stable_node->page, NULL);
-			put_page(stable_node->page);
+		} else {
+			set_page_stable_node(page, NULL);
+			unlock_page(page);
+			put_page(page);
 
 			rb_erase(&stable_node->node, &root_stable_tree);
 			free_stable_node(stable_node);
@@ -710,7 +723,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	}
 
 	get_page(kpage);
-	page_add_ksm_rmap(kpage);
+	page_add_anon_rmap(kpage, vma, addr);
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
 	ptep_clear_flush(vma, addr, ptep);
@@ -763,8 +776,16 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 	    pages_identical(page, kpage))
 		err = replace_page(vma, page, kpage, orig_pte);
 
-	if ((vma->vm_flags & VM_LOCKED) && !err)
+	if ((vma->vm_flags & VM_LOCKED) && !err) {
 		munlock_vma_page(page);
+		if (!PageMlocked(kpage)) {
+			unlock_page(page);
+			lru_add_drain();
+			lock_page(kpage);
+			mlock_vma_page(kpage);
+			page = kpage;		/* for final unlock */
+		}
+	}
 
 	unlock_page(page);
 out:
@@ -841,7 +862,11 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 
 	copy_user_highpage(kpage, page, rmap_item->address, vma);
 
+	SetPageDirty(kpage);
+	__SetPageUptodate(kpage);
+	SetPageSwapBacked(kpage);
 	set_page_stable_node(kpage, NULL);	/* mark it PageKsm */
+	lru_cache_add_lru(kpage, LRU_ACTIVE_ANON);
 
 	err = try_to_merge_one_page(vma, page, kpage);
 up:
@@ -1071,7 +1096,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			 * The page was successfully merged:
 			 * add its rmap_item to the stable tree.
 			 */
+			lock_page(kpage);
 			stable_tree_append(rmap_item, stable_node);
+			unlock_page(kpage);
 		}
 		put_page(kpage);
 		return;
@@ -1112,11 +1139,13 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 		if (kpage) {
 			remove_rmap_item_from_tree(tree_rmap_item);
 
+			lock_page(kpage);
 			stable_node = stable_tree_insert(kpage);
 			if (stable_node) {
 				stable_tree_append(tree_rmap_item, stable_node);
 				stable_tree_append(rmap_item, stable_node);
 			}
+			unlock_page(kpage);
 			put_page(kpage);
 
 			/*
@@ -1285,14 +1314,6 @@ static void ksm_do_scan(unsigned int scan_npages)
 			return;
 		if (!PageKsm(page) || !in_stable_tree(rmap_item))
 			cmp_and_merge_page(page, rmap_item);
-		else if (page_mapcount(page) == 1) {
-			/*
-			 * Replace now-unshared ksm page by ordinary page.
-			 */
-			break_cow(rmap_item);
-			remove_rmap_item_from_tree(rmap_item);
-			rmap_item->oldchecksum = calc_checksum(page);
-		}
 		put_page(page);
 	}
 }
@@ -1337,7 +1358,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
 				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
 				 VM_RESERVED  | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP  | VM_SAO))
+				 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
 			return 0;		/* just ignore the advice */
 
 		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
@@ -1435,6 +1456,127 @@ void __ksm_exit(struct mm_struct *mm)
 	}
 }
 
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct page *new_page;
+
+	unlock_page(page);	/* any racers will COW it, not modify it */
+
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page) {
+		copy_user_highpage(new_page, page, address, vma);
+
+		SetPageDirty(new_page);
+		__SetPageUptodate(new_page);
+		SetPageSwapBacked(new_page);
+		__set_page_locked(new_page);
+
+		if (page_evictable(new_page, vma))
+			lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
+		else
+			add_page_to_unevictable_list(new_page);
+	}
+
+	page_cache_release(page);
+	return new_page;
+}
+
+int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
+			unsigned long *vm_flags)
+{
+	struct stable_node *stable_node;
+	struct rmap_item *rmap_item;
+	struct hlist_node *hlist;
+	unsigned int mapcount = page_mapcount(page);
+	int referenced = 0;
+	struct vm_area_struct *vma;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return 0;
+
+	/*
+	 * Temporary hack: really we need anon_vma in rmap_item, to
+	 * provide the correct vma, and to find recently forked instances.
+	 * Use zalloc to avoid weirdness if any other fields are involved.
+	 */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
+	if (!vma) {
+		spin_lock(&ksm_fallback_vma_lock);
+		vma = &ksm_fallback_vma;
+	}
+
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		if (memcg && !mm_match_cgroup(rmap_item->mm, memcg))
+			continue;
+
+		vma->vm_mm = rmap_item->mm;
+		vma->vm_start = rmap_item->address;
+		vma->vm_end = vma->vm_start + PAGE_SIZE;
+
+		referenced += page_referenced_one(page, vma,
+				rmap_item->address, &mapcount, vm_flags);
+		if (!mapcount)
+			goto out;
+	}
+out:
+	if (vma == &ksm_fallback_vma)
+		spin_unlock(&ksm_fallback_vma_lock);
+	else
+		kmem_cache_free(vm_area_cachep, vma);
+	return referenced;
+}
+
+int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
+{
+	struct stable_node *stable_node;
+	struct hlist_node *hlist;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	struct vm_area_struct *vma;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return SWAP_FAIL;
+
+	/*
+	 * Temporary hack: really we need anon_vma in rmap_item, to
+	 * provide the correct vma, and to find recently forked instances.
+	 * Use zalloc to avoid weirdness if any other fields are involved.
+	 */
+	if (TTU_ACTION(flags) != TTU_UNMAP)
+		return SWAP_FAIL;
+
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
+	if (!vma) {
+		spin_lock(&ksm_fallback_vma_lock);
+		vma = &ksm_fallback_vma;
+	}
+
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		vma->vm_mm = rmap_item->mm;
+		vma->vm_start = rmap_item->address;
+		vma->vm_end = vma->vm_start + PAGE_SIZE;
+
+		ret = try_to_unmap_one(page, vma, rmap_item->address, flags);
+		if (ret != SWAP_AGAIN || !page_mapped(page))
+			goto out;
+	}
+out:
+	if (vma == &ksm_fallback_vma)
+		spin_unlock(&ksm_fallback_vma_lock);
+	else
+		kmem_cache_free(vm_area_cachep, vma);
+	return ret;
+}
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
diff --git a/mm/memory.c b/mm/memory.c
index 1c9dc46da3db..a54b2c498444 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2561,6 +2561,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
+	page = ksm_might_need_to_copy(page, vma, address);
+	if (!page) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
diff --git a/mm/rmap.c b/mm/rmap.c
index ebee81688736..869aaa3206a2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
 #include <linux/swapops.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
@@ -336,9 +337,9 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
-static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			       unsigned long address, unsigned int *mapcount,
-			       unsigned long *vm_flags)
+int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+			unsigned long address, unsigned int *mapcount,
+			unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
@@ -507,28 +508,33 @@ int page_referenced(struct page *page,
 		    unsigned long *vm_flags)
 {
 	int referenced = 0;
+	int we_locked = 0;
 
 	if (TestClearPageReferenced(page))
 		referenced++;
 
 	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
-		if (PageAnon(page))
+		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+			we_locked = trylock_page(page);
+			if (!we_locked) {
+				referenced++;
+				goto out;
+			}
+		}
+		if (unlikely(PageKsm(page)))
+			referenced += page_referenced_ksm(page, mem_cont,
+								vm_flags);
+		else if (PageAnon(page))
 			referenced += page_referenced_anon(page, mem_cont,
 								vm_flags);
-		else if (is_locked)
+		else if (page->mapping)
 			referenced += page_referenced_file(page, mem_cont,
 								vm_flags);
-		else if (!trylock_page(page))
-			referenced++;
-		else {
-			if (page->mapping)
-				referenced += page_referenced_file(page,
-							mem_cont, vm_flags);
+		if (we_locked)
 			unlock_page(page);
-		}
 	}
-
+out:
 	if (page_test_and_clear_young(page))
 		referenced++;
 
@@ -620,14 +626,7 @@ static void __page_set_anon_rmap(struct page *page,
 	BUG_ON(!anon_vma);
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
-
 	page->index = linear_page_index(vma, address);
-
-	/*
-	 * nr_mapped state can be updated without turning off
-	 * interrupts because it is not modified via interrupt.
-	 */
-	__inc_zone_page_state(page, NR_ANON_PAGES);
 }
 
 /**
@@ -665,14 +664,21 @@ static void __page_check_anon_rmap(struct page *page,
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
  *
- * The caller needs to hold the pte lock and the page must be locked.
+ * The caller needs to hold the pte lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting.
  */
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
+	int first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__inc_zone_page_state(page, NR_ANON_PAGES);
+	if (unlikely(PageKsm(page)))
+		return;
+
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-	if (atomic_inc_and_test(&page->_mapcount))
+	if (first)
 		__page_set_anon_rmap(page, vma, address);
 	else
 		__page_check_anon_rmap(page, vma, address);
@@ -694,6 +700,7 @@ void page_add_new_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
+	__inc_zone_page_state(page, NR_ANON_PAGES);
 	__page_set_anon_rmap(page, vma, address);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
@@ -760,8 +767,8 @@ void page_remove_rmap(struct page *page)
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-			    unsigned long address, enum ttu_flags flags)
+int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, enum ttu_flags flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
@@ -1156,7 +1163,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
 	BUG_ON(!PageLocked(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		ret = try_to_unmap_ksm(page, flags);
+	else if (PageAnon(page))
 		ret = try_to_unmap_anon(page, flags);
 	else
 		ret = try_to_unmap_file(page, flags);
@@ -1177,15 +1186,17 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  *
  * SWAP_AGAIN	- no vma is holding page mlocked, or,
  * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_FAIL	- page cannot be located at present
  * SWAP_MLOCK	- page is now mlocked.
  */
 int try_to_munlock(struct page *page)
 {
 	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		return try_to_unmap_ksm(page, TTU_MUNLOCK);
+	else if (PageAnon(page))
 		return try_to_unmap_anon(page, TTU_MUNLOCK);
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
-
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e74112e8e5f4..6c0585b16418 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
@@ -650,6 +651,8 @@ int reuse_swap_page(struct page *page)
 	int count;
 
 	VM_BUG_ON(!PageLocked(page));
+	if (unlikely(PageKsm(page)))
+		return 0;
 	count = page_mapcount(page);
 	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
@@ -658,7 +661,7 @@ int reuse_swap_page(struct page *page)
 			SetPageDirty(page);
 		}
 	}
-	return count == 1;
+	return count <= 1;
 }
 
 /*
@@ -1185,6 +1188,12 @@ static int try_to_unuse(unsigned int type)
 		 * read from disk into another page.  Splitting into two
 		 * pages would be incorrect if swap supported "shared
 		 * private" pages, but they are handled by tmpfs files.
+		 *
+		 * Given how unuse_vma() targets one particular offset
+		 * in an anon_vma, once the anon_vma has been determined,
+		 * this splitting happens to be just what is needed to
+		 * handle where KSM pages have been swapped out: re-reading
+		 * is unnecessarily slow, but we can fix that later on.
 		 */
 		if (swap_count(*swap_map) &&
 		     PageDirty(page) && PageSwapCache(page)) {
-- 
cgit v1.2.3


From db114b83ab6064d9b1d6ec5650e096c89bd95e25 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:59:25 -0800
Subject: ksm: hold anon_vma in rmap_item

For full functionality, page_referenced_one() and try_to_unmap_one() need
to know the vma: to pass vma down to arch-dependent flushes, or to observe
VM_LOCKED or VM_EXEC.  But KSM keeps no record of vma: nor can it, since
vmas get split and merged without its knowledge.

Instead, note page's anon_vma in its rmap_item when adding to stable tree:
all the vmas which might map that page are listed by its anon_vma.

page_referenced_ksm() and try_to_unmap_ksm() then traverse the anon_vma,
first to find the probable vma, that which matches rmap_item's mm; but if
that is not enough to locate all instances, traverse again to try the
others.  This catches those occasions when fork has duplicated a pte of a
ksm page, but ksmd has not yet come around to assign it an rmap_item.

But each rmap_item in the stable tree which refers to an anon_vma needs to
take a reference to it.  Andrea's anon_vma design cleverly avoided a
reference count (an anon_vma was free when its list of vmas was empty),
but KSM now needs to add that.  Is a 32-bit count sufficient?  I believe
so - the anon_vma is only free when both count is 0 and list is empty.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  24 ++++++++
 mm/ksm.c             | 157 +++++++++++++++++++++++++++++++--------------------
 mm/rmap.c            |   5 +-
 3 files changed, 122 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0b4913a4a344..980094a527ee 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,9 @@
  */
 struct anon_vma {
 	spinlock_t lock;	/* Serialize access to vma list */
+#ifdef CONFIG_KSM
+	atomic_t ksm_refcount;
+#endif
 	/*
 	 * NOTE: the LSB of the head.next is set by
 	 * mm_take_all_locks() _after_ taking the above lock. So the
@@ -38,6 +41,26 @@ struct anon_vma {
 };
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_KSM
+static inline void ksm_refcount_init(struct anon_vma *anon_vma)
+{
+	atomic_set(&anon_vma->ksm_refcount, 0);
+}
+
+static inline int ksm_refcount(struct anon_vma *anon_vma)
+{
+	return atomic_read(&anon_vma->ksm_refcount);
+}
+#else
+static inline void ksm_refcount_init(struct anon_vma *anon_vma)
+{
+}
+
+static inline int ksm_refcount(struct anon_vma *anon_vma)
+{
+	return 0;
+}
+#endif /* CONFIG_KSM */
 
 static inline struct anon_vma *page_anon_vma(struct page *page)
 {
@@ -70,6 +93,7 @@ void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
 void anon_vma_unlink(struct vm_area_struct *);
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
+void anon_vma_free(struct anon_vma *);
 
 /*
  * rmap interfaces called when adding or removing pte of page
diff --git a/mm/ksm.c b/mm/ksm.c
index 2f58ceebfe8f..f7d121c42d01 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -121,7 +121,7 @@ struct stable_node {
 /**
  * struct rmap_item - reverse mapping item for virtual addresses
  * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
- * @filler: unused space we're making available in this patch
+ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
  * @mm: the memory structure this rmap_item is pointing into
  * @address: the virtual address this rmap_item tracks (+ flags in low bits)
  * @oldchecksum: previous checksum of the page at that virtual address
@@ -131,7 +131,7 @@ struct stable_node {
  */
 struct rmap_item {
 	struct rmap_item *rmap_list;
-	unsigned long filler;
+	struct anon_vma *anon_vma;	/* when stable */
 	struct mm_struct *mm;
 	unsigned long address;		/* + low bits used for flags below */
 	unsigned int oldchecksum;	/* when unstable */
@@ -196,13 +196,6 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
 static DEFINE_SPINLOCK(ksm_mmlist_lock);
 
-/*
- * Temporary hack for page_referenced_ksm() and try_to_unmap_ksm(),
- * later we rework things a little to get the right vma to them.
- */
-static DEFINE_SPINLOCK(ksm_fallback_vma_lock);
-static struct vm_area_struct ksm_fallback_vma;
-
 #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL)
@@ -323,6 +316,25 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
 	return rmap_item->address & STABLE_FLAG;
 }
 
+static void hold_anon_vma(struct rmap_item *rmap_item,
+			  struct anon_vma *anon_vma)
+{
+	rmap_item->anon_vma = anon_vma;
+	atomic_inc(&anon_vma->ksm_refcount);
+}
+
+static void drop_anon_vma(struct rmap_item *rmap_item)
+{
+	struct anon_vma *anon_vma = rmap_item->anon_vma;
+
+	if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+}
+
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -472,6 +484,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 			ksm_pages_shared--;
 		}
 
+		drop_anon_vma(rmap_item);
 		rmap_item->address &= PAGE_MASK;
 
 	} else if (rmap_item->address & UNSTABLE_FLAG) {
@@ -752,6 +765,9 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 	pte_t orig_pte = __pte(0);
 	int err = -EFAULT;
 
+	if (page == kpage)			/* ksm page forked */
+		return 0;
+
 	if (!(vma->vm_flags & VM_MERGEABLE))
 		goto out;
 	if (!PageAnon(page))
@@ -805,9 +821,6 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
 	struct vm_area_struct *vma;
 	int err = -EFAULT;
 
-	if (page == kpage)			/* ksm page forked */
-		return 0;
-
 	down_read(&mm->mmap_sem);
 	if (ksm_test_exit(mm))
 		goto out;
@@ -816,6 +829,11 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
 		goto out;
 
 	err = try_to_merge_one_page(vma, page, kpage);
+	if (err)
+		goto out;
+
+	/* Must get reference to anon_vma while still holding mmap_sem */
+	hold_anon_vma(rmap_item, vma->anon_vma);
 out:
 	up_read(&mm->mmap_sem);
 	return err;
@@ -869,6 +887,11 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 	lru_cache_add_lru(kpage, LRU_ACTIVE_ANON);
 
 	err = try_to_merge_one_page(vma, page, kpage);
+	if (err)
+		goto up;
+
+	/* Must get reference to anon_vma while still holding mmap_sem */
+	hold_anon_vma(rmap_item, vma->anon_vma);
 up:
 	up_read(&mm->mmap_sem);
 
@@ -879,8 +902,10 @@ up:
 		 * If that fails, we have a ksm page with only one pte
 		 * pointing to it: so break it.
 		 */
-		if (err)
+		if (err) {
+			drop_anon_vma(rmap_item);
 			break_cow(rmap_item);
+		}
 	}
 	if (err) {
 		put_page(kpage);
@@ -1155,7 +1180,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			 * in which case we need to break_cow on both.
 			 */
 			if (!stable_node) {
+				drop_anon_vma(tree_rmap_item);
 				break_cow(tree_rmap_item);
+				drop_anon_vma(rmap_item);
 				break_cow(rmap_item);
 			}
 		}
@@ -1490,7 +1517,7 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 	struct hlist_node *hlist;
 	unsigned int mapcount = page_mapcount(page);
 	int referenced = 0;
-	struct vm_area_struct *vma;
+	int search_new_forks = 0;
 
 	VM_BUG_ON(!PageKsm(page));
 	VM_BUG_ON(!PageLocked(page));
@@ -1498,36 +1525,40 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 	stable_node = page_stable_node(page);
 	if (!stable_node)
 		return 0;
-
-	/*
-	 * Temporary hack: really we need anon_vma in rmap_item, to
-	 * provide the correct vma, and to find recently forked instances.
-	 * Use zalloc to avoid weirdness if any other fields are involved.
-	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
-	if (!vma) {
-		spin_lock(&ksm_fallback_vma_lock);
-		vma = &ksm_fallback_vma;
-	}
-
+again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
-		if (memcg && !mm_match_cgroup(rmap_item->mm, memcg))
-			continue;
+		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct vm_area_struct *vma;
+
+		spin_lock(&anon_vma->lock);
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			if (rmap_item->address < vma->vm_start ||
+			    rmap_item->address >= vma->vm_end)
+				continue;
+			/*
+			 * Initially we examine only the vma which covers this
+			 * rmap_item; but later, if there is still work to do,
+			 * we examine covering vmas in other mms: in case they
+			 * were forked from the original since ksmd passed.
+			 */
+			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+				continue;
 
-		vma->vm_mm = rmap_item->mm;
-		vma->vm_start = rmap_item->address;
-		vma->vm_end = vma->vm_start + PAGE_SIZE;
+			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
+				continue;
 
-		referenced += page_referenced_one(page, vma,
+			referenced += page_referenced_one(page, vma,
 				rmap_item->address, &mapcount, vm_flags);
+			if (!search_new_forks || !mapcount)
+				break;
+		}
+		spin_unlock(&anon_vma->lock);
 		if (!mapcount)
 			goto out;
 	}
+	if (!search_new_forks++)
+		goto again;
 out:
-	if (vma == &ksm_fallback_vma)
-		spin_unlock(&ksm_fallback_vma_lock);
-	else
-		kmem_cache_free(vm_area_cachep, vma);
 	return referenced;
 }
 
@@ -1537,7 +1568,7 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 	struct hlist_node *hlist;
 	struct rmap_item *rmap_item;
 	int ret = SWAP_AGAIN;
-	struct vm_area_struct *vma;
+	int search_new_forks = 0;
 
 	VM_BUG_ON(!PageKsm(page));
 	VM_BUG_ON(!PageLocked(page));
@@ -1545,35 +1576,37 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 	stable_node = page_stable_node(page);
 	if (!stable_node)
 		return SWAP_FAIL;
-
-	/*
-	 * Temporary hack: really we need anon_vma in rmap_item, to
-	 * provide the correct vma, and to find recently forked instances.
-	 * Use zalloc to avoid weirdness if any other fields are involved.
-	 */
-	if (TTU_ACTION(flags) != TTU_UNMAP)
-		return SWAP_FAIL;
-
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
-	if (!vma) {
-		spin_lock(&ksm_fallback_vma_lock);
-		vma = &ksm_fallback_vma;
-	}
-
+again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
-		vma->vm_mm = rmap_item->mm;
-		vma->vm_start = rmap_item->address;
-		vma->vm_end = vma->vm_start + PAGE_SIZE;
+		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct vm_area_struct *vma;
 
-		ret = try_to_unmap_one(page, vma, rmap_item->address, flags);
-		if (ret != SWAP_AGAIN || !page_mapped(page))
-			goto out;
+		spin_lock(&anon_vma->lock);
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			if (rmap_item->address < vma->vm_start ||
+			    rmap_item->address >= vma->vm_end)
+				continue;
+			/*
+			 * Initially we examine only the vma which covers this
+			 * rmap_item; but later, if there is still work to do,
+			 * we examine covering vmas in other mms: in case they
+			 * were forked from the original since ksmd passed.
+			 */
+			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+				continue;
+
+			ret = try_to_unmap_one(page, vma,
+					rmap_item->address, flags);
+			if (ret != SWAP_AGAIN || !page_mapped(page)) {
+				spin_unlock(&anon_vma->lock);
+				goto out;
+			}
+		}
+		spin_unlock(&anon_vma->lock);
 	}
+	if (!search_new_forks++)
+		goto again;
 out:
-	if (vma == &ksm_fallback_vma)
-		spin_unlock(&ksm_fallback_vma_lock);
-	else
-		kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 869aaa3206a2..ebdf582ef185 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -68,7 +68,7 @@ static inline struct anon_vma *anon_vma_alloc(void)
 	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 }
 
-static inline void anon_vma_free(struct anon_vma *anon_vma)
+void anon_vma_free(struct anon_vma *anon_vma)
 {
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
@@ -172,7 +172,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 	list_del(&vma->anon_vma_node);
 
 	/* We must garbage collect the anon_vma if it's empty */
-	empty = list_empty(&anon_vma->head);
+	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
 	spin_unlock(&anon_vma->lock);
 
 	if (empty)
@@ -184,6 +184,7 @@ static void anon_vma_ctor(void *data)
 	struct anon_vma *anon_vma = data;
 
 	spin_lock_init(&anon_vma->lock);
+	ksm_refcount_init(anon_vma);
 	INIT_LIST_HEAD(&anon_vma->head);
 }
 
-- 
cgit v1.2.3


From e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:59:31 -0800
Subject: ksm: rmap_walk to remove_migation_ptes

A side-effect of making ksm pages swappable is that they have to be placed
on the LRUs: which then exposes them to isolate_lru_page() and hence to
page migration.

Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and
rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c.  Perhaps some
consolidation with existing code is possible, but don't attempt that yet
(try_to_unmap needs to handle nonlinears, but migration pte removal does
not).

rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like
remove_anon_migration_ptes() which it replaces, avoids calling
page_lock_anon_vma(), because that includes a page_mapped() test which
fails when all migration ptes are in place.  That was valid when NUMA page
migration was introduced (holding mmap_sem provided the missing guarantee
that anon_vma's slab had not already been destroyed), but I believe not
valid in the memory hotremove case added since.

For now do the same as before, and consider the best way to fix that
unlikely race later on.  When fixed, we can probably use rmap_walk() on
hwpoisoned ksm pages too: for now, they remain among hwpoison's various
exceptions (its PageKsm test comes before the page is locked, but its
page_lock_anon_vma fails safely if an anon gets upgraded).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h  | 13 ++++++++
 include/linux/rmap.h |  6 ++++
 mm/ksm.c             | 65 ++++++++++++++++++++++++++++++++++++++++
 mm/migrate.c         | 85 +++++++++++-----------------------------------------
 mm/rmap.c            | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 157d83dbaef8..bed5f16ba827 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -88,6 +88,9 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 int page_referenced_ksm(struct page *page,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
+int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
+		  struct vm_area_struct *, unsigned long, void *), void *arg);
+void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else  /* !CONFIG_KSM */
 
@@ -127,6 +130,16 @@ static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 {
 	return 0;
 }
+
+static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	return 0;
+}
+
+static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+}
 #endif /* !CONFIG_KSM */
 
 #endif /* __LINUX_KSM_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 980094a527ee..b019ae64e2ab 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -164,6 +164,12 @@ struct anon_vma *page_lock_anon_vma(struct page *page);
 void page_unlock_anon_vma(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
+/*
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
diff --git a/mm/ksm.c b/mm/ksm.c
index 20f46a7b2799..dfdc292d3626 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1656,6 +1656,71 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_MIGRATION
+int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
+		  struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct stable_node *stable_node;
+	struct hlist_node *hlist;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	int search_new_forks = 0;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return ret;
+again:
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct vm_area_struct *vma;
+
+		spin_lock(&anon_vma->lock);
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			if (rmap_item->address < vma->vm_start ||
+			    rmap_item->address >= vma->vm_end)
+				continue;
+			/*
+			 * Initially we examine only the vma which covers this
+			 * rmap_item; but later, if there is still work to do,
+			 * we examine covering vmas in other mms: in case they
+			 * were forked from the original since ksmd passed.
+			 */
+			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+				continue;
+
+			ret = rmap_one(page, vma, rmap_item->address, arg);
+			if (ret != SWAP_AGAIN) {
+				spin_unlock(&anon_vma->lock);
+				goto out;
+			}
+		}
+		spin_unlock(&anon_vma->lock);
+	}
+	if (!search_new_forks++)
+		goto again;
+out:
+	return ret;
+}
+
+void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+	struct stable_node *stable_node;
+
+	VM_BUG_ON(!PageLocked(oldpage));
+	VM_BUG_ON(!PageLocked(newpage));
+	VM_BUG_ON(newpage->mapping != oldpage->mapping);
+
+	stable_node = page_stable_node(newpage);
+	if (stable_node) {
+		VM_BUG_ON(stable_node->page != oldpage);
+		stable_node->page = newpage;
+	}
+}
+#endif /* CONFIG_MIGRATION */
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
diff --git a/mm/migrate.c b/mm/migrate.c
index 367272d04423..0b714747c028 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -78,8 +79,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-		struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+				 unsigned long addr, void *old)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
@@ -88,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
  	pmd_t *pmd;
 	pte_t *ptep, pte;
  	spinlock_t *ptl;
-	unsigned long addr = page_address_in_vma(new, vma);
-
-	if (addr == -EFAULT)
-		return;
 
  	pgd = pgd_offset(mm, addr);
 	if (!pgd_present(*pgd))
-                return;
+		goto out;
 
 	pud = pud_offset(pgd, addr);
 	if (!pud_present(*pud))
-                return;
+		goto out;
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
-		return;
+		goto out;
 
 	ptep = pte_offset_map(pmd, addr);
 
 	if (!is_swap_pte(*ptep)) {
 		pte_unmap(ptep);
- 		return;
+		goto out;
  	}
 
  	ptl = pte_lockptr(mm, pmd);
  	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
-		goto out;
+		goto unlock;
 
 	entry = pte_to_swp_entry(pte);
 
-	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-		goto out;
+	if (!is_migration_entry(entry) ||
+	    migration_entry_to_page(entry) != old)
+		goto unlock;
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -137,55 +135,10 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, pte);
-
-out:
+unlock:
 	pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-	struct vm_area_struct *vma;
-	struct address_space *mapping = new->mapping;
-	struct prio_tree_iter iter;
-	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-	if (!mapping)
-		return;
-
-	spin_lock(&mapping->i_mmap_lock);
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
-
-	/*
-	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-	 */
-	anon_vma = page_anon_vma(new);
-	if (!anon_vma)
-		return;
-
-	spin_lock(&anon_vma->lock);
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&anon_vma->lock);
+out:
+	return SWAP_AGAIN;
 }
 
 /*
@@ -194,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	if (PageAnon(new))
-		remove_anon_migration_ptes(old, new);
-	else
-		remove_file_migration_ptes(old, new);
+	rmap_walk(new, remove_migration_pte, old);
 }
 
 /*
@@ -358,6 +308,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
  	}
 
 	mlock_migrate_page(newpage, page);
+	ksm_migrate_page(newpage, page);
 
 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
@@ -577,9 +528,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);
 
-	if (!rc) {
+	if (!rc)
 		remove_migration_ptes(page, newpage);
-	} else
+	else
 		newpage->mapping = NULL;
 
 	unlock_page(newpage);
diff --git a/mm/rmap.c b/mm/rmap.c
index 2e38e9048327..c81bedd7d527 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1203,3 +1203,82 @@ int try_to_munlock(struct page *page)
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
+
+#ifdef CONFIG_MIGRATION
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int ret = SWAP_AGAIN;
+
+	/*
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * because that depends on page_mapped(); but not all its usages
+	 * are holding mmap_sem, which also gave the necessary guarantee
+	 * (that this anon_vma's slab has not already been destroyed).
+	 * This needs to be reviewed later: avoiding page_lock_anon_vma()
+	 * is risky, and currently limits the usefulness of rmap_walk().
+	 */
+	anon_vma = page_anon_vma(page);
+	if (!anon_vma)
+		return ret;
+	spin_lock(&anon_vma->lock);
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	spin_unlock(&anon_vma->lock);
+	return ret;
+}
+
+static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = SWAP_AGAIN;
+
+	if (!mapping)
+		return ret;
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	/*
+	 * No nonlinear handling: being always shared, nonlinear vmas
+	 * never contain migration ptes.  Decide what to do about this
+	 * limitation to linear when we need rmap_walk() on nonlinear.
+	 */
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (unlikely(PageKsm(page)))
+		return rmap_walk_ksm(page, rmap_one, arg);
+	else if (PageAnon(page))
+		return rmap_walk_anon(page, rmap_one, arg);
+	else
+		return rmap_walk_file(page, rmap_one, arg);
+}
+#endif /* CONFIG_MIGRATION */
-- 
cgit v1.2.3


From 62b61f611eb5e20f7e9f8619bfd03bdfe8af6348 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 14 Dec 2009 17:59:33 -0800
Subject: ksm: memory hotremove migration only

The previous patch enables page migration of ksm pages, but that soon gets
into trouble: not surprising, since we're using the ksm page lock to lock
operations on its stable_node, but page migration switches the page whose
lock is to be used for that.  Another layer of locking would fix it, but
do we need that yet?

Do we actually need page migration of ksm pages?  Yes, memory hotremove
needs to offline sections of memory: and since we stopped allocating ksm
pages with GFP_HIGHUSER, they will tend to be GFP_HIGHUSER_MOVABLE
candidates for migration.

But KSM is currently unconscious of NUMA issues, happily merging pages
from different NUMA nodes: at present the rule must be, not to use
MADV_MERGEABLE where you care about NUMA.  So no, NUMA page migration of
ksm pages does not make sense yet.

So, to complete support for ksm swapping we need to make hotremove safe.
ksm_memory_callback() take ksm_thread_mutex when MEM_GOING_OFFLINE and
release it when MEM_OFFLINE or MEM_CANCEL_OFFLINE.  But if mapped pages
are freed before migration reaches them, stable_nodes may be left still
pointing to struct pages which have been removed from the system: the
stable_node needs to identify a page by pfn rather than page pointer, then
it can safely prune them when MEM_OFFLINE.

And make NUMA migration skip PageKsm pages where it skips PageReserved.
But it's only when we reach unmap_and_move() that the page lock is taken
and we can be sure that raised pagecount has prevented a PageAnon from
being upgraded: so add offlining arg to migrate_pages(), to migrate ksm
page when offlining (has sufficient locking) but reject it otherwise.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h |  8 ++---
 mm/ksm.c                | 84 +++++++++++++++++++++++++++++++++++++++++--------
 mm/memory_hotplug.c     |  2 +-
 mm/mempolicy.c          | 19 +++++------
 mm/migrate.c            | 27 ++++++++++++----
 5 files changed, 103 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 527602cdea1c..7f085c97c799 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -12,7 +12,8 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
-extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long);
+extern int migrate_pages(struct list_head *l, new_page_t x,
+			unsigned long private, int offlining);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -26,10 +27,7 @@ extern int migrate_vmas(struct mm_struct *mm,
 
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
-		unsigned long private) { return -ENOSYS; }
-
-static inline int migrate_pages_to(struct list_head *pagelist,
-			struct vm_area_struct *vma, int dest) { return 0; }
+		unsigned long private, int offlining) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 
diff --git a/mm/ksm.c b/mm/ksm.c
index dfdc292d3626..d4c228a9d278 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -29,6 +29,7 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
+#include <linux/memory.h>
 #include <linux/mmu_notifier.h>
 #include <linux/swap.h>
 #include <linux/ksm.h>
@@ -108,14 +109,14 @@ struct ksm_scan {
 
 /**
  * struct stable_node - node of the stable rbtree
- * @page: pointer to struct page of the ksm page
  * @node: rb node of this ksm page in the stable tree
  * @hlist: hlist head of rmap_items using this ksm page
+ * @kpfn: page frame number of this ksm page
  */
 struct stable_node {
-	struct page *page;
 	struct rb_node node;
 	struct hlist_head hlist;
+	unsigned long kpfn;
 };
 
 /**
@@ -515,7 +516,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node)
 	struct page *page;
 	void *expected_mapping;
 
-	page = stable_node->page;
+	page = pfn_to_page(stable_node->kpfn);
 	expected_mapping = (void *)stable_node +
 				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 	rcu_read_lock();
@@ -973,7 +974,7 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  * This function returns the stable tree node of identical content if found,
  * NULL otherwise.
  */
-static struct stable_node *stable_tree_search(struct page *page)
+static struct page *stable_tree_search(struct page *page)
 {
 	struct rb_node *node = root_stable_tree.rb_node;
 	struct stable_node *stable_node;
@@ -981,7 +982,7 @@ static struct stable_node *stable_tree_search(struct page *page)
 	stable_node = page_stable_node(page);
 	if (stable_node) {			/* ksm page forked */
 		get_page(page);
-		return stable_node;
+		return page;
 	}
 
 	while (node) {
@@ -1003,7 +1004,7 @@ static struct stable_node *stable_tree_search(struct page *page)
 			put_page(tree_page);
 			node = node->rb_right;
 		} else
-			return stable_node;
+			return tree_page;
 	}
 
 	return NULL;
@@ -1059,7 +1060,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 
 	INIT_HLIST_HEAD(&stable_node->hlist);
 
-	stable_node->page = kpage;
+	stable_node->kpfn = page_to_pfn(kpage);
 	set_page_stable_node(kpage, stable_node);
 
 	return stable_node;
@@ -1170,9 +1171,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	remove_rmap_item_from_tree(rmap_item);
 
 	/* We first start with searching the page inside the stable tree */
-	stable_node = stable_tree_search(page);
-	if (stable_node) {
-		kpage = stable_node->page;
+	kpage = stable_tree_search(page);
+	if (kpage) {
 		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
 		if (!err) {
 			/*
@@ -1180,7 +1180,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			 * add its rmap_item to the stable tree.
 			 */
 			lock_page(kpage);
-			stable_tree_append(rmap_item, stable_node);
+			stable_tree_append(rmap_item, page_stable_node(kpage));
 			unlock_page(kpage);
 		}
 		put_page(kpage);
@@ -1715,12 +1715,63 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 
 	stable_node = page_stable_node(newpage);
 	if (stable_node) {
-		VM_BUG_ON(stable_node->page != oldpage);
-		stable_node->page = newpage;
+		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
+		stable_node->kpfn = page_to_pfn(newpage);
 	}
 }
 #endif /* CONFIG_MIGRATION */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
+						 unsigned long end_pfn)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
+		struct stable_node *stable_node;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+		if (stable_node->kpfn >= start_pfn &&
+		    stable_node->kpfn < end_pfn)
+			return stable_node;
+	}
+	return NULL;
+}
+
+static int ksm_memory_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	struct stable_node *stable_node;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		/*
+		 * Keep it very simple for now: just lock out ksmd and
+		 * MADV_UNMERGEABLE while any memory is going offline.
+		 */
+		mutex_lock(&ksm_thread_mutex);
+		break;
+
+	case MEM_OFFLINE:
+		/*
+		 * Most of the work is done by page migration; but there might
+		 * be a few stable_nodes left over, still pointing to struct
+		 * pages which have been offlined: prune those from the tree.
+		 */
+		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
+					mn->start_pfn + mn->nr_pages)) != NULL)
+			remove_node_from_stable_tree(stable_node);
+		/* fallthrough */
+
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&ksm_thread_mutex);
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
@@ -1946,6 +1997,13 @@ static int __init ksm_init(void)
 
 #endif /* CONFIG_SYSFS */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	/*
+	 * Choose a high priority since the callback takes ksm_thread_mutex:
+	 * later callbacks could only be taking locks which nest within that.
+	 */
+	hotplug_memory_notifier(ksm_memory_callback, 100);
+#endif
 	return 0;
 
 out_free2:
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index bc5a08138f1e..67e941d7882c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -698,7 +698,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 	if (list_empty(&source))
 		goto out;
 	/* this function returns # of failed pages */
-	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
+	ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
 
 out:
 	return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f11fdad06204..290fb5bf0440 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -85,6 +85,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/migrate.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -413,17 +414,11 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (!page)
 			continue;
 		/*
-		 * The check for PageReserved here is important to avoid
-		 * handling zero pages and other pages that may have been
-		 * marked special by the system.
-		 *
-		 * If the PageReserved would not be checked here then f.e.
-		 * the location of the zero page could have an influence
-		 * on MPOL_MF_STRICT, zero pages would be counted for
-		 * the per node stats, and there would be useless attempts
-		 * to put zero pages on the migration list.
+		 * vm_normal_page() filters out zero pages, but there might
+		 * still be PageReserved pages to skip, perhaps in a VDSO.
+		 * And we cannot move PageKsm pages sensibly or safely yet.
 		 */
-		if (PageReserved(page))
+		if (PageReserved(page) || PageKsm(page))
 			continue;
 		nid = page_to_nid(page);
 		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -839,7 +834,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist))
-		err = migrate_pages(&pagelist, new_node_page, dest);
+		err = migrate_pages(&pagelist, new_node_page, dest, 0);
 
 	return err;
 }
@@ -1056,7 +1051,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 
 		if (!list_empty(&pagelist))
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
-						(unsigned long)vma);
+						(unsigned long)vma, 0);
 
 		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
diff --git a/mm/migrate.c b/mm/migrate.c
index 0b714747c028..2a0ea3ef509e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -543,7 +543,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force)
+			struct page *page, int force, int offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -569,6 +569,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		lock_page(page);
 	}
 
+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page.  The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
 	/* charge against new page */
 	charge = mem_cgroup_prepare_migration(page, &mem);
 	if (charge == -ENOMEM) {
@@ -685,7 +699,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private)
+		new_page_t get_new_page, unsigned long private, int offlining)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -705,7 +719,7 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2);
+						page, pass > 2, offlining);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -801,7 +815,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!page)
 			goto set_status;
 
-		if (PageReserved(page))		/* Check for zero page */
+		/* Use PageReserved to check for zero page */
+		if (PageReserved(page) || PageKsm(page))
 			goto put_and_set;
 
 		pp->page = page;
@@ -838,7 +853,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm);
+				(unsigned long)pm, 0);
 
 	up_read(&mm->mmap_sem);
 	return err;
@@ -959,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
 		err = -ENOENT;
 		/* Use PageReserved to check for zero page */
-		if (!page || PageReserved(page))
+		if (!page || PageReserved(page) || PageKsm(page))
 			goto set_status;
 
 		err = page_to_nid(page);
-- 
cgit v1.2.3


From b4e655a4aaa327810110457cef92681447dd13e4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 14 Dec 2009 17:59:35 -0800
Subject: mm: memory_hotplug: make offline_pages() static

It has no references outside memory_hotplug.c.

Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 1 -
 mm/memory_hotplug.c            | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index fed969281a41..35b07b773e6c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -69,7 +69,6 @@ extern void online_page(struct page *page);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
-extern int offline_pages(unsigned long, unsigned long, unsigned long);
 
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 67e941d7882c..f827cf4cb4e5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -751,7 +751,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	return offlined;
 }
 
-int offline_pages(unsigned long start_pfn,
+static int offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
 	unsigned long pfn, nr_pages, expire;
-- 
cgit v1.2.3


From f096e59e844ba3c5d5a7b54b3deafd2aeeebf921 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Mon, 14 Dec 2009 17:59:51 -0800
Subject: include/linux/mm.h: remove unneeded ifdef

The check code for CONFIG_SWAP is redundant, because there is a
non-CONFIG_SWAP version for PageSwapCache() which just returns 0.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1202cd3121e1..52b264563cd9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -643,12 +643,9 @@ static inline struct address_space *page_mapping(struct page *page)
 	struct address_space *mapping = page->mapping;
 
 	VM_BUG_ON(PageSlab(page));
-#ifdef CONFIG_SWAP
 	if (unlikely(PageSwapCache(page)))
 		mapping = &swapper_space;
-	else
-#endif
-	if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+	else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
 		mapping = NULL;
 	return mapping;
 }
-- 
cgit v1.2.3


From 5dc37642cbce34619e4588a9f0bdad1d2f870956 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 14 Dec 2009 18:00:01 -0800
Subject: mm hugetlb: add hugepage support to pagemap

This patch enables extraction of the pfn of a hugepage from
/proc/pid/pagemap in an architecture independent manner.

Details
-------
My test program (leak_pagemap) works as follows:
 - creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages,)
 - read()/write() something on it,
 - call page-types with option -p,
 - munmap() and unlink() the file on hugetlbfs

Without my patches
------------------
$ ./leak_pagemap
             flags page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          1        0  __________________________________
0x0000000000000804          1        0  __R________M______________________ referenced,mmap
0x000000000000086c         81        0  __RU_lA____M______________________ referenced,uptodate,lru,active,mmap
0x0000000000005808          5        0  ___U_______Ma_b___________________ uptodate,mmap,anonymous,swapbacked
0x0000000000005868         12        0  ___U_lA____Ma_b___________________ uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c          1        0  __RU_lA____Ma_b___________________ referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total        101        0

The output of page-types don't show any hugepage.

With my patches
---------------
$ ./leak_pagemap
             flags page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          1        0  __________________________________
0x0000000000030000      51100      199  ________________TG________________ compound_tail,huge
0x0000000000028018        100        0  ___UD__________H_G________________ uptodate,dirty,compound_head,huge
0x0000000000000804          1        0  __R________M______________________ referenced,mmap
0x000000000000080c          1        0  __RU_______M______________________ referenced,uptodate,mmap
0x000000000000086c         80        0  __RU_lA____M______________________ referenced,uptodate,lru,active,mmap
0x0000000000005808          4        0  ___U_______Ma_b___________________ uptodate,mmap,anonymous,swapbacked
0x0000000000005868         12        0  ___U_lA____Ma_b___________________ uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c          1        0  __RU_lA____Ma_b___________________ referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total      51300      200

The output of page-types shows 51200 pages contributing to hugepages,
containing 100 head pages and 51100 tail pages as expected.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  3 +++
 mm/pagewalk.c      | 22 +++++++++++++++++++---
 3 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2a1bef9203c6..47c03f4336b8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -650,6 +650,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
+{
+	u64 pme = 0;
+	if (pte_present(pte))
+		pme = PM_PFRAME(pte_pfn(pte) + offset)
+			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return pme;
+}
+
+static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
+				 unsigned long end, struct mm_walk *walk)
+{
+	struct vm_area_struct *vma;
+	struct pagemapread *pm = walk->private;
+	struct hstate *hs = NULL;
+	int err = 0;
+
+	vma = find_vma(walk->mm, addr);
+	if (vma)
+		hs = hstate_vma(vma);
+	for (; addr != end; addr += PAGE_SIZE) {
+		u64 pfn = PM_NOT_PRESENT;
+
+		if (vma && (addr >= vma->vm_end)) {
+			vma = find_vma(walk->mm, addr);
+			if (vma)
+				hs = hstate_vma(vma);
+		}
+
+		if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
+			/* calculate pfn of the "raw" page in the hugepage. */
+			int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
+			pfn = huge_pte_to_pagemap_entry(*pte, offset);
+		}
+		err = add_to_pagemap(addr, pfn, pm);
+		if (err)
+			return err;
+	}
+
+	cond_resched();
+
+	return err;
+}
+
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -742,6 +786,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
 	pagemap_walk.pmd_entry = pagemap_pte_range;
 	pagemap_walk.pte_hole = pagemap_pte_hole;
+	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
 	pagemap_walk.mm = mm;
 	pagemap_walk.private = &pm;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52b264563cd9..9d65ae4ba0e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -770,6 +770,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
  * @pte_hole: if set, called for each hole at all levels
+ * @hugetlb_entry: if set, called for each hugetlb entry
  *
  * (see walk_page_range for more details)
  */
@@ -779,6 +780,8 @@ struct mm_walk {
 	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
 	int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
 	int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
+	int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long,
+			     struct mm_walk *);
 	struct mm_struct *mm;
 	void *private;
 };
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index a286915e23ef..7b47a57b6646 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -120,15 +120,31 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	do {
 		next = pgd_addr_end(addr, end);
 
-		/* skip hugetlb vma to avoid hugepage PMD being cleared
-		 * in pmd_none_or_clear_bad(). */
+		/*
+		 * handle hugetlb vma individually because pagetable walk for
+		 * the hugetlb page is dependent on the architecture and
+		 * we can't handled it in the same manner as non-huge pages.
+		 */
 		vma = find_vma(walk->mm, addr);
+#ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
+			pte_t *pte;
+			struct hstate *hs;
+
 			if (vma->vm_end < next)
 				next = vma->vm_end;
+			hs = hstate_vma(vma);
+			pte = huge_pte_offset(walk->mm,
+					      addr & huge_page_mask(hs));
+			if (pte && !huge_pte_none(huge_ptep_get(pte))
+			    && walk->hugetlb_entry)
+				err = walk->hugetlb_entry(pte, addr,
+							  next, walk);
+			if (err)
+				break;
 			continue;
 		}
-
+#endif
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
 				err = walk->pte_hole(addr, next, walk);
-- 
cgit v1.2.3


From 471452104b8520337ae2fb48c4e61cd4896e025d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 14 Dec 2009 18:00:08 -0800
Subject: const: constify remaining dev_pm_ops

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/plat-omap/debug-leds.c            | 2 +-
 arch/arm/plat-omap/gpio.c                  | 2 +-
 arch/s390/appldata/appldata_base.c         | 2 +-
 drivers/block/floppy.c                     | 2 +-
 drivers/char/hvc_iucv.c                    | 2 +-
 drivers/dma/at_hdmac.c                     | 2 +-
 drivers/dma/dw_dmac.c                      | 2 +-
 drivers/dma/txx9dmac.c                     | 2 +-
 drivers/hwmon/applesmc.c                   | 2 +-
 drivers/i2c/busses/i2c-pxa.c               | 2 +-
 drivers/i2c/busses/i2c-s3c2410.c           | 2 +-
 drivers/i2c/busses/i2c-sh_mobile.c         | 2 +-
 drivers/input/keyboard/adp5588-keys.c      | 2 +-
 drivers/input/keyboard/sh_keysc.c          | 2 +-
 drivers/input/misc/bfin_rotary.c           | 2 +-
 drivers/input/misc/pcspkr.c                | 2 +-
 drivers/input/touchscreen/pcap_ts.c        | 2 +-
 drivers/media/video/davinci/vpfe_capture.c | 2 +-
 drivers/media/video/davinci/vpif_capture.c | 2 +-
 drivers/media/video/sh_mobile_ceu_camera.c | 2 +-
 drivers/mmc/host/pxamci.c                  | 2 +-
 drivers/mmc/host/s3cmci.c                  | 2 +-
 drivers/mtd/nand/nomadik_nand.c            | 2 +-
 drivers/net/3c59x.c                        | 2 +-
 drivers/net/dm9000.c                       | 2 +-
 drivers/net/r8169.c                        | 2 +-
 drivers/net/smsc911x.c                     | 2 +-
 drivers/net/vmxnet3/vmxnet3_drv.c          | 2 +-
 drivers/pci/pcie/portdrv_pci.c             | 2 +-
 drivers/pcmcia/pxa2xx_base.c               | 2 +-
 drivers/pcmcia/yenta_socket.c              | 2 +-
 drivers/platform/x86/acerhdf.c             | 2 +-
 drivers/platform/x86/eeepc-laptop.c        | 2 +-
 drivers/platform/x86/hp-wmi.c              | 2 +-
 drivers/power/wm97xx_battery.c             | 2 +-
 drivers/rtc/rtc-pxa.c                      | 2 +-
 drivers/rtc/rtc-sa1100.c                   | 2 +-
 drivers/rtc/rtc-sh.c                       | 2 +-
 drivers/rtc/rtc-wm831x.c                   | 2 +-
 drivers/s390/block/dcssblk.c               | 2 +-
 drivers/s390/block/xpram.c                 | 2 +-
 drivers/s390/char/monreader.c              | 2 +-
 drivers/s390/char/monwriter.c              | 2 +-
 drivers/s390/char/sclp.c                   | 2 +-
 drivers/s390/char/sclp_cmd.c               | 2 +-
 drivers/s390/char/vmlogrdr.c               | 2 +-
 drivers/s390/cio/ccwgroup.c                | 2 +-
 drivers/s390/cio/css.c                     | 2 +-
 drivers/s390/cio/device.c                  | 2 +-
 drivers/s390/net/netiucv.c                 | 2 +-
 drivers/s390/net/smsgiucv.c                | 2 +-
 drivers/serial/pxa.c                       | 2 +-
 drivers/serial/sh-sci.c                    | 2 +-
 drivers/spi/pxa2xx_spi.c                   | 2 +-
 drivers/spi/spi_s3c24xx.c                  | 2 +-
 drivers/uio/uio_pdrv_genirq.c              | 2 +-
 drivers/usb/core/hcd-pci.c                 | 2 +-
 drivers/usb/core/hcd.h                     | 2 +-
 drivers/usb/core/usb.c                     | 2 +-
 drivers/usb/host/ehci-au1xxx.c             | 2 +-
 drivers/usb/host/ohci-au1xxx.c             | 2 +-
 drivers/usb/host/ohci-pxa27x.c             | 2 +-
 drivers/usb/host/r8a66597-hcd.c            | 2 +-
 drivers/usb/musb/musb_core.c               | 2 +-
 drivers/video/backlight/da903x_bl.c        | 2 +-
 drivers/video/hitfb.c                      | 2 +-
 drivers/video/pxafb.c                      | 2 +-
 drivers/video/sh_mobile_lcdcfb.c           | 2 +-
 drivers/watchdog/adx_wdt.c                 | 2 +-
 include/linux/pm.h                         | 2 +-
 net/iucv/af_iucv.c                         | 2 +-
 net/iucv/iucv.c                            | 2 +-
 sound/arm/pxa2xx-ac97.c                    | 2 +-
 sound/soc/s3c24xx/s3c24xx_simtec.c         | 2 +-
 sound/soc/s3c24xx/s3c24xx_simtec.h         | 2 +-
 sound/soc/soc-core.c                       | 2 +-
 76 files changed, 76 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/debug-leds.c b/arch/arm/plat-omap/debug-leds.c
index 6c768b71ad64..53fcef7c5201 100644
--- a/arch/arm/plat-omap/debug-leds.c
+++ b/arch/arm/plat-omap/debug-leds.c
@@ -293,7 +293,7 @@ static int fpga_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops fpga_dev_pm_ops = {
+static const struct dev_pm_ops fpga_dev_pm_ops = {
 	.suspend_noirq = fpga_suspend_noirq,
 	.resume_noirq = fpga_resume_noirq,
 };
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 055160e0620e..04846811d0aa 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1431,7 +1431,7 @@ static int omap_mpuio_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops omap_mpuio_dev_pm_ops = {
+static const struct dev_pm_ops omap_mpuio_dev_pm_ops = {
 	.suspend_noirq = omap_mpuio_suspend_noirq,
 	.resume_noirq = omap_mpuio_resume_noirq,
 };
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 495589950dc7..5c91995b74e4 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -551,7 +551,7 @@ static int appldata_thaw(struct device *dev)
 	return appldata_restore(dev);
 }
 
-static struct dev_pm_ops appldata_pm_ops = {
+static const struct dev_pm_ops appldata_pm_ops = {
 	.freeze		= appldata_freeze,
 	.thaw		= appldata_thaw,
 	.restore	= appldata_restore,
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 5c01f747571b..d41d7f018549 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4162,7 +4162,7 @@ static int floppy_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops floppy_pm_ops = {
+static const struct dev_pm_ops floppy_pm_ops = {
 	.resume = floppy_resume,
 	.restore = floppy_resume,
 };
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index b8a5d654d3d0..fe62bd0e17b7 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -931,7 +931,7 @@ static struct hv_ops hvc_iucv_ops = {
 };
 
 /* Suspend / resume device operations */
-static struct dev_pm_ops hvc_iucv_pm_ops = {
+static const struct dev_pm_ops hvc_iucv_pm_ops = {
 	.freeze	  = hvc_iucv_pm_freeze,
 	.thaw	  = hvc_iucv_pm_restore_thaw,
 	.restore  = hvc_iucv_pm_restore_thaw,
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c52ac9efd0bf..f15112569c1d 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1188,7 +1188,7 @@ static int at_dma_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops at_dma_dev_pm_ops = {
+static const struct dev_pm_ops at_dma_dev_pm_ops = {
 	.suspend_noirq = at_dma_suspend_noirq,
 	.resume_noirq = at_dma_resume_noirq,
 };
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 2eea823516a7..285bed0fe17b 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -1427,7 +1427,7 @@ static int dw_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops dw_dev_pm_ops = {
+static const struct dev_pm_ops dw_dev_pm_ops = {
 	.suspend_noirq = dw_suspend_noirq,
 	.resume_noirq = dw_resume_noirq,
 };
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index fb6bb64e8861..3ebc61067e54 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -1313,7 +1313,7 @@ static int txx9dmac_resume_noirq(struct device *dev)
 
 }
 
-static struct dev_pm_ops txx9dmac_dev_pm_ops = {
+static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
 	.suspend_noirq = txx9dmac_suspend_noirq,
 	.resume_noirq = txx9dmac_resume_noirq,
 };
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 7ea6a8f66056..c1605b528e8f 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -518,7 +518,7 @@ static int applesmc_pm_restore(struct device *dev)
 	return applesmc_pm_resume(dev);
 }
 
-static struct dev_pm_ops applesmc_pm_ops = {
+static const struct dev_pm_ops applesmc_pm_ops = {
 	.resume = applesmc_pm_resume,
 	.restore = applesmc_pm_restore,
 };
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 049555777f67..7647a20523a0 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -1155,7 +1155,7 @@ static int i2c_pxa_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops i2c_pxa_dev_pm_ops = {
+static const struct dev_pm_ops i2c_pxa_dev_pm_ops = {
 	.suspend_noirq = i2c_pxa_suspend_noirq,
 	.resume_noirq = i2c_pxa_resume_noirq,
 };
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 96aafb91b69a..1d8c98613fa0 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -967,7 +967,7 @@ static int s3c24xx_i2c_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
+static const struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
 	.suspend_noirq = s3c24xx_i2c_suspend_noirq,
 	.resume = s3c24xx_i2c_resume,
 };
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 86a9d4e81472..ccc46418ef7f 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -647,7 +647,7 @@ static int sh_mobile_i2c_runtime_nop(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sh_mobile_i2c_dev_pm_ops = {
+static const struct dev_pm_ops sh_mobile_i2c_dev_pm_ops = {
 	.runtime_suspend = sh_mobile_i2c_runtime_nop,
 	.runtime_resume = sh_mobile_i2c_runtime_nop,
 };
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index d48c808d5928..1edb596d927b 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -319,7 +319,7 @@ static int adp5588_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops adp5588_dev_pm_ops = {
+static const struct dev_pm_ops adp5588_dev_pm_ops = {
 	.suspend = adp5588_suspend,
 	.resume  = adp5588_resume,
 };
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 076111fc72d2..8e9380bfed40 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -295,7 +295,7 @@ static int sh_keysc_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sh_keysc_dev_pm_ops = {
+static const struct dev_pm_ops sh_keysc_dev_pm_ops = {
 	.suspend = sh_keysc_suspend,
 	.resume = sh_keysc_resume,
 };
diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c
index 690f3fafa03b..61d10177fa83 100644
--- a/drivers/input/misc/bfin_rotary.c
+++ b/drivers/input/misc/bfin_rotary.c
@@ -247,7 +247,7 @@ static int bfin_rotary_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops bfin_rotary_pm_ops = {
+static const struct dev_pm_ops bfin_rotary_pm_ops = {
 	.suspend	= bfin_rotary_suspend,
 	.resume		= bfin_rotary_resume,
 };
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index 21cb755a54fb..ea4e1fd12651 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -127,7 +127,7 @@ static void pcspkr_shutdown(struct platform_device *dev)
 	pcspkr_event(NULL, EV_SND, SND_BELL, 0);
 }
 
-static struct dev_pm_ops pcspkr_pm_ops = {
+static const struct dev_pm_ops pcspkr_pm_ops = {
 	.suspend = pcspkr_suspend,
 };
 
diff --git a/drivers/input/touchscreen/pcap_ts.c b/drivers/input/touchscreen/pcap_ts.c
index 67fcd33595de..b79097e3028a 100644
--- a/drivers/input/touchscreen/pcap_ts.c
+++ b/drivers/input/touchscreen/pcap_ts.c
@@ -233,7 +233,7 @@ static int pcap_ts_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops pcap_ts_pm_ops = {
+static const struct dev_pm_ops pcap_ts_pm_ops = {
 	.suspend	= pcap_ts_suspend,
 	.resume		= pcap_ts_resume,
 };
diff --git a/drivers/media/video/davinci/vpfe_capture.c b/drivers/media/video/davinci/vpfe_capture.c
index 12a1b3d7132d..c3916a42668e 100644
--- a/drivers/media/video/davinci/vpfe_capture.c
+++ b/drivers/media/video/davinci/vpfe_capture.c
@@ -2127,7 +2127,7 @@ vpfe_resume(struct device *dev)
 	return -1;
 }
 
-static struct dev_pm_ops vpfe_dev_pm_ops = {
+static const struct dev_pm_ops vpfe_dev_pm_ops = {
 	.suspend = vpfe_suspend,
 	.resume = vpfe_resume,
 };
diff --git a/drivers/media/video/davinci/vpif_capture.c b/drivers/media/video/davinci/vpif_capture.c
index d947ee5e4eb4..78130721f578 100644
--- a/drivers/media/video/davinci/vpif_capture.c
+++ b/drivers/media/video/davinci/vpif_capture.c
@@ -2107,7 +2107,7 @@ vpif_resume(struct device *dev)
 	return -1;
 }
 
-static struct dev_pm_ops vpif_dev_pm_ops = {
+static const struct dev_pm_ops vpif_dev_pm_ops = {
 	.suspend = vpif_suspend,
 	.resume = vpif_resume,
 };
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index a4f3472d4db8..961e4484d721 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -1825,7 +1825,7 @@ static int sh_mobile_ceu_runtime_nop(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sh_mobile_ceu_dev_pm_ops = {
+static const struct dev_pm_ops sh_mobile_ceu_dev_pm_ops = {
 	.runtime_suspend = sh_mobile_ceu_runtime_nop,
 	.runtime_resume = sh_mobile_ceu_runtime_nop,
 };
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index bb47ff465c04..0d783f3e79ed 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -828,7 +828,7 @@ static int pxamci_resume(struct device *dev)
 	return ret;
 }
 
-static struct dev_pm_ops pxamci_pm_ops = {
+static const struct dev_pm_ops pxamci_pm_ops = {
 	.suspend	= pxamci_suspend,
 	.resume		= pxamci_resume,
 };
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 941a4d35ef8d..ec15f1bbf8b9 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1892,7 +1892,7 @@ static int s3cmci_resume(struct device *dev)
 	return mmc_resume_host(mmc);
 }
 
-static struct dev_pm_ops s3cmci_pm = {
+static const struct dev_pm_ops s3cmci_pm = {
 	.suspend	= s3cmci_suspend,
 	.resume		= s3cmci_resume,
 };
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
index 7c302d55910e..66123419f65d 100644
--- a/drivers/mtd/nand/nomadik_nand.c
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -216,7 +216,7 @@ static int nomadik_nand_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops nomadik_nand_pm_ops = {
+static const struct dev_pm_ops nomadik_nand_pm_ops = {
 	.suspend = nomadik_nand_suspend,
 	.resume = nomadik_nand_resume,
 };
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 78b7167a8ce3..39db0e96815d 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -837,7 +837,7 @@ static int vortex_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops vortex_pm_ops = {
+static const struct dev_pm_ops vortex_pm_ops = {
 	.suspend = vortex_suspend,
 	.resume = vortex_resume,
 	.freeze = vortex_suspend,
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 0cbe3c0e7c06..b37730065688 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -1646,7 +1646,7 @@ dm9000_drv_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops dm9000_drv_pm_ops = {
+static const struct dev_pm_ops dm9000_drv_pm_ops = {
 	.suspend	= dm9000_drv_suspend,
 	.resume		= dm9000_drv_resume,
 };
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index acfc5a3aa490..60f96c468a24 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4859,7 +4859,7 @@ out:
 	return 0;
 }
 
-static struct dev_pm_ops rtl8169_pm_ops = {
+static const struct dev_pm_ops rtl8169_pm_ops = {
 	.suspend = rtl8169_suspend,
 	.resume = rtl8169_resume,
 	.freeze = rtl8169_suspend,
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 20d6095cf411..494cd91ea39c 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -2154,7 +2154,7 @@ static int smsc911x_resume(struct device *dev)
 	return (to == 0) ? -EIO : 0;
 }
 
-static struct dev_pm_ops smsc911x_pm_ops = {
+static const struct dev_pm_ops smsc911x_pm_ops = {
 	.suspend	= smsc911x_suspend,
 	.resume		= smsc911x_resume,
 };
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1ceb9d0f8b97..9cc438282d77 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2689,7 +2689,7 @@ vmxnet3_resume(struct device *device)
 	return 0;
 }
 
-static struct dev_pm_ops vmxnet3_pm_ops = {
+static const struct dev_pm_ops vmxnet3_pm_ops = {
 	.suspend = vmxnet3_suspend,
 	.resume = vmxnet3_resume,
 };
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index ce52ea34fee5..a49452e2aed9 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -43,7 +43,7 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
 }
 
 #ifdef CONFIG_PM
-static struct dev_pm_ops pcie_portdrv_pm_ops = {
+static const struct dev_pm_ops pcie_portdrv_pm_ops = {
 	.suspend	= pcie_port_device_suspend,
 	.resume		= pcie_port_device_resume,
 	.freeze		= pcie_port_device_suspend,
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index da346eb7e77e..3aabf1e37988 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -336,7 +336,7 @@ static int pxa2xx_drv_pcmcia_resume(struct device *dev)
 	return pcmcia_socket_dev_resume(dev);
 }
 
-static struct dev_pm_ops  pxa2xx_drv_pcmcia_pm_ops = {
+static const struct dev_pm_ops pxa2xx_drv_pcmcia_pm_ops = {
 	.suspend	= pxa2xx_drv_pcmcia_suspend,
 	.resume		= pxa2xx_drv_pcmcia_resume,
 };
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index fe02cfd4b5e9..e4d12acdd525 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -1330,7 +1330,7 @@ static int yenta_dev_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops yenta_pm_ops = {
+static const struct dev_pm_ops yenta_pm_ops = {
 	.suspend_noirq = yenta_dev_suspend_noirq,
 	.resume_noirq = yenta_dev_resume_noirq,
 	.resume = yenta_dev_resume,
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index ab64522aaa64..be27aa47e810 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -460,7 +460,7 @@ static int acerhdf_remove(struct platform_device *device)
 	return 0;
 }
 
-static struct dev_pm_ops acerhdf_pm_ops = {
+static const struct dev_pm_ops acerhdf_pm_ops = {
 	.suspend = acerhdf_suspend,
 	.freeze  = acerhdf_suspend,
 };
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 4226e5352738..e647a856b9bf 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -154,7 +154,7 @@ static struct eeepc_hotk *ehotk;
 static int eeepc_hotk_thaw(struct device *device);
 static int eeepc_hotk_restore(struct device *device);
 
-static struct dev_pm_ops eeepc_pm_ops = {
+static const struct dev_pm_ops eeepc_pm_ops = {
 	.thaw = eeepc_hotk_thaw,
 	.restore = eeepc_hotk_restore,
 };
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index c2842171cec6..f00a71c58e69 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -94,7 +94,7 @@ static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
 
-static struct dev_pm_ops hp_wmi_pm_ops = {
+static const struct dev_pm_ops hp_wmi_pm_ops = {
 	.resume  = hp_wmi_resume_handler,
 	.restore  = hp_wmi_resume_handler,
 };
diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index f2bfd296dbae..fa39e759a275 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -157,7 +157,7 @@ static int wm97xx_bat_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops wm97xx_bat_pm_ops = {
+static const struct dev_pm_ops wm97xx_bat_pm_ops = {
 	.suspend	= wm97xx_bat_suspend,
 	.resume		= wm97xx_bat_resume,
 };
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 747ca194fad4..e6351b743da6 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -456,7 +456,7 @@ static int pxa_rtc_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops pxa_rtc_pm_ops = {
+static const struct dev_pm_ops pxa_rtc_pm_ops = {
 	.suspend	= pxa_rtc_suspend,
 	.resume		= pxa_rtc_resume,
 };
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 29f98a70586e..e4a44b641702 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -407,7 +407,7 @@ static int sa1100_rtc_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sa1100_rtc_pm_ops = {
+static const struct dev_pm_ops sa1100_rtc_pm_ops = {
 	.suspend	= sa1100_rtc_suspend,
 	.resume		= sa1100_rtc_resume,
 };
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index e6ed5404bca0..e95cc6f8d61e 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -826,7 +826,7 @@ static int sh_rtc_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sh_rtc_dev_pm_ops = {
+static const struct dev_pm_ops sh_rtc_dev_pm_ops = {
 	.suspend = sh_rtc_suspend,
 	.resume = sh_rtc_resume,
 };
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 79795cdf6ed8..000c7e481e59 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -485,7 +485,7 @@ static int __devexit wm831x_rtc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct dev_pm_ops wm831x_rtc_pm_ops = {
+static const struct dev_pm_ops wm831x_rtc_pm_ops = {
 	.suspend = wm831x_rtc_suspend,
 	.resume = wm831x_rtc_resume,
 
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index f76f4bd82b9f..9b43ae94beba 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -1005,7 +1005,7 @@ static int dcssblk_thaw(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops dcssblk_pm_ops = {
+static const struct dev_pm_ops dcssblk_pm_ops = {
 	.freeze		= dcssblk_freeze,
 	.thaw		= dcssblk_thaw,
 	.restore	= dcssblk_restore,
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 116d1b3eeb15..118de392af63 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -407,7 +407,7 @@ static int xpram_restore(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops xpram_pm_ops = {
+static const struct dev_pm_ops xpram_pm_ops = {
 	.restore	= xpram_restore,
 };
 
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 60473f86e1f9..33e96484d54f 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -529,7 +529,7 @@ static int monreader_restore(struct device *dev)
 	return monreader_thaw(dev);
 }
 
-static struct dev_pm_ops monreader_pm_ops = {
+static const struct dev_pm_ops monreader_pm_ops = {
 	.freeze  = monreader_freeze,
 	.thaw	 = monreader_thaw,
 	.restore = monreader_restore,
diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c
index 6532ed8b4afa..668a0579b26b 100644
--- a/drivers/s390/char/monwriter.c
+++ b/drivers/s390/char/monwriter.c
@@ -323,7 +323,7 @@ static int monwriter_thaw(struct device *dev)
 	return monwriter_restore(dev);
 }
 
-static struct dev_pm_ops monwriter_pm_ops = {
+static const struct dev_pm_ops monwriter_pm_ops = {
 	.freeze		= monwriter_freeze,
 	.thaw		= monwriter_thaw,
 	.restore	= monwriter_restore,
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index a983f5086788..ec88c59842e3 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -1019,7 +1019,7 @@ static int sclp_restore(struct device *dev)
 	return sclp_undo_suspend(SCLP_PM_EVENT_RESTORE);
 }
 
-static struct dev_pm_ops sclp_pm_ops = {
+static const struct dev_pm_ops sclp_pm_ops = {
 	.freeze		= sclp_freeze,
 	.thaw		= sclp_thaw,
 	.restore	= sclp_restore,
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 28b5afc129c3..b3beab610da4 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -547,7 +547,7 @@ struct read_storage_sccb {
 	u32 entries[0];
 } __packed;
 
-static struct dev_pm_ops sclp_mem_pm_ops = {
+static const struct dev_pm_ops sclp_mem_pm_ops = {
 	.freeze		= sclp_mem_freeze,
 };
 
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 899aa795bf38..7dfa5412d5a8 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -675,7 +675,7 @@ static int vmlogrdr_pm_prepare(struct device *dev)
 }
 
 
-static struct dev_pm_ops vmlogrdr_pm_ops = {
+static const struct dev_pm_ops vmlogrdr_pm_ops = {
 	.prepare = vmlogrdr_pm_prepare,
 };
 
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index a5a62f1f7747..5f97ea2ee6b1 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -560,7 +560,7 @@ static int ccwgroup_pm_restore(struct device *dev)
 	return gdrv->restore ? gdrv->restore(gdev) : 0;
 }
 
-static struct dev_pm_ops ccwgroup_pm_ops = {
+static const struct dev_pm_ops ccwgroup_pm_ops = {
 	.prepare = ccwgroup_pm_prepare,
 	.complete = ccwgroup_pm_complete,
 	.freeze = ccwgroup_pm_freeze,
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 92ff88ac1107..7679aee6fa14 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -1148,7 +1148,7 @@ static int css_pm_restore(struct device *dev)
 	return drv->restore ? drv->restore(sch) : 0;
 }
 
-static struct dev_pm_ops css_pm_ops = {
+static const struct dev_pm_ops css_pm_ops = {
 	.prepare = css_pm_prepare,
 	.complete = css_pm_complete,
 	.freeze = css_pm_freeze,
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 9fecfb4223a8..73901c9e260f 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1904,7 +1904,7 @@ out_unlock:
 	return ret;
 }
 
-static struct dev_pm_ops ccw_pm_ops = {
+static const struct dev_pm_ops ccw_pm_ops = {
 	.prepare = ccw_device_pm_prepare,
 	.complete = ccw_device_pm_complete,
 	.freeze = ccw_device_pm_freeze,
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 98c04cac43c1..65ebee0a3266 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -159,7 +159,7 @@ static void netiucv_pm_complete(struct device *);
 static int netiucv_pm_freeze(struct device *);
 static int netiucv_pm_restore_thaw(struct device *);
 
-static struct dev_pm_ops netiucv_pm_ops = {
+static const struct dev_pm_ops netiucv_pm_ops = {
 	.prepare = netiucv_pm_prepare,
 	.complete = netiucv_pm_complete,
 	.freeze = netiucv_pm_freeze,
diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c
index 3012355f8304..67f2485d2372 100644
--- a/drivers/s390/net/smsgiucv.c
+++ b/drivers/s390/net/smsgiucv.c
@@ -168,7 +168,7 @@ static int smsg_pm_restore_thaw(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops smsg_pm_ops = {
+static const struct dev_pm_ops smsg_pm_ops = {
 	.freeze = smsg_pm_freeze,
 	.thaw = smsg_pm_restore_thaw,
 	.restore = smsg_pm_restore_thaw,
diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c
index 4a821046baae..56ee082157aa 100644
--- a/drivers/serial/pxa.c
+++ b/drivers/serial/pxa.c
@@ -756,7 +756,7 @@ static int serial_pxa_resume(struct device *dev)
         return 0;
 }
 
-static struct dev_pm_ops serial_pxa_pm_ops = {
+static const struct dev_pm_ops serial_pxa_pm_ops = {
 	.suspend	= serial_pxa_suspend,
 	.resume		= serial_pxa_resume,
 };
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index ff38dbdb5c6e..7e3f4ff58cfd 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1312,7 +1312,7 @@ static int sci_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sci_dev_pm_ops = {
+static const struct dev_pm_ops sci_dev_pm_ops = {
 	.suspend	= sci_suspend,
 	.resume		= sci_resume,
 };
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index c8c2b693ffac..c2f707e5ce74 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1709,7 +1709,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops pxa2xx_spi_pm_ops = {
+static const struct dev_pm_ops pxa2xx_spi_pm_ops = {
 	.suspend	= pxa2xx_spi_suspend,
 	.resume		= pxa2xx_spi_resume,
 };
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 33d94f76b9ef..276591569c8b 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -489,7 +489,7 @@ static int s3c24xx_spi_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops s3c24xx_spi_pmops = {
+static const struct dev_pm_ops s3c24xx_spi_pmops = {
 	.suspend	= s3c24xx_spi_suspend,
 	.resume		= s3c24xx_spi_resume,
 };
diff --git a/drivers/uio/uio_pdrv_genirq.c b/drivers/uio/uio_pdrv_genirq.c
index aa53db9f2e88..1ef3b8fc50b3 100644
--- a/drivers/uio/uio_pdrv_genirq.c
+++ b/drivers/uio/uio_pdrv_genirq.c
@@ -210,7 +210,7 @@ static int uio_pdrv_genirq_runtime_nop(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops uio_pdrv_genirq_dev_pm_ops = {
+static const struct dev_pm_ops uio_pdrv_genirq_dev_pm_ops = {
 	.runtime_suspend = uio_pdrv_genirq_runtime_nop,
 	.runtime_resume = uio_pdrv_genirq_runtime_nop,
 };
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 91f2885b6ee1..2dcf906df569 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -363,7 +363,7 @@ static int hcd_pci_restore(struct device *dev)
 	return resume_common(dev, true);
 }
 
-struct dev_pm_ops usb_hcd_pci_pm_ops = {
+const struct dev_pm_ops usb_hcd_pci_pm_ops = {
 	.suspend	= hcd_pci_suspend,
 	.suspend_noirq	= hcd_pci_suspend_noirq,
 	.resume_noirq	= hcd_pci_resume_noirq,
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index d8b43aee581e..bbe2b924aae8 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -330,7 +330,7 @@ extern void usb_hcd_pci_remove(struct pci_dev *dev);
 extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
 
 #ifdef CONFIG_PM_SLEEP
-extern struct dev_pm_ops	usb_hcd_pci_pm_ops;
+extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
 #endif
 #endif /* CONFIG_PCI */
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4e2c6df8d3cc..043fa833eeca 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -320,7 +320,7 @@ static int usb_dev_restore(struct device *dev)
 	return usb_resume(dev, PMSG_RESTORE);
 }
 
-static struct dev_pm_ops usb_device_pm_ops = {
+static const struct dev_pm_ops usb_device_pm_ops = {
 	.prepare =	usb_dev_prepare,
 	.complete =	usb_dev_complete,
 	.suspend =	usb_dev_suspend,
diff --git a/drivers/usb/host/ehci-au1xxx.c b/drivers/usb/host/ehci-au1xxx.c
index ed77be76d6bb..dbfb482a94e3 100644
--- a/drivers/usb/host/ehci-au1xxx.c
+++ b/drivers/usb/host/ehci-au1xxx.c
@@ -297,7 +297,7 @@ static int ehci_hcd_au1xxx_drv_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops au1xxx_ehci_pmops = {
+static const struct dev_pm_ops au1xxx_ehci_pmops = {
 	.suspend	= ehci_hcd_au1xxx_drv_suspend,
 	.resume		= ehci_hcd_au1xxx_drv_resume,
 };
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index e4380082ebb1..17a6043c1fa0 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -294,7 +294,7 @@ static int ohci_hcd_au1xxx_drv_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops au1xxx_ohci_pmops = {
+static const struct dev_pm_ops au1xxx_ohci_pmops = {
 	.suspend	= ohci_hcd_au1xxx_drv_suspend,
 	.resume		= ohci_hcd_au1xxx_drv_resume,
 };
diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index f1c06202fdf2..a18debdd79b8 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -518,7 +518,7 @@ static int ohci_hcd_pxa27x_drv_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops ohci_hcd_pxa27x_pm_ops = {
+static const struct dev_pm_ops ohci_hcd_pxa27x_pm_ops = {
 	.suspend	= ohci_hcd_pxa27x_drv_suspend,
 	.resume		= ohci_hcd_pxa27x_drv_resume,
 };
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 41dbc70ae752..b7a661c02bcd 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -2353,7 +2353,7 @@ static int r8a66597_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops r8a66597_dev_pm_ops = {
+static const struct dev_pm_ops r8a66597_dev_pm_ops = {
 	.suspend = r8a66597_suspend,
 	.resume = r8a66597_resume,
 	.poweroff = r8a66597_suspend,
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 49f2346afad3..bfe08f4975a3 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2214,7 +2214,7 @@ static int musb_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops musb_dev_pm_ops = {
+static const struct dev_pm_ops musb_dev_pm_ops = {
 	.suspend	= musb_suspend,
 	.resume_noirq	= musb_resume_noirq,
 };
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 7fcb0eb54c60..f2d76dae1eb3 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -177,7 +177,7 @@ static int da903x_backlight_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops da903x_backlight_pm_ops = {
+static const struct dev_pm_ops da903x_backlight_pm_ops = {
 	.suspend	= da903x_backlight_suspend,
 	.resume		= da903x_backlight_resume,
 };
diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c
index e7116a6d82d3..73c83a8de2d3 100644
--- a/drivers/video/hitfb.c
+++ b/drivers/video/hitfb.c
@@ -456,7 +456,7 @@ static int hitfb_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops hitfb_dev_pm_ops = {
+static const struct dev_pm_ops hitfb_dev_pm_ops = {
 	.suspend	= hitfb_suspend,
 	.resume		= hitfb_resume,
 };
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index f58a3aae6ea6..b7e58059b592 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -1667,7 +1667,7 @@ static int pxafb_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops pxafb_pm_ops = {
+static const struct dev_pm_ops pxafb_pm_ops = {
 	.suspend	= pxafb_suspend,
 	.resume		= pxafb_resume,
 };
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index b4b5de930cf5..8a65fb6648a6 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -890,7 +890,7 @@ static int sh_mobile_lcdc_runtime_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops sh_mobile_lcdc_dev_pm_ops = {
+static const struct dev_pm_ops sh_mobile_lcdc_dev_pm_ops = {
 	.suspend = sh_mobile_lcdc_suspend,
 	.resume = sh_mobile_lcdc_resume,
 	.runtime_suspend = sh_mobile_lcdc_runtime_suspend,
diff --git a/drivers/watchdog/adx_wdt.c b/drivers/watchdog/adx_wdt.c
index 77afb0acc500..9c6594473d3b 100644
--- a/drivers/watchdog/adx_wdt.c
+++ b/drivers/watchdog/adx_wdt.c
@@ -314,7 +314,7 @@ static int adx_wdt_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops adx_wdt_pm_ops = {
+static const struct dev_pm_ops adx_wdt_pm_ops = {
 	.suspend = adx_wdt_suspend,
 	.resume = adx_wdt_resume,
 };
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 0d65934246af..198b8f9fe05e 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -219,7 +219,7 @@ struct dev_pm_ops {
  * to RAM and hibernation.
  */
 #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-struct dev_pm_ops name = { \
+const struct dev_pm_ops name = { \
 	.suspend = suspend_fn, \
 	.resume = resume_fn, \
 	.freeze = suspend_fn, \
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e428863574f..c18286a2167b 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -221,7 +221,7 @@ static int afiucv_pm_restore_thaw(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops afiucv_pm_ops = {
+static const struct dev_pm_ops afiucv_pm_ops = {
 	.prepare = afiucv_pm_prepare,
 	.complete = afiucv_pm_complete,
 	.freeze = afiucv_pm_freeze,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 3b1f5f5f8de7..fd8b28361a64 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -93,7 +93,7 @@ static int iucv_pm_freeze(struct device *);
 static int iucv_pm_thaw(struct device *);
 static int iucv_pm_restore(struct device *);
 
-static struct dev_pm_ops iucv_pm_ops = {
+static const struct dev_pm_ops iucv_pm_ops = {
 	.prepare = iucv_pm_prepare,
 	.complete = iucv_pm_complete,
 	.freeze = iucv_pm_freeze,
diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c
index b4b48afb6de6..5d9411839cd7 100644
--- a/sound/arm/pxa2xx-ac97.c
+++ b/sound/arm/pxa2xx-ac97.c
@@ -159,7 +159,7 @@ static int pxa2xx_ac97_resume(struct device *dev)
 	return ret;
 }
 
-static struct dev_pm_ops pxa2xx_ac97_pm_ops = {
+static const struct dev_pm_ops pxa2xx_ac97_pm_ops = {
 	.suspend	= pxa2xx_ac97_suspend,
 	.resume		= pxa2xx_ac97_resume,
 };
diff --git a/sound/soc/s3c24xx/s3c24xx_simtec.c b/sound/soc/s3c24xx/s3c24xx_simtec.c
index d441c3b64631..4984754f3298 100644
--- a/sound/soc/s3c24xx/s3c24xx_simtec.c
+++ b/sound/soc/s3c24xx/s3c24xx_simtec.c
@@ -312,7 +312,7 @@ int simtec_audio_resume(struct device *dev)
 	return 0;
 }
 
-struct dev_pm_ops simtec_audio_pmops = {
+const struct dev_pm_ops simtec_audio_pmops = {
 	.resume	= simtec_audio_resume,
 };
 EXPORT_SYMBOL_GPL(simtec_audio_pmops);
diff --git a/sound/soc/s3c24xx/s3c24xx_simtec.h b/sound/soc/s3c24xx/s3c24xx_simtec.h
index 2714203af161..e18faee30cce 100644
--- a/sound/soc/s3c24xx/s3c24xx_simtec.h
+++ b/sound/soc/s3c24xx/s3c24xx_simtec.h
@@ -15,7 +15,7 @@ extern int simtec_audio_core_probe(struct platform_device *pdev,
 extern int simtec_audio_remove(struct platform_device *pdev);
 
 #ifdef CONFIG_PM
-extern struct dev_pm_ops simtec_audio_pmops;
+extern const struct dev_pm_ops simtec_audio_pmops;
 #define simtec_audio_pm &simtec_audio_pmops
 #else
 #define simtec_audio_pm NULL
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ef8f28284cb9..0a6440c6f54a 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1236,7 +1236,7 @@ static int soc_poweroff(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops soc_pm_ops = {
+static const struct dev_pm_ops soc_pm_ops = {
 	.suspend = soc_suspend,
 	.resume = soc_resume,
 	.poweroff = soc_poweroff,
-- 
cgit v1.2.3


From 00b55864bb37200d7f05143c44f5e2edfc8c4578 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 14 Dec 2009 18:00:14 -0800
Subject: dynamic_debug.h/kernel.h: Remove KBUILD_MODNAME from dynamic_pr_debug

If CONFIG_DYNAMIC_DEBUG is enabled and a source file has:

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>

dynamic_debug.h will duplicate KBUILD_MODNAME
in the output string.

Remove the use of KBUILD_MODNAME from the
output format string generated by dynamic_debug.h

If CONFIG_DYNAMIC_DEBUG is not enabled, no compile-time
check is done to printk/dev_printk arguments.

Add it.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dynamic_debug.h | 13 ++++++-------
 include/linux/kernel.h        |  5 ++---
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a0d9422a1569..f8c2e1767500 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -57,8 +57,7 @@ extern int ddebug_remove_module(char *mod_name);
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,	\
 		DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };	\
 	if (__dynamic_dbg_enabled(descriptor))				\
-		printk(KERN_DEBUG KBUILD_MODNAME ":" pr_fmt(fmt),	\
-				##__VA_ARGS__);				\
+		printk(KERN_DEBUG pr_fmt(fmt),	##__VA_ARGS__);		\
 	} while (0)
 
 
@@ -69,9 +68,7 @@ extern int ddebug_remove_module(char *mod_name);
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,	\
 		DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };	\
 	if (__dynamic_dbg_enabled(descriptor))				\
-			dev_printk(KERN_DEBUG, dev,			\
-					KBUILD_MODNAME ": " fmt,	\
-					##__VA_ARGS__);			\
+		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
 	} while (0)
 
 #else
@@ -81,8 +78,10 @@ static inline int ddebug_remove_module(char *mod)
 	return 0;
 }
 
-#define dynamic_pr_debug(fmt, ...)  do { } while (0)
-#define dynamic_dev_dbg(dev, format, ...)  do { } while (0)
+#define dynamic_pr_debug(fmt, ...)					\
+	do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
+#define dynamic_dev_dbg(dev, format, ...)				\
+	do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
 #endif
 
 #endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fa4c590cf12..a93cdd031aee 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -397,9 +397,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #elif defined(CONFIG_DYNAMIC_DEBUG)
 /* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
-#define pr_debug(fmt, ...) do { \
-	dynamic_pr_debug(fmt, ##__VA_ARGS__); \
-	} while (0)
+#define pr_debug(fmt, ...) \
+	dynamic_pr_debug(fmt, ##__VA_ARGS__)
 #else
 #define pr_debug(fmt, ...) \
 	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
-- 
cgit v1.2.3


From 0b2749aa6ca40ff3fe12ebb3fdf010ebad2e9085 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 14 Dec 2009 18:00:19 -0800
Subject: kernel.h: remove initialization of bool in printk_once

Don't initialize __print_once.  Invert the test to reduce initialized
data.

defconfig before:	$size vmlinux
   text	   data	    bss	    dec	    hex	filename
6976022	 679572	1359668	9015262	 898fde	vmlinux

defconfig after:	$size vmlinux
   text	   data	    bss	    dec	    hex	filename
6976006	 679508	1359700	9015214	 898fae	vmlinux

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a93cdd031aee..910db75b1a72 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -251,10 +251,10 @@ extern int printk_delay_msec;
  * Print a one-time message (analogous to WARN_ONCE() et al):
  */
 #define printk_once(x...) ({			\
-	static bool __print_once = true;	\
+	static bool __print_once;		\
 						\
-	if (__print_once) {			\
-		__print_once = false;		\
+	if (!__print_once) {			\
+		__print_once = true;		\
 		printk(x);			\
 	}					\
 })
-- 
cgit v1.2.3


From 29671f22a8b6522db3b126a3fdfb208759ce46e3 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 14 Dec 2009 18:00:21 -0800
Subject: rwsem: fix rwsem_is_locked() bugs

rwsem_is_locked() tests ->activity without locks, so we should always keep
->activity consistent.  However, the code in __rwsem_do_wake() breaks this
rule, it updates ->activity after _all_ readers waken up, this may give
some reader a wrong ->activity value, thus cause rwsem_is_locked() behaves
wrong.

Quote from Andrew:

"
- we have one or more processes sleeping in down_read(), waiting for access.

- we wake one or more processes up without altering ->activity

- they start to run and they do rwsem_is_locked().  This incorrectly
  returns "false", because the waker process is still crunching away in
  __rwsem_do_wake().

- the waker now alters ->activity, but it was too late.
"

So we need get a spinlock to protect this.  And rwsem_is_locked() should
not block, thus we use spin_trylock_irqsave().

[akpm@linux-foundation.org: simplify code]
Reported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Cc: Ben Woodard <bwoodard@llnl.gov>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rwsem-spinlock.h |  6 +-----
 lib/rwsem-spinlock.c           | 13 +++++++++++++
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 6c3c0f6c261f..bdfcc2527970 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -68,11 +68,7 @@ extern int __down_write_trylock(struct rw_semaphore *sem);
 extern void __up_read(struct rw_semaphore *sem);
 extern void __up_write(struct rw_semaphore *sem);
 extern void __downgrade_write(struct rw_semaphore *sem);
-
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
-	return (sem->activity != 0);
-}
+extern int rwsem_is_locked(struct rw_semaphore *sem);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_RWSEM_SPINLOCK_H */
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 39a74110f55b..ccf95bff7984 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,6 +17,19 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE	0x00000002
 };
 
+int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	int ret = 1;
+	unsigned long flags;
+
+	if (spin_trylock_irqsave(&sem->wait_lock, flags)) {
+		ret = (sem->activity != 0);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rwsem_is_locked);
+
 /*
  * initialise the semaphore
  */
-- 
cgit v1.2.3


From 948c1e2521979c332b21b623414cf258150f214e Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 14 Dec 2009 18:00:22 -0800
Subject: kallsyms: remove deprecated print_fn_descriptor_symbol()

According to feature-removal-schedule.txt, it is the time to remove
print_fn_descriptor_symbol().

And a quick grep shows that it no longer has any callers.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/feature-removal-schedule.txt |  9 ---------
 include/linux/kallsyms.h                   | 12 ------------
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index eb2c138c277c..21ab9357326d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -291,15 +291,6 @@ Who:	Michael Buesch <mb@bu3sch.de>
 
 ---------------------------
 
-What: print_fn_descriptor_symbol()
-When: October 2009
-Why:  The %pF vsprintf format provides the same functionality in a
-      simpler way.  print_fn_descriptor_symbol() is deprecated but
-      still present to give out-of-tree modules time to change.
-Who:  Bjorn Helgaas <bjorn.helgaas@hp.com>
-
----------------------------
-
 What:	/sys/o2cb symlink
 When:	January 2010
 Why:	/sys/fs/o2cb is the proper location for this information - /sys/o2cb
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 792274269f2b..d8e9b3d1c23c 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -107,18 +107,6 @@ static inline void print_symbol(const char *fmt, unsigned long addr)
 		       __builtin_extract_return_addr((void *)addr));
 }
 
-/*
- * Pretty-print a function pointer.  This function is deprecated.
- * Please use the "%pF" vsprintf format instead.
- */
-static inline void __deprecated print_fn_descriptor_symbol(const char *fmt, void *addr)
-{
-#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
-	addr = *(void **)addr;
-#endif
-	print_symbol(fmt, (unsigned long)addr);
-}
-
 static inline void print_ip_sym(unsigned long ip)
 {
 	printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
-- 
cgit v1.2.3


From 8a64f336bc1d4aa203b138d29d5a9c414a9fbb47 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 14 Dec 2009 18:00:25 -0800
Subject: kernel.h: add printk_ratelimited and pr_<level>_rl

Add a printk_ratelimited statement expression macro that uses a per-call
ratelimit_state so that multiple subsystems output messages are not
suppressed by a global __ratelimit state.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: s/_rl/_ratelimited/g]
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Naohiro Ooiwa <nooiwa@miraclelinux.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 910db75b1a72..4d9c916d06d9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -404,6 +404,50 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
 #endif
 
+/*
+ * ratelimited messages with local ratelimit_state,
+ * no local ratelimit_state used in the !PRINTK case
+ */
+#ifdef CONFIG_PRINTK
+#define printk_ratelimited(fmt, ...)  ({		\
+	static struct ratelimit_state _rs = {		\
+		.interval = DEFAULT_RATELIMIT_INTERVAL, \
+		.burst = DEFAULT_RATELIMIT_BURST,       \
+	};                                              \
+							\
+	if (!__ratelimit(&_rs))                         \
+		printk(fmt, ##__VA_ARGS__);		\
+})
+#else
+/* No effect, but we still get type checking even in the !PRINTK case: */
+#define printk_ratelimited printk
+#endif
+
+#define pr_emerg_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/* no pr_cont_ratelimited, don't do that... */
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug_ratelimited(fmt, ...) \
+	({ if (0) printk_ratelimited(KERN_DEBUG pr_fmt(fmt), \
+				     ##__VA_ARGS__); 0; })
+#endif
+
 /*
  * General tracing related utility functions - trace_printk(),
  * tracing_on/tracing_off and tracing_start()/tracing_stop
-- 
cgit v1.2.3


From e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 14 Dec 2009 18:00:26 -0800
Subject: task_struct: make journal_info conditional

journal_info in task_struct is used in journaling file system only.  So
introduce CONFIG_FS_JOURNAL_INFO and make it conditional.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig                | 4 ++++
 fs/btrfs/Kconfig          | 1 +
 fs/ext4/Kconfig           | 1 +
 fs/gfs2/Kconfig           | 1 +
 fs/jbd/Kconfig            | 1 +
 fs/jbd2/Kconfig           | 1 +
 fs/nilfs2/Kconfig         | 1 +
 fs/reiserfs/Kconfig       | 1 +
 include/linux/init_task.h | 8 +++++++-
 include/linux/sched.h     | 2 ++
 10 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index 64d44efad7a5..f8fccaaad628 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,6 +6,10 @@ menu "File systems"
 
 if BLOCK
 
+config FS_JOURNAL_INFO
+	bool
+	default n
+
 source "fs/ext2/Kconfig"
 source "fs/ext3/Kconfig"
 source "fs/ext4/Kconfig"
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..402afe0a0bfb 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,7 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
+	select FS_JOURNAL_INFO
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..e5f6774846e4 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,6 +2,7 @@ config EXT4_FS
 	tristate "The Extended 4 (ext4) filesystem"
 	select JBD2
 	select CRC16
+	select FS_JOURNAL_INFO
 	help
 	  This is the next generation of the ext3 filesystem.
 
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 4dcddf83326f..b192c661caa6 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,6 +10,7 @@ config GFS2_FS
 	select SLOW_WORK
 	select QUOTA
 	select QUOTACTL
+	select FS_JOURNAL_INFO
 	help
 	  A cluster filesystem.
 
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index 4e28beeed157..a8408983abd4 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,5 +1,6 @@
 config JBD
 	tristate
+	select FS_JOURNAL_INFO
 	help
 	  This is a generic journalling layer for block devices.  It is
 	  currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index f32f346f4b0a..0f7d1ceafdfd 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,6 +1,7 @@
 config JBD2
 	tristate
 	select CRC32
+	select FS_JOURNAL_INFO
 	help
 	  This is a generic journaling layer for block devices that support
 	  both 32-bit and 64-bit block numbers.  It is currently used by
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..1225af7b2166 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,6 +2,7 @@ config NILFS2_FS
 	tristate "NILFS2 file system support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 	select CRC32
+	select FS_JOURNAL_INFO
 	help
 	  NILFS2 is a log-structured file system (LFS) supporting continuous
 	  snapshotting.  In addition to versioning capability of the entire
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index 513f431038f9..ac7cd75c86f8 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,6 +1,7 @@
 config REISERFS_FS
 	tristate "Reiserfs support"
 	select CRC32
+	select FS_JOURNAL_INFO
 	help
 	  Stores not just filenames but the files themselves in a balanced
 	  tree.  Uses journalling.
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 8d10aa7fd4c9..8ed0abf06f89 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,6 +111,12 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_FS_JOURNAL_INFO
+#define INIT_JOURNAL_INFO	.journal_info = NULL,
+#else
+#define INIT_JOURNAL_INFO
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,7 +168,6 @@ extern struct cred init_cred;
 		.signal = {{0}}},					\
 	.blocked	= {{0}},					\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
-	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= __SPIN_LOCK_UNLOCKED(tsk.pi_lock),		\
@@ -173,6 +178,7 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
+	INIT_JOURNAL_INFO						\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 294eb2f80144..7d388494f45d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1446,8 +1446,10 @@ struct task_struct {
 	gfp_t lockdep_reclaim_gfp;
 #endif
 
+#ifdef CONFIG_FS_JOURNAL_INFO
 /* journalling filesystem info */
 	void *journal_info;
+#endif
 
 /* stacked block device info */
 	struct bio *bio_list, **bio_tail;
-- 
cgit v1.2.3


From 603c4ba96be998a8dd7a6f9b23681c49acdf4b64 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Mon, 14 Dec 2009 18:00:29 -0800
Subject: err.h: add helper function to simplify pointer error checking

There are quite a few instances in the kernel of checks of pointers both
against NULL and against the errno range, handling both cases identically.
This additional helper function would simplify such code.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/err.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/err.h b/include/linux/err.h
index ec87f3142bf3..1b12642636c7 100644
--- a/include/linux/err.h
+++ b/include/linux/err.h
@@ -34,6 +34,11 @@ static inline long IS_ERR(const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+static inline long IS_ERR_OR_NULL(const void *ptr)
+{
+	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+
 /**
  * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
  * @ptr: The pointer to cast.
-- 
cgit v1.2.3


From 5f0a96b044d8edaee20f4a32ef6c393599ca55f8 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Mon, 14 Dec 2009 18:00:32 -0800
Subject: cs5535-gpio: add AMD CS5535/CS5536 GPIO driver support

This creates a CS5535/CS5536 GPIO driver which uses a gpio_chip backend
(allowing GPIO users to use the generic GPIO API if desired) while also
allowing architecture-specific users directly (via the cs5535_gpio_*
functions).

Tested on an OLPC machine.  Some Leemotes also use CS5536 (with a mips
cpu), which is why this is in drivers/gpio rather than arch/x86.
Currently, it conflicts with older geode GPIO support; once MFGPT support
is reworked to also be more generic, the older geode code will be removed.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Jordan Crouse <jordan@cosmicpenguin.net>
Cc: David Brownell <david-b@pacbell.net>
Reviewed-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/geode.h |  28 +----
 drivers/gpio/Kconfig         |  10 ++
 drivers/gpio/Makefile        |   1 +
 drivers/gpio/cs5535-gpio.c   | 282 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/cs5535.h       |  58 +++++++++
 5 files changed, 352 insertions(+), 27 deletions(-)
 create mode 100644 drivers/gpio/cs5535-gpio.c
 create mode 100644 include/linux/cs5535.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index ad3c2ed75481..5716214d37d9 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -12,6 +12,7 @@
 
 #include <asm/processor.h>
 #include <linux/io.h>
+#include <linux/cs5535.h>
 
 /* Generic southbridge functions */
 
@@ -115,33 +116,6 @@ extern int geode_get_dev_base(unsigned int dev);
 #define VSA_VR_MEM_SIZE		0x0200
 #define AMD_VSA_SIG		0x4132	/* signature is ascii 'VSA2' */
 #define GSW_VSA_SIG		0x534d  /* General Software signature */
-/* GPIO */
-
-#define GPIO_OUTPUT_VAL		0x00
-#define GPIO_OUTPUT_ENABLE	0x04
-#define GPIO_OUTPUT_OPEN_DRAIN	0x08
-#define GPIO_OUTPUT_INVERT	0x0C
-#define GPIO_OUTPUT_AUX1	0x10
-#define GPIO_OUTPUT_AUX2	0x14
-#define GPIO_PULL_UP		0x18
-#define GPIO_PULL_DOWN		0x1C
-#define GPIO_INPUT_ENABLE	0x20
-#define GPIO_INPUT_INVERT	0x24
-#define GPIO_INPUT_FILTER	0x28
-#define GPIO_INPUT_EVENT_COUNT	0x2C
-#define GPIO_READ_BACK		0x30
-#define GPIO_INPUT_AUX1		0x34
-#define GPIO_EVENTS_ENABLE	0x38
-#define GPIO_LOCK_ENABLE	0x3C
-#define GPIO_POSITIVE_EDGE_EN	0x40
-#define GPIO_NEGATIVE_EDGE_EN	0x44
-#define GPIO_POSITIVE_EDGE_STS	0x48
-#define GPIO_NEGATIVE_EDGE_STS	0x4C
-
-#define GPIO_MAP_X		0xE0
-#define GPIO_MAP_Y		0xE4
-#define GPIO_MAP_Z		0xE8
-#define GPIO_MAP_W		0xEC
 
 static inline u32 geode_gpio(unsigned int nr)
 {
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 2ad0128c63c6..df4b82d1348e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -174,6 +174,16 @@ config GPIO_ADP5520
 
 comment "PCI GPIO expanders:"
 
+config GPIO_CS5535
+	tristate "AMD CS5535/CS5536 GPIO support"
+	depends on PCI && !CS5535_GPIO && !MGEODE_LX
+	help
+	  The AMD CS5535 and CS5536 southbridges support 28 GPIO pins that
+	  can be used for quite a number of things.  The CS5535/6 is found on
+	  AMD Geode and Lemote Yeeloong devices.
+
+	  If unsure, say N.
+
 config GPIO_BT8XX
 	tristate "BT8XX GPIO abuser"
 	depends on PCI && VIDEO_BT848=n
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 00a532c9a1e2..270b6d7839f5 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_GPIO_PL061)	+= pl061.o
 obj-$(CONFIG_GPIO_TWL4030)	+= twl4030-gpio.o
 obj-$(CONFIG_GPIO_UCB1400)	+= ucb1400_gpio.o
 obj-$(CONFIG_GPIO_XILINX)	+= xilinx_gpio.o
+obj-$(CONFIG_GPIO_CS5535)	+= cs5535-gpio.o
 obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
 obj-$(CONFIG_GPIO_VR41XX)	+= vr41xx_giu.o
 obj-$(CONFIG_GPIO_WM831X)	+= wm831x-gpio.o
diff --git a/drivers/gpio/cs5535-gpio.c b/drivers/gpio/cs5535-gpio.c
new file mode 100644
index 000000000000..56138893819a
--- /dev/null
+++ b/drivers/gpio/cs5535-gpio.c
@@ -0,0 +1,282 @@
+/*
+ * AMD CS5535/CS5536 GPIO driver
+ * Copyright (C) 2006  Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009  Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/cs5535.h>
+
+#define DRV_NAME "cs5535-gpio"
+#define GPIO_BAR 1
+
+static struct cs5535_gpio_chip {
+	struct gpio_chip chip;
+	resource_size_t base;
+
+	struct pci_dev *pdev;
+	spinlock_t lock;
+} cs5535_gpio_chip;
+
+/*
+ * The CS5535/CS5536 GPIOs support a number of extra features not defined
+ * by the gpio_chip API, so these are exported.  For a full list of the
+ * registers, see include/linux/cs5535.h.
+ */
+
+static void __cs5535_gpio_set(struct cs5535_gpio_chip *chip, unsigned offset,
+		unsigned int reg)
+{
+	if (offset < 16)
+		/* low bank register */
+		outl(1 << offset, chip->base + reg);
+	else
+		/* high bank register */
+		outl(1 << (offset - 16), chip->base + 0x80 + reg);
+}
+
+void cs5535_gpio_set(unsigned offset, unsigned int reg)
+{
+	struct cs5535_gpio_chip *chip = &cs5535_gpio_chip;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	__cs5535_gpio_set(chip, offset, reg);
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+EXPORT_SYMBOL_GPL(cs5535_gpio_set);
+
+static void __cs5535_gpio_clear(struct cs5535_gpio_chip *chip, unsigned offset,
+		unsigned int reg)
+{
+	if (offset < 16)
+		/* low bank register */
+		outl(1 << (offset + 16), chip->base + reg);
+	else
+		/* high bank register */
+		outl(1 << offset, chip->base + 0x80 + reg);
+}
+
+void cs5535_gpio_clear(unsigned offset, unsigned int reg)
+{
+	struct cs5535_gpio_chip *chip = &cs5535_gpio_chip;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	__cs5535_gpio_clear(chip, offset, reg);
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+EXPORT_SYMBOL_GPL(cs5535_gpio_clear);
+
+int cs5535_gpio_isset(unsigned offset, unsigned int reg)
+{
+	struct cs5535_gpio_chip *chip = &cs5535_gpio_chip;
+	unsigned long flags;
+	long val;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	if (offset < 16)
+		/* low bank register */
+		val = inl(chip->base + reg);
+	else {
+		/* high bank register */
+		val = inl(chip->base + 0x80 + reg);
+		offset -= 16;
+	}
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return (val & (1 << offset)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_gpio_isset);
+
+/*
+ * Generic gpio_chip API support.
+ */
+
+static int chip_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	return cs5535_gpio_isset(offset, GPIO_OUTPUT_VAL);
+}
+
+static void chip_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	if (val)
+		cs5535_gpio_set(offset, GPIO_OUTPUT_VAL);
+	else
+		cs5535_gpio_clear(offset, GPIO_OUTPUT_VAL);
+}
+
+static int chip_direction_input(struct gpio_chip *c, unsigned offset)
+{
+	struct cs5535_gpio_chip *chip = (struct cs5535_gpio_chip *) c;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	__cs5535_gpio_set(chip, offset, GPIO_INPUT_ENABLE);
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+static int chip_direction_output(struct gpio_chip *c, unsigned offset, int val)
+{
+	struct cs5535_gpio_chip *chip = (struct cs5535_gpio_chip *) c;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	__cs5535_gpio_set(chip, offset, GPIO_OUTPUT_ENABLE);
+	if (val)
+		__cs5535_gpio_set(chip, offset, GPIO_OUTPUT_VAL);
+	else
+		__cs5535_gpio_clear(chip, offset, GPIO_OUTPUT_VAL);
+
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+static struct cs5535_gpio_chip cs5535_gpio_chip = {
+	.chip = {
+		.owner = THIS_MODULE,
+		.label = DRV_NAME,
+
+		.base = 0,
+		.ngpio = 28,
+
+		.get = chip_gpio_get,
+		.set = chip_gpio_set,
+
+		.direction_input = chip_direction_input,
+		.direction_output = chip_direction_output,
+	},
+};
+
+static int __init cs5535_gpio_probe(struct pci_dev *pdev,
+		const struct pci_device_id *pci_id)
+{
+	int err;
+
+	/* There are two ways to get the GPIO base address; one is by
+	 * fetching it from MSR_LBAR_GPIO, the other is by reading the
+	 * PCI BAR info.  The latter method is easier (especially across
+	 * different architectures), so we'll stick with that for now.  If
+	 * it turns out to be unreliable in the face of crappy BIOSes, we
+	 * can always go back to using MSRs.. */
+
+	err = pci_enable_device_io(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "can't enable device IO\n");
+		goto done;
+	}
+
+	err = pci_request_region(pdev, GPIO_BAR, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", GPIO_BAR);
+		goto done;
+	}
+
+	/* set up the driver-specific struct */
+	cs5535_gpio_chip.base = pci_resource_start(pdev, GPIO_BAR);
+	cs5535_gpio_chip.pdev = pdev;
+	spin_lock_init(&cs5535_gpio_chip.lock);
+
+	dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", GPIO_BAR,
+			(unsigned long long) cs5535_gpio_chip.base);
+
+	/* finally, register with the generic GPIO API */
+	err = gpiochip_add(&cs5535_gpio_chip.chip);
+	if (err) {
+		dev_err(&pdev->dev, "failed to register gpio chip\n");
+		goto release_region;
+	}
+
+	printk(KERN_INFO DRV_NAME ": GPIO support successfully loaded.\n");
+	return 0;
+
+release_region:
+	pci_release_region(pdev, GPIO_BAR);
+done:
+	return err;
+}
+
+static void __exit cs5535_gpio_remove(struct pci_dev *pdev)
+{
+	int err;
+
+	err = gpiochip_remove(&cs5535_gpio_chip.chip);
+	if (err) {
+		/* uhh? */
+		dev_err(&pdev->dev, "unable to remove gpio_chip?\n");
+	}
+	pci_release_region(pdev, GPIO_BAR);
+}
+
+static struct pci_device_id cs5535_gpio_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, cs5535_gpio_pci_tbl);
+
+/*
+ * We can't use the standard PCI driver registration stuff here, since
+ * that allows only one driver to bind to each PCI device (and we want
+ * multiple drivers to be able to bind to the device).  Instead, manually
+ * scan for the PCI device, request a single region, and keep track of the
+ * devices that we're using.
+ */
+
+static int __init cs5535_gpio_scan_pci(void)
+{
+	struct pci_dev *pdev;
+	int err = -ENODEV;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs5535_gpio_pci_tbl); i++) {
+		pdev = pci_get_device(cs5535_gpio_pci_tbl[i].vendor,
+				cs5535_gpio_pci_tbl[i].device, NULL);
+		if (pdev) {
+			err = cs5535_gpio_probe(pdev, &cs5535_gpio_pci_tbl[i]);
+			if (err)
+				pci_dev_put(pdev);
+
+			/* we only support a single CS5535/6 southbridge */
+			break;
+		}
+	}
+
+	return err;
+}
+
+static void __exit cs5535_gpio_free_pci(void)
+{
+	cs5535_gpio_remove(cs5535_gpio_chip.pdev);
+	pci_dev_put(cs5535_gpio_chip.pdev);
+}
+
+static int __init cs5535_gpio_init(void)
+{
+	return cs5535_gpio_scan_pci();
+}
+
+static void __exit cs5535_gpio_exit(void)
+{
+	cs5535_gpio_free_pci();
+}
+
+module_init(cs5535_gpio_init);
+module_exit(cs5535_gpio_exit);
+
+MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_DESCRIPTION("AMD CS5535/CS5536 GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h
new file mode 100644
index 000000000000..cfea689dfa34
--- /dev/null
+++ b/include/linux/cs5535.h
@@ -0,0 +1,58 @@
+/*
+ * AMD CS5535/CS5536 definitions
+ * Copyright (C) 2006  Advanced Micro Devices, Inc.
+ * Copyright (C) 2009  Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef _CS5535_H
+#define _CS5535_H
+
+/* MSRs */
+#define MSR_LBAR_SMB		0x5140000B
+#define MSR_LBAR_GPIO		0x5140000C
+#define MSR_LBAR_MFGPT		0x5140000D
+#define MSR_LBAR_ACPI		0x5140000E
+#define MSR_LBAR_PMS		0x5140000F
+
+/* resource sizes */
+#define LBAR_GPIO_SIZE		0xFF
+#define LBAR_MFGPT_SIZE		0x40
+#define LBAR_ACPI_SIZE		0x40
+#define LBAR_PMS_SIZE		0x80
+
+/* GPIOs */
+#define GPIO_OUTPUT_VAL		0x00
+#define GPIO_OUTPUT_ENABLE	0x04
+#define GPIO_OUTPUT_OPEN_DRAIN	0x08
+#define GPIO_OUTPUT_INVERT	0x0C
+#define GPIO_OUTPUT_AUX1	0x10
+#define GPIO_OUTPUT_AUX2	0x14
+#define GPIO_PULL_UP		0x18
+#define GPIO_PULL_DOWN		0x1C
+#define GPIO_INPUT_ENABLE	0x20
+#define GPIO_INPUT_INVERT	0x24
+#define GPIO_INPUT_FILTER	0x28
+#define GPIO_INPUT_EVENT_COUNT	0x2C
+#define GPIO_READ_BACK		0x30
+#define GPIO_INPUT_AUX1		0x34
+#define GPIO_EVENTS_ENABLE	0x38
+#define GPIO_LOCK_ENABLE	0x3C
+#define GPIO_POSITIVE_EDGE_EN	0x40
+#define GPIO_NEGATIVE_EDGE_EN	0x44
+#define GPIO_POSITIVE_EDGE_STS	0x48
+#define GPIO_NEGATIVE_EDGE_STS	0x4C
+
+#define GPIO_MAP_X		0xE0
+#define GPIO_MAP_Y		0xE4
+#define GPIO_MAP_Z		0xE8
+#define GPIO_MAP_W		0xEC
+
+void cs5535_gpio_set(unsigned offset, unsigned int reg);
+void cs5535_gpio_clear(unsigned offset, unsigned int reg);
+int cs5535_gpio_isset(unsigned offset, unsigned int reg);
+
+#endif
-- 
cgit v1.2.3


From 82dca611bb516ec5fb7d04077733d6a4b70f52d1 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Mon, 14 Dec 2009 18:00:37 -0800
Subject: cs5535: add a generic MFGPT driver

This is based on the old code on arch/x86/kernel/mfgpt_32.c, except it's
not x86 specific, it's modular, and it makes use of a PCI BAR rather than
a random MSR.  Currently module unloading is not supported; it's uncertain
whether or not it can be made work with the hardware.

[akpm@linux-foundation.org: add X86 dependency]
Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Jordan Crouse <jordan@cosmicpenguin.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Chris Ball <cjb@laptop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/geode.h |  40 -----
 drivers/misc/Kconfig         |  24 +++
 drivers/misc/Makefile        |   1 +
 drivers/misc/cs5535-mfgpt.c  | 370 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/cs5535.h       |  67 ++++++++
 5 files changed, 462 insertions(+), 40 deletions(-)
 create mode 100644 drivers/misc/cs5535-mfgpt.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index 5716214d37d9..547e9642642a 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -47,16 +47,6 @@ extern int geode_get_dev_base(unsigned int dev);
 
 #define MSR_DIVIL_SOFT_RESET	0x51400017
 
-#define MSR_PIC_YSEL_LOW	0x51400020
-#define MSR_PIC_YSEL_HIGH	0x51400021
-#define MSR_PIC_ZSEL_LOW	0x51400022
-#define MSR_PIC_ZSEL_HIGH	0x51400023
-#define MSR_PIC_IRQM_LPC	0x51400025
-
-#define MSR_MFGPT_IRQ		0x51400028
-#define MSR_MFGPT_NR		0x51400029
-#define MSR_MFGPT_SETUP		0x5140002B
-
 #define MSR_LX_SPARE_MSR	0x80000011	/* DC-specific */
 
 #define MSR_GX_GLD_MSR_CONFIG	0xC0002001
@@ -169,36 +159,6 @@ static inline int geode_has_vsa2(void)
 }
 #endif
 
-/* MFGPTs */
-
-#define MFGPT_MAX_TIMERS	8
-#define MFGPT_TIMER_ANY		(-1)
-
-#define MFGPT_DOMAIN_WORKING	1
-#define MFGPT_DOMAIN_STANDBY	2
-#define MFGPT_DOMAIN_ANY	(MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY)
-
-#define MFGPT_CMP1		0
-#define MFGPT_CMP2		1
-
-#define MFGPT_EVENT_IRQ		0
-#define MFGPT_EVENT_NMI		1
-#define MFGPT_EVENT_RESET	3
-
-#define MFGPT_REG_CMP1		0
-#define MFGPT_REG_CMP2		2
-#define MFGPT_REG_COUNTER	4
-#define MFGPT_REG_SETUP		6
-
-#define MFGPT_SETUP_CNTEN	(1 << 15)
-#define MFGPT_SETUP_CMP2	(1 << 14)
-#define MFGPT_SETUP_CMP1	(1 << 13)
-#define MFGPT_SETUP_SETUP	(1 << 12)
-#define MFGPT_SETUP_STOPEN	(1 << 11)
-#define MFGPT_SETUP_EXTEN	(1 << 10)
-#define MFGPT_SETUP_REVEN	(1 << 5)
-#define MFGPT_SETUP_CLKSEL	(1 << 4)
-
 static inline void geode_mfgpt_write(int timer, u16 reg, u16 value)
 {
 	u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 724d1188a322..d6d6d846f0d6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -187,6 +187,30 @@ config SGI_XP
 	  this feature will allow for direct communication between SSIs
 	  based on a network adapter and DMA messaging.
 
+config CS5535_MFGPT
+	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
+	depends on PCI && !MGEODE_LX
+	depends on X86
+	default n
+	help
+	  This driver provides access to MFGPT functionality for other
+	  drivers that need timers.  MFGPTs are available in the CS5535 and
+	  CS5536 companion chips that are found in AMD Geode and several
+	  other platforms.  They have a better resolution and max interval
+	  than the generic PIT, and are suitable for use as high-res timers.
+	  You probably don't want to enable this manually; other drivers that
+	  make use of it should enable it.
+
+config CS5535_MFGPT_DEFAULT_IRQ
+	int
+	default 7
+	help
+	  MFGPTs on the CS5535 require an interrupt.  The selected IRQ
+	  can be overridden as a module option as well as by driver that
+	  use the cs5535_mfgpt_ API; however, different architectures might
+	  want to use a different IRQ by default.  This is here for
+	  architectures to set as necessary.
+
 config HP_ILO
 	tristate "Channel interface driver for HP iLO/iLO2 processor"
 	depends on PCI
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index e76b77977442..049ff2482f30 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
 obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
 obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
+obj-$(CONFIG_CS5535_MFGPT)	+= cs5535-mfgpt.o
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
 obj-$(CONFIG_ISL29003)		+= isl29003.o
 obj-$(CONFIG_EP93XX_PWM)	+= ep93xx_pwm.o
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
new file mode 100644
index 000000000000..8110460558ff
--- /dev/null
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -0,0 +1,370 @@
+/*
+ * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT)
+ *
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007  Andres Salomon <dilinger@debian.org>
+ * Copyright (C) 2009  Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/cs5535.h>
+
+#define DRV_NAME "cs5535-mfgpt"
+#define MFGPT_BAR 2
+
+static int mfgpt_reset_timers;
+module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644);
+MODULE_PARM_DESC(mfgptfix, "Reset the MFGPT timers during init; "
+		"required by some broken BIOSes (ie, TinyBIOS < 0.99).");
+
+struct cs5535_mfgpt_timer {
+	struct cs5535_mfgpt_chip *chip;
+	int nr;
+};
+
+static struct cs5535_mfgpt_chip {
+	DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS);
+	resource_size_t base;
+
+	struct pci_dev *pdev;
+	spinlock_t lock;
+	int initialized;
+} cs5535_mfgpt_chip;
+
+int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp,
+		int event, int enable)
+{
+	uint32_t msr, mask, value, dummy;
+	int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * The register maps for these are described in sections 6.17.1.x of
+	 * the AMD Geode CS5536 Companion Device Data Book.
+	 */
+	switch (event) {
+	case MFGPT_EVENT_RESET:
+		/*
+		 * XXX: According to the docs, we cannot reset timers above
+		 * 6; that is, resets for 7 and 8 will be ignored.  Is this
+		 * a problem?   -dilinger
+		 */
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + 24);
+		break;
+
+	case MFGPT_EVENT_NMI:
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	case MFGPT_EVENT_IRQ:
+		msr = MSR_MFGPT_IRQ;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	default:
+		return -EIO;
+	}
+
+	rdmsr(msr, value, dummy);
+
+	if (enable)
+		value |= mask;
+	else
+		value &= ~mask;
+
+	wrmsr(msr, value, dummy);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event);
+
+int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq,
+		int enable)
+{
+	uint32_t zsel, lpc, dummy;
+	int shift;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+	 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+	 * 2, and we mustn't use nor change it.
+	 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+	 * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+	 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+	 */
+	rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4;
+	if (((zsel >> shift) & 0xF) == 2)
+		return -EIO;
+
+	/* Choose IRQ: if none supplied, keep IRQ already set or use default */
+	if (!*irq)
+		*irq = (zsel >> shift) & 0xF;
+	if (!*irq)
+		*irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ;
+
+	/* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+	if (*irq < 1 || *irq == 2 || *irq > 15)
+		return -EIO;
+	rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+	if (lpc & (1 << *irq))
+		return -EIO;
+
+	/* All chosen and checked - go for it */
+	if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+		return -EIO;
+	if (enable) {
+		zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+		wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq);
+
+struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain)
+{
+	struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip;
+	struct cs5535_mfgpt_timer *timer = NULL;
+	unsigned long flags;
+	int max;
+
+	if (!mfgpt->initialized)
+		goto done;
+
+	/* only allocate timers from the working domain if requested */
+	if (domain == MFGPT_DOMAIN_WORKING)
+		max = 6;
+	else
+		max = MFGPT_MAX_TIMERS;
+
+	if (timer_nr >= max) {
+		/* programmer error.  silly programmers! */
+		WARN_ON(1);
+		goto done;
+	}
+
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	if (timer_nr < 0) {
+		unsigned long t;
+
+		/* try to find any available timer */
+		t = find_first_bit(mfgpt->avail, max);
+		/* set timer_nr to -1 if no timers available */
+		timer_nr = t < max ? (int) t : -1;
+	} else {
+		/* check if the requested timer's available */
+		if (test_bit(timer_nr, mfgpt->avail))
+			timer_nr = -1;
+	}
+
+	if (timer_nr >= 0)
+		/* if timer_nr is not -1, it's an available timer */
+		__clear_bit(timer_nr, mfgpt->avail);
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	if (timer_nr < 0)
+		goto done;
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+	if (!timer) {
+		/* aw hell */
+		spin_lock_irqsave(&mfgpt->lock, flags);
+		__set_bit(timer_nr, mfgpt->avail);
+		spin_unlock_irqrestore(&mfgpt->lock, flags);
+		goto done;
+	}
+	timer->chip = mfgpt;
+	timer->nr = timer_nr;
+	dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr);
+
+done:
+	return timer;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer);
+
+/*
+ * XXX: This frees the timer memory, but never resets the actual hardware
+ * timer.  The old geode_mfgpt code did this; it would be good to figure
+ * out a way to actually release the hardware timer.  See comments below.
+ */
+void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer)
+{
+	kfree(timer);
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer);
+
+uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg)
+{
+	return inw(timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_read);
+
+void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg,
+		uint16_t value)
+{
+	outw(value, timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_write);
+
+/*
+ * This is a sledgehammer that resets all MFGPT timers. This is required by
+ * some broken BIOSes which leave the system in an unstable state
+ * (TinyBIOS 0.98, for example; fixed in 0.99).  It's uncertain as to
+ * whether or not this secret MSR can be used to release individual timers.
+ * Jordan tells me that he and Mitch once played w/ it, but it's unclear
+ * what the results of that were (and they experienced some instability).
+ */
+static void __init reset_all_timers(void)
+{
+	uint32_t val, dummy;
+
+	/* The following undocumented bit resets the MFGPT timers */
+	val = 0xFF; dummy = 0;
+	wrmsr(MSR_MFGPT_SETUP, val, dummy);
+}
+
+/*
+ * Check whether any MFGPTs are available for the kernel to use.  In most
+ * cases, firmware that uses AMD's VSA code will claim all timers during
+ * bootup; we certainly don't want to take them if they're already in use.
+ * In other cases (such as with VSAless OpenFirmware), the system firmware
+ * leaves timers available for us to use.
+ */
+static int __init scan_timers(struct cs5535_mfgpt_chip *mfgpt)
+{
+	struct cs5535_mfgpt_timer timer = { .chip = mfgpt };
+	unsigned long flags;
+	int timers = 0;
+	uint16_t val;
+	int i;
+
+	/* bios workaround */
+	if (mfgpt_reset_timers)
+		reset_all_timers();
+
+	/* just to be safe, protect this section w/ lock */
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+		timer.nr = i;
+		val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP);
+		if (!(val & MFGPT_SETUP_SETUP)) {
+			__set_bit(i, mfgpt->avail);
+			timers++;
+		}
+	}
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	return timers;
+}
+
+static int __init cs5535_mfgpt_probe(struct pci_dev *pdev,
+		const struct pci_device_id *pci_id)
+{
+	int err, t;
+
+	/* There are two ways to get the MFGPT base address; one is by
+	 * fetching it from MSR_LBAR_MFGPT, the other is by reading the
+	 * PCI BAR info.  The latter method is easier (especially across
+	 * different architectures), so we'll stick with that for now.  If
+	 * it turns out to be unreliable in the face of crappy BIOSes, we
+	 * can always go back to using MSRs.. */
+
+	err = pci_enable_device_io(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "can't enable device IO\n");
+		goto done;
+	}
+
+	err = pci_request_region(pdev, MFGPT_BAR, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", MFGPT_BAR);
+		goto done;
+	}
+
+	/* set up the driver-specific struct */
+	cs5535_mfgpt_chip.base = pci_resource_start(pdev, MFGPT_BAR);
+	cs5535_mfgpt_chip.pdev = pdev;
+	spin_lock_init(&cs5535_mfgpt_chip.lock);
+
+	dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", MFGPT_BAR,
+			(unsigned long long) cs5535_mfgpt_chip.base);
+
+	/* detect the available timers */
+	t = scan_timers(&cs5535_mfgpt_chip);
+	dev_info(&pdev->dev, DRV_NAME ": %d MFGPT timers available\n", t);
+	cs5535_mfgpt_chip.initialized = 1;
+	return 0;
+
+done:
+	return err;
+}
+
+static struct pci_device_id cs5535_mfgpt_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, cs5535_mfgpt_pci_tbl);
+
+/*
+ * Just like with the cs5535-gpio driver, we can't use the standard PCI driver
+ * registration stuff.  It only allows only one driver to bind to each PCI
+ * device, and we want the GPIO and MFGPT drivers to be able to share a PCI
+ * device.  Instead, we manually scan for the PCI device, request a single
+ * region, and keep track of the devices that we're using.
+ */
+
+static int __init cs5535_mfgpt_scan_pci(void)
+{
+	struct pci_dev *pdev;
+	int err = -ENODEV;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs5535_mfgpt_pci_tbl); i++) {
+		pdev = pci_get_device(cs5535_mfgpt_pci_tbl[i].vendor,
+				cs5535_mfgpt_pci_tbl[i].device, NULL);
+		if (pdev) {
+			err = cs5535_mfgpt_probe(pdev,
+					&cs5535_mfgpt_pci_tbl[i]);
+			if (err)
+				pci_dev_put(pdev);
+
+			/* we only support a single CS5535/6 southbridge */
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int __init cs5535_mfgpt_init(void)
+{
+	return cs5535_mfgpt_scan_pci();
+}
+
+module_init(cs5535_mfgpt_init);
+
+MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h
index cfea689dfa34..30ecb04fa570 100644
--- a/include/linux/cs5535.h
+++ b/include/linux/cs5535.h
@@ -18,6 +18,16 @@
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
+#define MSR_PIC_YSEL_LOW	0x51400020
+#define MSR_PIC_YSEL_HIGH	0x51400021
+#define MSR_PIC_ZSEL_LOW	0x51400022
+#define MSR_PIC_ZSEL_HIGH	0x51400023
+#define MSR_PIC_IRQM_LPC	0x51400025
+
+#define MSR_MFGPT_IRQ		0x51400028
+#define MSR_MFGPT_NR		0x51400029
+#define MSR_MFGPT_SETUP		0x5140002B
+
 /* resource sizes */
 #define LBAR_GPIO_SIZE		0xFF
 #define LBAR_MFGPT_SIZE		0x40
@@ -55,4 +65,61 @@ void cs5535_gpio_set(unsigned offset, unsigned int reg);
 void cs5535_gpio_clear(unsigned offset, unsigned int reg);
 int cs5535_gpio_isset(unsigned offset, unsigned int reg);
 
+/* MFGPTs */
+
+#define MFGPT_MAX_TIMERS	8
+#define MFGPT_TIMER_ANY		(-1)
+
+#define MFGPT_DOMAIN_WORKING	1
+#define MFGPT_DOMAIN_STANDBY	2
+#define MFGPT_DOMAIN_ANY	(MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY)
+
+#define MFGPT_CMP1		0
+#define MFGPT_CMP2		1
+
+#define MFGPT_EVENT_IRQ		0
+#define MFGPT_EVENT_NMI		1
+#define MFGPT_EVENT_RESET	3
+
+#define MFGPT_REG_CMP1		0
+#define MFGPT_REG_CMP2		2
+#define MFGPT_REG_COUNTER	4
+#define MFGPT_REG_SETUP		6
+
+#define MFGPT_SETUP_CNTEN	(1 << 15)
+#define MFGPT_SETUP_CMP2	(1 << 14)
+#define MFGPT_SETUP_CMP1	(1 << 13)
+#define MFGPT_SETUP_SETUP	(1 << 12)
+#define MFGPT_SETUP_STOPEN	(1 << 11)
+#define MFGPT_SETUP_EXTEN	(1 << 10)
+#define MFGPT_SETUP_REVEN	(1 << 5)
+#define MFGPT_SETUP_CLKSEL	(1 << 4)
+
+struct cs5535_mfgpt_timer;
+
+extern uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer,
+		uint16_t reg);
+extern void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg,
+		uint16_t value);
+
+extern int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp,
+		int event, int enable);
+extern int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp,
+		int *irq, int enable);
+extern struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer,
+		int domain);
+extern void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer);
+
+static inline int cs5535_mfgpt_setup_irq(struct cs5535_mfgpt_timer *timer,
+		int cmp, int *irq)
+{
+	return cs5535_mfgpt_set_irq(timer, cmp, irq, 1);
+}
+
+static inline int cs5535_mfgpt_release_irq(struct cs5535_mfgpt_timer *timer,
+		int cmp, int *irq)
+{
+	return cs5535_mfgpt_set_irq(timer, cmp, irq, 0);
+}
+
 #endif
-- 
cgit v1.2.3


From 2e8c12436f540d3c40137ebf10268803dc972f6a Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Mon, 14 Dec 2009 18:00:39 -0800
Subject: cs5535: move the DIVIL MSR definition into linux/cs5535.h

The only thing that uses this is the reboot_fixups code.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Jordan Crouse <jordan@cosmicpenguin.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Chris Ball <cjb@laptop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/geode.h       | 2 --
 arch/x86/kernel/reboot_fixups_32.c | 2 +-
 include/linux/cs5535.h             | 2 ++
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index 547e9642642a..976b3f11c009 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -45,8 +45,6 @@ extern int geode_get_dev_base(unsigned int dev);
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
-#define MSR_DIVIL_SOFT_RESET	0x51400017
-
 #define MSR_LX_SPARE_MSR	0x80000011	/* DC-specific */
 
 #define MSR_GX_GLD_MSR_CONFIG	0xC0002001
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 201eab63b05f..fda313ebbb03 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <asm/reboot_fixups.h>
 #include <asm/msr.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h
index 30ecb04fa570..39e93e8ed95d 100644
--- a/include/linux/cs5535.h
+++ b/include/linux/cs5535.h
@@ -18,6 +18,8 @@
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
+#define MSR_DIVIL_SOFT_RESET	0x51400017
+
 #define MSR_PIC_YSEL_LOW	0x51400020
 #define MSR_PIC_YSEL_HIGH	0x51400021
 #define MSR_PIC_ZSEL_LOW	0x51400022
-- 
cgit v1.2.3


From f060f27007b393bac6e50ee6fc26d8505acf6fe4 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Mon, 14 Dec 2009 18:00:40 -0800
Subject: cs5535: move VSA2 checks into linux/cs5535.h

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Jordan Crouse <jordan@cosmicpenguin.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Chris Ball <cjb@laptop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/geode.h     | 19 -------------------
 arch/x86/kernel/geode_32.c       | 22 ----------------------
 arch/x86/kernel/olpc.c           |  4 ++--
 drivers/video/geode/display_gx.c |  2 +-
 drivers/video/geode/lxfb_ops.c   |  2 +-
 include/linux/cs5535.h           | 32 ++++++++++++++++++++++++++++++++
 6 files changed, 36 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index 976b3f11c009..df1eaf87426a 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -95,16 +95,6 @@ extern int geode_get_dev_base(unsigned int dev);
 #define PM_AWKD			0x50
 #define PM_SSC			0x54
 
-/* VSA2 magic values */
-
-#define VSA_VRC_INDEX		0xAC1C
-#define VSA_VRC_DATA		0xAC1E
-#define VSA_VR_UNLOCK		0xFC53	/* unlock virtual register */
-#define VSA_VR_SIGNATURE	0x0003
-#define VSA_VR_MEM_SIZE		0x0200
-#define AMD_VSA_SIG		0x4132	/* signature is ascii 'VSA2' */
-#define GSW_VSA_SIG		0x534d  /* General Software signature */
-
 static inline u32 geode_gpio(unsigned int nr)
 {
 	BUG_ON(nr > 28);
@@ -148,15 +138,6 @@ static inline int is_geode(void)
 	return (is_geode_gx() || is_geode_lx());
 }
 
-#ifdef CONFIG_MGEODE_LX
-extern int geode_has_vsa2(void);
-#else
-static inline int geode_has_vsa2(void)
-{
-	return 0;
-}
-#endif
-
 static inline void geode_mfgpt_write(int timer, u16 reg, u16 value)
 {
 	u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9b08e852fd1a..9dad6ca6cd70 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -161,28 +161,6 @@ void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
 }
 EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
 
-int geode_has_vsa2(void)
-{
-	static int has_vsa2 = -1;
-
-	if (has_vsa2 == -1) {
-		u16 val;
-
-		/*
-		 * The VSA has virtual registers that we can query for a
-		 * signature.
-		 */
-		outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
-		outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
-
-		val = inw(VSA_VRC_DATA);
-		has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
-	}
-
-	return has_vsa2;
-}
-EXPORT_SYMBOL_GPL(geode_has_vsa2);
-
 static int __init geode_southbridge_init(void)
 {
 	if (!is_geode())
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 4006c522adc7..9d1d263f786f 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -212,7 +212,7 @@ static int __init olpc_init(void)
 	unsigned char *romsig;
 
 	/* The ioremap check is dangerous; limit what we run it on */
-	if (!is_geode() || geode_has_vsa2())
+	if (!is_geode() || cs5535_has_vsa2())
 		return 0;
 
 	spin_lock_init(&ec_lock);
@@ -244,7 +244,7 @@ static int __init olpc_init(void)
 			(unsigned char *) &olpc_platform_info.ecver, 1);
 
 	/* check to see if the VSA exists */
-	if (geode_has_vsa2())
+	if (cs5535_has_vsa2())
 		olpc_platform_info.flags |= OLPC_F_VSA;
 
 	printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
diff --git a/drivers/video/geode/display_gx.c b/drivers/video/geode/display_gx.c
index e759895bf3d3..3ddf055e302e 100644
--- a/drivers/video/geode/display_gx.c
+++ b/drivers/video/geode/display_gx.c
@@ -25,7 +25,7 @@ unsigned int gx_frame_buffer_size(void)
 {
 	unsigned int val;
 
-	if (!geode_has_vsa2()) {
+	if (!cs5535_has_vsa2()) {
 		uint32_t hi, lo;
 
 		/* The number of pages is (PMAX - PMIN)+1 */
diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c
index b1cd49c99356..fe1ee7cbccda 100644
--- a/drivers/video/geode/lxfb_ops.c
+++ b/drivers/video/geode/lxfb_ops.c
@@ -307,7 +307,7 @@ unsigned int lx_framebuffer_size(void)
 {
 	unsigned int val;
 
-	if (!geode_has_vsa2()) {
+	if (!cs5535_has_vsa2()) {
 		uint32_t hi, lo;
 
 		/* The number of pages is (PMAX - PMIN)+1 */
diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h
index 39e93e8ed95d..eb34108a608b 100644
--- a/include/linux/cs5535.h
+++ b/include/linux/cs5535.h
@@ -36,6 +36,38 @@
 #define LBAR_ACPI_SIZE		0x40
 #define LBAR_PMS_SIZE		0x80
 
+/* VSA2 magic values */
+#define VSA_VRC_INDEX		0xAC1C
+#define VSA_VRC_DATA		0xAC1E
+#define VSA_VR_UNLOCK		0xFC53  /* unlock virtual register */
+#define VSA_VR_SIGNATURE	0x0003
+#define VSA_VR_MEM_SIZE		0x0200
+#define AMD_VSA_SIG		0x4132  /* signature is ascii 'VSA2' */
+#define GSW_VSA_SIG		0x534d  /* General Software signature */
+
+#include <linux/io.h>
+
+static inline int cs5535_has_vsa2(void)
+{
+	static int has_vsa2 = -1;
+
+	if (has_vsa2 == -1) {
+		uint16_t val;
+
+		/*
+		 * The VSA has virtual registers that we can query for a
+		 * signature.
+		 */
+		outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
+		outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
+
+		val = inw(VSA_VRC_DATA);
+		has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
+	}
+
+	return has_vsa2;
+}
+
 /* GPIOs */
 #define GPIO_OUTPUT_VAL		0x00
 #define GPIO_OUTPUT_ENABLE	0x04
-- 
cgit v1.2.3


From f3a57a60d3e107d17aebb9e52b61c503e5bc14f9 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Mon, 14 Dec 2009 18:00:40 -0800
Subject: cs5535: define lxfb/gxfb MSRs in linux/cs5535.h

..and include them in the lxfb/gxfb drivers rather than asm/geode.h (where
possible).

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Jordan Crouse <jordan@cosmicpenguin.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Chris Ball <cjb@laptop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/geode.h     | 13 -------------
 drivers/video/geode/display_gx.c |  2 +-
 drivers/video/geode/gxfb.h       |  2 +-
 drivers/video/geode/gxfb_core.c  |  2 +-
 drivers/video/geode/lxfb.h       |  2 +-
 drivers/video/geode/lxfb_ops.c   |  2 +-
 drivers/video/geode/suspend_gx.c |  2 +-
 drivers/video/geode/video_gx.c   |  2 +-
 include/linux/cs5535.h           | 13 +++++++++++++
 9 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index df1eaf87426a..ae104da6ad5a 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -31,25 +31,12 @@ extern int geode_get_dev_base(unsigned int dev);
 
 /* MSRS */
 
-#define MSR_GLIU_P2D_RO0	0x10000029
-
-#define MSR_LX_GLD_MSR_CONFIG	0x48002001
-#define MSR_LX_MSR_PADSEL	0x48002011	/* NOT 0x48000011; the data
-						 * sheet has the wrong value */
-#define MSR_GLCP_SYS_RSTPLL	0x4C000014
-#define MSR_GLCP_DOTPLL		0x4C000015
-
 #define MSR_LBAR_SMB		0x5140000B
 #define MSR_LBAR_GPIO		0x5140000C
 #define MSR_LBAR_MFGPT		0x5140000D
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
-#define MSR_LX_SPARE_MSR	0x80000011	/* DC-specific */
-
-#define MSR_GX_GLD_MSR_CONFIG	0xC0002001
-#define MSR_GX_MSR_PADSEL	0xC0002011
-
 /* Resource Sizes */
 
 #define LBAR_GPIO_SIZE		0xFF
diff --git a/drivers/video/geode/display_gx.c b/drivers/video/geode/display_gx.c
index 3ddf055e302e..f0af911a096d 100644
--- a/drivers/video/geode/display_gx.c
+++ b/drivers/video/geode/display_gx.c
@@ -17,7 +17,7 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 #include <asm/delay.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 
 #include "gxfb.h"
 
diff --git a/drivers/video/geode/gxfb.h b/drivers/video/geode/gxfb.h
index 16a96f8fd8c5..d19e9378b0c0 100644
--- a/drivers/video/geode/gxfb.h
+++ b/drivers/video/geode/gxfb.h
@@ -340,7 +340,7 @@ static inline void write_fp(struct gxfb_par *par, int reg, uint32_t val)
 }
 
 
-/* MSRs are defined in asm/geode.h; their bitfields are here */
+/* MSRs are defined in linux/cs5535.h; their bitfields are here */
 
 #define MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3	(1 << 3)
 #define MSR_GLCP_SYS_RSTPLL_DOTPREMULT2	(1 << 2)
diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c
index 2552cac39e1c..b3e639d1e12c 100644
--- a/drivers/video/geode/gxfb_core.c
+++ b/drivers/video/geode/gxfb_core.c
@@ -32,7 +32,7 @@
 #include <linux/suspend.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 
 #include "gxfb.h"
 
diff --git a/drivers/video/geode/lxfb.h b/drivers/video/geode/lxfb.h
index 6a51448fd3f7..fc68a8b0a144 100644
--- a/drivers/video/geode/lxfb.h
+++ b/drivers/video/geode/lxfb.h
@@ -409,7 +409,7 @@ static inline void write_fp(struct lxfb_par *par, int reg, uint32_t val)
 }
 
 
-/* MSRs are defined in asm/geode.h; their bitfields are here */
+/* MSRs are defined in linux/cs5535.h; their bitfields are here */
 
 #define MSR_GLCP_DOTPLL_LOCK		(1 << 25)	/* r/o */
 #define MSR_GLCP_DOTPLL_HALFPIX		(1 << 24)
diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c
index fe1ee7cbccda..0e5d8c7c3eba 100644
--- a/drivers/video/geode/lxfb_ops.c
+++ b/drivers/video/geode/lxfb_ops.c
@@ -13,7 +13,7 @@
 #include <linux/fb.h>
 #include <linux/uaccess.h>
 #include <linux/delay.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 
 #include "lxfb.h"
 
diff --git a/drivers/video/geode/suspend_gx.c b/drivers/video/geode/suspend_gx.c
index 9aff32ef8bb6..1bb043d70c64 100644
--- a/drivers/video/geode/suspend_gx.c
+++ b/drivers/video/geode/suspend_gx.c
@@ -10,7 +10,7 @@
 #include <linux/fb.h>
 #include <asm/io.h>
 #include <asm/msr.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 #include <asm/delay.h>
 
 #include "gxfb.h"
diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c
index b8d52a8360db..6082f653c68a 100644
--- a/drivers/video/geode/video_gx.c
+++ b/drivers/video/geode/video_gx.c
@@ -16,7 +16,7 @@
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/msr.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
 
 #include "gxfb.h"
 
diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h
index eb34108a608b..d5a1d4810b80 100644
--- a/include/linux/cs5535.h
+++ b/include/linux/cs5535.h
@@ -12,6 +12,14 @@
 #define _CS5535_H
 
 /* MSRs */
+#define MSR_GLIU_P2D_RO0	0x10000029
+
+#define MSR_LX_GLD_MSR_CONFIG	0x48002001
+#define MSR_LX_MSR_PADSEL	0x48002011	/* NOT 0x48000011; the data
+						 * sheet has the wrong value */
+#define MSR_GLCP_SYS_RSTPLL	0x4C000014
+#define MSR_GLCP_DOTPLL		0x4C000015
+
 #define MSR_LBAR_SMB		0x5140000B
 #define MSR_LBAR_GPIO		0x5140000C
 #define MSR_LBAR_MFGPT		0x5140000D
@@ -30,6 +38,11 @@
 #define MSR_MFGPT_NR		0x51400029
 #define MSR_MFGPT_SETUP		0x5140002B
 
+#define MSR_LX_SPARE_MSR	0x80000011	/* DC-specific */
+
+#define MSR_GX_GLD_MSR_CONFIG	0xC0002001
+#define MSR_GX_MSR_PADSEL	0xC0002011
+
 /* resource sizes */
 #define LBAR_GPIO_SIZE		0xFF
 #define LBAR_MFGPT_SIZE		0x40
-- 
cgit v1.2.3


From 5ada918b82399eef3afd6a71e3637697d6bd719f Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bernhard@bwalle.de>
Date: Mon, 14 Dec 2009 18:00:43 -0800
Subject: vt: introduce and use vt_kmsg_redirect() function

The kernel offers with TIOCL_GETKMSGREDIRECT ioctl() the possibility to
redirect the kernel messages to a specific console.

However, since it's not possible to switch to the kernel message console
after a panic(), it would be nice if the kernel would print the panic
message on the current console.

This patch series adds a new interface to access the global kmsg_redirect
variable by a function to be able to use it in code where
CONFIG_VT_CONSOLE is not set (kernel/panic.c).

This patch:

Instead of using and exporting a global value kmsg_redirect, introduce a
function vt_kmsg_redirect() that both can set and return the console where
messages are printed.

Change all users of kmsg_redirect (the VT code itself and kernel/power.c)
to the new interface.

The main advantage is that vt_kmsg_redirect() can also be used when
CONFIG_VT_CONSOLE is not set.

Signed-off-by: Bernhard Walle <bernhard@bwalle.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/vt.c      | 43 +++++++++++++++++++++++++++++++++++++------
 include/linux/tty.h    |  2 --
 include/linux/vt.h     | 15 +++++++++++++++
 kernel/power/console.c |  7 +++----
 4 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 1e3d728dbf7e..e43fbc66aef0 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -184,12 +184,10 @@ static DECLARE_WORK(console_work, console_callback);
  * fg_console is the current virtual console,
  * last_console is the last used one,
  * want_console is the console we want to switch to,
- * kmsg_redirect is the console for kernel messages,
  */
 int fg_console;
 int last_console;
 int want_console = -1;
-int kmsg_redirect;
 
 /*
  * For each existing display, we have a pointer to console currently visible
@@ -2434,6 +2432,37 @@ struct tty_driver *console_driver;
 
 #ifdef CONFIG_VT_CONSOLE
 
+/**
+ * vt_kmsg_redirect() - Sets/gets the kernel message console
+ * @new:	The new virtual terminal number or -1 if the console should stay
+ * 		unchanged
+ *
+ * By default, the kernel messages are always printed on the current virtual
+ * console. However, the user may modify that default with the
+ * TIOCL_SETKMSGREDIRECT ioctl call.
+ *
+ * This function sets the kernel message console to be @new. It returns the old
+ * virtual console number. The virtual terminal number 0 (both as parameter and
+ * return value) means no redirection (i.e. always printed on the currently
+ * active console).
+ *
+ * The parameter -1 means that only the current console is returned, but the
+ * value is not modified. You may use the macro vt_get_kmsg_redirect() in that
+ * case to make the code more understandable.
+ *
+ * When the kernel is compiled without CONFIG_VT_CONSOLE, this function ignores
+ * the parameter and always returns 0.
+ */
+int vt_kmsg_redirect(int new)
+{
+	static int kmsg_con;
+
+	if (new != -1)
+		return xchg(&kmsg_con, new);
+	else
+		return kmsg_con;
+}
+
 /*
  *	Console on virtual terminal
  *
@@ -2448,6 +2477,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
 	const ushort *start;
 	ushort cnt = 0;
 	ushort myx;
+	int kmsg_console;
 
 	/* console busy or not yet initialized */
 	if (!printable)
@@ -2455,8 +2485,9 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
 	if (!spin_trylock(&printing_lock))
 		return;
 
-	if (kmsg_redirect && vc_cons_allocated(kmsg_redirect - 1))
-		vc = vc_cons[kmsg_redirect - 1].d;
+	kmsg_console = vt_get_kmsg_redirect();
+	if (kmsg_console && vc_cons_allocated(kmsg_console - 1))
+		vc = vc_cons[kmsg_console - 1].d;
 
 	/* read `x' only after setting currcons properly (otherwise
 	   the `x' macro will read the x of the foreground console). */
@@ -2613,7 +2644,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg)
 			ret = set_vesa_blanking(p);
 			break;
 		case TIOCL_GETKMSGREDIRECT:
-			data = kmsg_redirect;
+			data = vt_get_kmsg_redirect();
 			ret = __put_user(data, p);
 			break;
 		case TIOCL_SETKMSGREDIRECT:
@@ -2623,7 +2654,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg)
 				if (get_user(data, p+1))
 					ret = -EFAULT;
 				else
-					kmsg_redirect = data;
+					vt_kmsg_redirect(data);
 			}
 			break;
 		case TIOCL_GETFGCONSOLE:
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 405a9035fe40..ef3a2947b102 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -350,8 +350,6 @@ extern void tty_write_flush(struct tty_struct *);
 
 extern struct ktermios tty_std_termios;
 
-extern int kmsg_redirect;
-
 extern void console_init(void);
 extern int vcs_init(void);
 
diff --git a/include/linux/vt.h b/include/linux/vt.h
index 7ffa11f06232..3fb9944e50a6 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -84,4 +84,19 @@ struct vt_setactivate {
 
 #define VT_SETACTIVATE	0x560F	/* Activate and set the mode of a console */
 
+#ifdef CONFIG_VT_CONSOLE
+
+extern int vt_kmsg_redirect(int new);
+
+#else
+
+static inline int vt_kmsg_redirect(int new)
+{
+	return 0;
+}
+
+#endif
+
+#define vt_get_kmsg_redirect() vt_kmsg_redirect(-1)
+
 #endif /* _LINUX_VT_H */
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 5187136fe1de..218e5af90156 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -6,7 +6,7 @@
 
 #include <linux/vt_kern.h>
 #include <linux/kbd_kern.h>
-#include <linux/console.h>
+#include <linux/vt.h>
 #include <linux/module.h>
 #include "power.h"
 
@@ -21,8 +21,7 @@ int pm_prepare_console(void)
 	if (orig_fgconsole < 0)
 		return 1;
 
-	orig_kmsg = kmsg_redirect;
-	kmsg_redirect = SUSPEND_CONSOLE;
+	orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
 	return 0;
 }
 
@@ -30,7 +29,7 @@ void pm_restore_console(void)
 {
 	if (orig_fgconsole >= 0) {
 		vt_move_to_console(orig_fgconsole, 0);
-		kmsg_redirect = orig_kmsg;
+		vt_kmsg_redirect(orig_kmsg);
 	}
 }
 #endif
-- 
cgit v1.2.3


From 7707e61c70999a1f9f1fd9ac92e293c198585152 Mon Sep 17 00:00:00 2001
From: André Goddard Rosa <andre.goddard@gmail.com>
Date: Mon, 14 Dec 2009 18:01:02 -0800
Subject: ctype: constify read-only _ctype string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While at it, use tabs to indent the comments.

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ctype.h |  2 +-
 lib/ctype.c           | 50 +++++++++++++++++++++++++-------------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ctype.h b/include/linux/ctype.h
index afa36392297a..6dec944a370b 100644
--- a/include/linux/ctype.h
+++ b/include/linux/ctype.h
@@ -15,7 +15,7 @@
 #define _X	0x40	/* hex digit */
 #define _SP	0x80	/* hard space (0x20) */
 
-extern unsigned char _ctype[];
+extern const unsigned char _ctype[];
 
 #define __ismask(x) (_ctype[(int)(unsigned char)(x)])
 
diff --git a/lib/ctype.c b/lib/ctype.c
index d02ace14a322..26baa620e95b 100644
--- a/lib/ctype.c
+++ b/lib/ctype.c
@@ -7,30 +7,30 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 
-unsigned char _ctype[] = {
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 0-7 */
-_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,		/* 8-15 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 16-23 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 24-31 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,			/* 32-39 */
-_P,_P,_P,_P,_P,_P,_P,_P,			/* 40-47 */
-_D,_D,_D,_D,_D,_D,_D,_D,			/* 48-55 */
-_D,_D,_P,_P,_P,_P,_P,_P,			/* 56-63 */
-_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,	/* 64-71 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 72-79 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 80-87 */
-_U,_U,_U,_P,_P,_P,_P,_P,			/* 88-95 */
-_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,	/* 96-103 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 104-111 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 112-119 */
-_L,_L,_L,_P,_P,_P,_P,_C,			/* 120-127 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 128-143 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 144-159 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
-_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
-_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
-_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
-_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
-_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
+const unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,			/* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,				/* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P,				/* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D,				/* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P,				/* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,		/* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P,				/* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,		/* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C,				/* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,	/* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,	/* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,	/* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};	/* 240-255 */
 
 EXPORT_SYMBOL(_ctype);
-- 
cgit v1.2.3


From f653398c86a1c104f0992bd788dd4bb065449be4 Mon Sep 17 00:00:00 2001
From: André Goddard Rosa <andre.goddard@gmail.com>
Date: Mon, 14 Dec 2009 18:01:04 -0800
Subject: string: factorize skip_spaces and export it to be generally available
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On the following sentence:
    while (*s && isspace(*s))
        s++;

If *s == 0, isspace() evaluates to ((_ctype[*s] & 0x20) != 0), which
evaluates to ((0x08 & 0x20) != 0) which equals to 0 as well.
If *s == 1, we depend on isspace() result anyway. In other words,
"a char equals zero is never a space", so remove this check.

Also, *s != 0 is most common case (non-null string).

Fixed const return as noticed by Jan Engelhardt and James Bottomley.
Fixed unnecessary extra cast on strstrip() as noticed by Jan Engelhardt.

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ctype.h  |  1 +
 include/linux/string.h |  1 +
 lib/string.c           | 19 +++++++++++++++----
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ctype.h b/include/linux/ctype.h
index 6dec944a370b..a3d6ee0044f9 100644
--- a/include/linux/ctype.h
+++ b/include/linux/ctype.h
@@ -27,6 +27,7 @@ extern const unsigned char _ctype[];
 #define islower(c)	((__ismask(c)&(_L)) != 0)
 #define isprint(c)	((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
 #define ispunct(c)	((__ismask(c)&(_P)) != 0)
+/* Note: isspace() must return false for %NUL-terminator */
 #define isspace(c)	((__ismask(c)&(_S)) != 0)
 #define isupper(c)	((__ismask(c)&(_U)) != 0)
 #define isxdigit(c)	((__ismask(c)&(_D|_X)) != 0)
diff --git a/include/linux/string.h b/include/linux/string.h
index b8508868d5ad..168dad11ae03 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -62,6 +62,7 @@ extern char * strnchr(const char *, size_t, int);
 #ifndef __HAVE_ARCH_STRRCHR
 extern char * strrchr(const char *,int);
 #endif
+extern char * __must_check skip_spaces(const char *);
 extern char * __must_check strstrip(char *);
 #ifndef __HAVE_ARCH_STRSTR
 extern char * strstr(const char *,const char *);
diff --git a/lib/string.c b/lib/string.c
index e96421ab9a9a..3a912a4e9a63 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -337,6 +337,20 @@ char *strnchr(const char *s, size_t count, int c)
 EXPORT_SYMBOL(strnchr);
 #endif
 
+/**
+ * skip_spaces - Removes leading whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Returns a pointer to the first non-whitespace character in @s.
+ */
+char *skip_spaces(const char *str)
+{
+	while (isspace(*str))
+		++str;
+	return (char *)str;
+}
+EXPORT_SYMBOL(skip_spaces);
+
 /**
  * strstrip - Removes leading and trailing whitespace from @s.
  * @s: The string to be stripped.
@@ -360,10 +374,7 @@ char *strstrip(char *s)
 		end--;
 	*(end + 1) = '\0';
 
-	while (*s && isspace(*s))
-		s++;
-
-	return s;
+	return skip_spaces(s);
 }
 EXPORT_SYMBOL(strstrip);
 
-- 
cgit v1.2.3


From 925ede0bf4ecef96fc2d939b16619530111aa16e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 14 Dec 2009 18:01:14 -0800
Subject: efi.h: use %pUl to print UUIDs

Shrinks vmlinux

without:
$ size vmlinux
   text    data     bss     dec     hex filename
6975863  679652 1359668 9015183  898f8f vmlinux

with:
$ size vmlinux
   text    data     bss     dec     hex filename
6975639 679652 1359668 9014959 898eaf vmlinux

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Jeff Garzik <jgarzik@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/efi.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index ce4581fbc08b..fb737bc19a8c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -280,11 +280,7 @@ efi_guidcmp (efi_guid_t left, efi_guid_t right)
 static inline char *
 efi_guid_unparse(efi_guid_t *guid, char *out)
 {
-	sprintf(out, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-		guid->b[3], guid->b[2], guid->b[1], guid->b[0],
-		guid->b[5], guid->b[4], guid->b[7], guid->b[6],
-		guid->b[8], guid->b[9], guid->b[10], guid->b[11],
-		guid->b[12], guid->b[13], guid->b[14], guid->b[15]);
+	sprintf(out, "%pUl", guid->b);
         return out;
 }
 
-- 
cgit v1.2.3


From ca54cb8c9eb38095dc420b73c6380ce1dbeb10fa Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 14 Dec 2009 18:01:15 -0800
Subject: Subject: Re: [PATCH] strstrip incorrectly marked __must_check

Recently, We marked strstrip() as must_check.  because it was frequently
misused and it should be checked.  However, we found one exception.
scsi/ipr.c intentionally ignore return value of strstrip.  Because it
wishes to keep the whitespace at the beginning.

Thus we need to keep with and without checked whitespace trim function.
This patch adds a new strim() and changes ipr.c to use it.

[akpm@linux-foundation.org: coding-style fixes]
Suggested-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/ipr.c     | 4 ++--
 include/linux/string.h | 9 ++++++++-
 lib/string.c           | 6 +++---
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 206c2fa8c1ba..8643f5089361 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -1333,7 +1333,7 @@ static void ipr_log_enhanced_dual_ioa_error(struct ipr_ioa_cfg *ioa_cfg,
 
 	error = &hostrcb->hcam.u.error.u.type_17_error;
 	error->failure_reason[sizeof(error->failure_reason) - 1] = '\0';
-	strstrip(error->failure_reason);
+	strim(error->failure_reason);
 
 	ipr_hcam_err(hostrcb, "%s [PRC: %08X]\n", error->failure_reason,
 		     be32_to_cpu(hostrcb->hcam.u.error.prc));
@@ -1359,7 +1359,7 @@ static void ipr_log_dual_ioa_error(struct ipr_ioa_cfg *ioa_cfg,
 
 	error = &hostrcb->hcam.u.error.u.type_07_error;
 	error->failure_reason[sizeof(error->failure_reason) - 1] = '\0';
-	strstrip(error->failure_reason);
+	strim(error->failure_reason);
 
 	ipr_hcam_err(hostrcb, "%s [PRC: %08X]\n", error->failure_reason,
 		     be32_to_cpu(hostrcb->hcam.u.error.prc));
diff --git a/include/linux/string.h b/include/linux/string.h
index 168dad11ae03..651839a2a755 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -63,7 +63,14 @@ extern char * strnchr(const char *, size_t, int);
 extern char * strrchr(const char *,int);
 #endif
 extern char * __must_check skip_spaces(const char *);
-extern char * __must_check strstrip(char *);
+
+extern char *strim(char *);
+
+static inline __must_check char *strstrip(char *str)
+{
+	return strim(str);
+}
+
 #ifndef __HAVE_ARCH_STRSTR
 extern char * strstr(const char *,const char *);
 #endif
diff --git a/lib/string.c b/lib/string.c
index 765566a6a088..afce96af3afd 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -352,14 +352,14 @@ char *skip_spaces(const char *str)
 EXPORT_SYMBOL(skip_spaces);
 
 /**
- * strstrip - Removes leading and trailing whitespace from @s.
+ * strim - Removes leading and trailing whitespace from @s.
  * @s: The string to be stripped.
  *
  * Note that the first trailing whitespace is replaced with a %NUL-terminator
  * in the given string @s. Returns a pointer to the first non-whitespace
  * character in @s.
  */
-char *strstrip(char *s)
+char *strim(char *s)
 {
 	size_t size;
 	char *end;
@@ -376,7 +376,7 @@ char *strstrip(char *s)
 
 	return s;
 }
-EXPORT_SYMBOL(strstrip);
+EXPORT_SYMBOL(strim);
 
 #ifndef __HAVE_ARCH_STRLEN
 /**
-- 
cgit v1.2.3


From 2635d1ba711560d521f6218c585a3e0401f566e1 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 14 Dec 2009 18:01:30 -0800
Subject: atmel-mci: change use of dma slave interface

Allow the use of another DMA controller driver in atmel-mci sd/mmc driver.
 This adds a generic dma_slave pointer to the mci platform structure where
we can store DMA controller information.  In atmel-mci we use information
provided by this structure to initialize the driver (with new helper
functions that are architecture dependant).

This also adds at32/avr32 chip modifications to cope with this new access
method.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mach-at91/include/mach/atmel-mci.h     | 24 +++++++++++
 arch/avr32/mach-at32ap/at32ap700x.c             | 18 +++++---
 arch/avr32/mach-at32ap/include/mach/atmel-mci.h | 24 +++++++++++
 drivers/mmc/host/atmel-mci.c                    | 56 ++++++++++++++++---------
 include/linux/atmel-mci.h                       |  4 +-
 5 files changed, 98 insertions(+), 28 deletions(-)
 create mode 100644 arch/arm/mach-at91/include/mach/atmel-mci.h
 create mode 100644 arch/avr32/mach-at32ap/include/mach/atmel-mci.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/include/mach/atmel-mci.h b/arch/arm/mach-at91/include/mach/atmel-mci.h
new file mode 100644
index 000000000000..998cb0c07135
--- /dev/null
+++ b/arch/arm/mach-at91/include/mach/atmel-mci.h
@@ -0,0 +1,24 @@
+#ifndef __MACH_ATMEL_MCI_H
+#define __MACH_ATMEL_MCI_H
+
+#include <mach/at_hdmac.h>
+
+/**
+ * struct mci_dma_data - DMA data for MCI interface
+ */
+struct mci_dma_data {
+	struct at_dma_slave	sdata;
+};
+
+/* accessor macros */
+#define	slave_data_ptr(s)	(&(s)->sdata)
+#define find_slave_dev(s)	((s)->sdata.dma_dev)
+
+#define	setup_dma_addr(s, t, r)	do {		\
+	if (s) {				\
+		(s)->sdata.tx_reg = (t);	\
+		(s)->sdata.rx_reg = (r);	\
+	}					\
+} while (0)
+
+#endif /* __MACH_ATMEL_MCI_H */
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index eb9d4dc2e86d..b40ff39e0ac8 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -15,6 +15,8 @@
 #include <linux/gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/atmel_usba_udc.h>
+
+#include <mach/atmel-mci.h>
 #include <linux/atmel-mci.h>
 
 #include <asm/io.h>
@@ -1320,7 +1322,7 @@ struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
 	struct platform_device		*pdev;
-	struct dw_dma_slave		*dws = &data->dma_slave;
+	struct mci_dma_slave		*slave;
 	u32				pioa_mask;
 	u32				piob_mask;
 
@@ -1339,13 +1341,17 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
-	dws->dma_dev = &dw_dmac0_device.dev;
-	dws->reg_width = DW_DMA_SLAVE_WIDTH_32BIT;
-	dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
+	slave = kzalloc(sizeof(struct mci_dma_slave), GFP_KERNEL);
+
+	slave->sdata.dma_dev = &dw_dmac0_device.dev;
+	slave->sdata.reg_width = DW_DMA_SLAVE_WIDTH_32BIT;
+	slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0)
 				| DWC_CFGH_DST_PER(1));
-	dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
+	slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL
 				| DWC_CFGL_HS_SRC_POL);
 
+	data->dma_slave = slave;
+
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
 		goto fail;
@@ -1411,6 +1417,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 	return pdev;
 
 fail:
+	data->dma_slave = NULL;
+	kfree(slave);
 	platform_device_put(pdev);
 	return NULL;
 }
diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h
new file mode 100644
index 000000000000..a9b38967f703
--- /dev/null
+++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h
@@ -0,0 +1,24 @@
+#ifndef __MACH_ATMEL_MCI_H
+#define __MACH_ATMEL_MCI_H
+
+#include <linux/dw_dmac.h>
+
+/**
+ * struct mci_dma_data - DMA data for MCI interface
+ */
+struct mci_dma_data {
+	struct dw_dma_slave	sdata;
+};
+
+/* accessor macros */
+#define	slave_data_ptr(s)	(&(s)->sdata)
+#define find_slave_dev(s)	((s)->sdata.dma_dev)
+
+#define	setup_dma_addr(s, t, r)	do {		\
+	if (s) {				\
+		(s)->sdata.tx_reg = (t);	\
+		(s)->sdata.rx_reg = (r);	\
+	}					\
+} while (0)
+
+#endif /* __MACH_ATMEL_MCI_H */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index fc25586b7ee1..ba8b219d44c1 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -25,6 +25,8 @@
 #include <linux/stat.h>
 
 #include <linux/mmc/host.h>
+
+#include <mach/atmel-mci.h>
 #include <linux/atmel-mci.h>
 
 #include <asm/io.h>
@@ -1584,14 +1586,43 @@ static void __exit atmci_cleanup_slot(struct atmel_mci_slot *slot,
 #ifdef CONFIG_MMC_ATMELMCI_DMA
 static bool filter(struct dma_chan *chan, void *slave)
 {
-	struct dw_dma_slave *dws = slave;
+	struct mci_dma_data	*sl = slave;
 
-	if (dws->dma_dev == chan->device->dev) {
-		chan->private = dws;
+	if (sl && find_slave_dev(sl) == chan->device->dev) {
+		chan->private = slave_data_ptr(sl);
 		return true;
-	} else
+	} else {
 		return false;
+	}
 }
+
+static void atmci_configure_dma(struct atmel_mci *host)
+{
+	struct mci_platform_data	*pdata;
+
+	if (host == NULL)
+		return;
+
+	pdata = host->pdev->dev.platform_data;
+
+	if (pdata && find_slave_dev(pdata->dma_slave)) {
+		dma_cap_mask_t mask;
+
+		setup_dma_addr(pdata->dma_slave,
+			       host->mapbase + MCI_TDR,
+			       host->mapbase + MCI_RDR);
+
+		/* Try to grab a DMA channel */
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+		host->dma.chan =
+			dma_request_channel(mask, filter, pdata->dma_slave);
+	}
+	if (!host->dma.chan)
+		dev_notice(&host->pdev->dev, "DMA not available, using PIO\n");
+}
+#else
+static void atmci_configure_dma(struct atmel_mci *host) {}
 #endif
 
 static int __init atmci_probe(struct platform_device *pdev)
@@ -1645,22 +1676,7 @@ static int __init atmci_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_request_irq;
 
-#ifdef CONFIG_MMC_ATMELMCI_DMA
-	if (pdata->dma_slave.dma_dev) {
-		struct dw_dma_slave *dws = &pdata->dma_slave;
-		dma_cap_mask_t mask;
-
-		dws->tx_reg = regs->start + MCI_TDR;
-		dws->rx_reg = regs->start + MCI_RDR;
-
-		/* Try to grab a DMA channel */
-		dma_cap_zero(mask);
-		dma_cap_set(DMA_SLAVE, mask);
-		host->dma.chan = dma_request_channel(mask, filter, dws);
-	}
-	if (!host->dma.chan)
-		dev_notice(&pdev->dev, "DMA not available, using PIO\n");
-#endif /* CONFIG_MMC_ATMELMCI_DMA */
+	atmci_configure_dma(host);
 
 	platform_set_drvdata(pdev, host);
 
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 57b1846a3c87..3e09b345f4d6 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -3,8 +3,6 @@
 
 #define ATMEL_MCI_MAX_NR_SLOTS	2
 
-#include <linux/dw_dmac.h>
-
 /**
  * struct mci_slot_pdata - board-specific per-slot configuration
  * @bus_width: Number of data lines wired up the slot
@@ -34,7 +32,7 @@ struct mci_slot_pdata {
  * @slot: Per-slot configuration data.
  */
 struct mci_platform_data {
-	struct dw_dma_slave	dma_slave;
+	struct mci_dma_data	*dma_slave;
 	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
 };
 
-- 
cgit v1.2.3


From e40d6eaa79bc9d9d347c3c51fe0c9204e9025b79 Mon Sep 17 00:00:00 2001
From: Samu Onkalo <samu.p.onkalo@nokia.com>
Date: Mon, 14 Dec 2009 18:01:35 -0800
Subject: lis3lv02d: axis remap and resource setup/release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the possibility to remap axes via platform data.  Function pointers
for resource setup and release purposes

Signed-off-by: Samu Onkalo <samu.p.onkalo@nokia.com>
Acked-by: Éric Piel <eric.piel@tremplin-utc.net>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Jean Delvare <khali@linux-fr.org>
Cc:  "Trisal, Kalhan" <kalhan.trisal@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lis3lv02d.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index 3cc2f2c53e4c..89701355c74f 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -43,6 +43,18 @@ struct lis3lv02d_platform_data {
 #define LIS3_WAKEUP_Z_HI	(1 << 5)
 	unsigned char wakeup_flags;
 	unsigned char wakeup_thresh;
+#define LIS3_NO_MAP		0
+#define LIS3_DEV_X		1
+#define LIS3_DEV_Y		2
+#define LIS3_DEV_Z		3
+#define LIS3_INV_DEV_X	       -1
+#define LIS3_INV_DEV_Y	       -2
+#define LIS3_INV_DEV_Z	       -3
+	s8 axis_x;
+	s8 axis_y;
+	s8 axis_z;
+	int (*setup_resources)(void);
+	int (*release_resources)(void);
 };
 
 #endif /* __LIS3LV02D_H_ */
-- 
cgit v1.2.3


From 2db4a76d5f3554e9e5632c8f91828313318579c8 Mon Sep 17 00:00:00 2001
From: Samu Onkalo <samu.p.onkalo@nokia.com>
Date: Mon, 14 Dec 2009 18:01:43 -0800
Subject: lis3: selftest support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement selftest feature as specified by chip manufacturer.  Control:
read selftest sysfs entry

Response: "OK x y z" or "FAIL x y z"

where x, y, and z are difference between selftest mode and normal mode.
Test is passed when values are within acceptance limit values.

Acceptance limits are provided via platform data.  See chip spesifications
for acceptance limits.  If limits are not properly set, OK / FAIL decision
is meaningless.  However, userspace application can still make decision
based on the numeric x, y, z values.

Selftest is meant for HW diagnostic purposes.  It is not meant to be
called during normal use of the chip.  It may cause false interrupt
events.  Selftest mode delays polling of the normal results but it doesn't
cause wrong values.  Chip must be in static state during selftest.  Any
acceration during the test causes most probably failure.

Signed-off-by: Samu Onkalo <samu.p.onkalo@nokia.com>
Acked-by: Éric Piel <Eric.Piel@tremplin-utc.net>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/lis3lv02d.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/hwmon/lis3lv02d.h | 14 ++++++++--
 include/linux/lis3lv02d.h |  3 +++
 3 files changed, 81 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c
index 39b9ac8e18ed..55ec883e2026 100644
--- a/drivers/hwmon/lis3lv02d.c
+++ b/drivers/hwmon/lis3lv02d.c
@@ -106,9 +106,11 @@ static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z)
 {
 	int position[3];
 
+	mutex_lock(&lis3->mutex);
 	position[0] = lis3->read_data(lis3, OUTX);
 	position[1] = lis3->read_data(lis3, OUTY);
 	position[2] = lis3->read_data(lis3, OUTZ);
+	mutex_unlock(&lis3->mutex);
 
 	*x = lis3lv02d_get_axis(lis3->ac.x, position);
 	*y = lis3lv02d_get_axis(lis3->ac.y, position);
@@ -133,6 +135,55 @@ static int lis3lv02d_get_odr(void)
 	return val;
 }
 
+static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3])
+{
+	u8 reg;
+	s16 x, y, z;
+	u8 selftest;
+	int ret;
+
+	mutex_lock(&lis3->mutex);
+	if (lis3_dev.whoami == WAI_12B)
+		selftest = CTRL1_ST;
+	else
+		selftest = CTRL1_STP;
+
+	lis3->read(lis3, CTRL_REG1, &reg);
+	lis3->write(lis3, CTRL_REG1, (reg | selftest));
+	msleep(lis3->pwron_delay / lis3lv02d_get_odr());
+
+	/* Read directly to avoid axis remap */
+	x = lis3->read_data(lis3, OUTX);
+	y = lis3->read_data(lis3, OUTY);
+	z = lis3->read_data(lis3, OUTZ);
+
+	/* back to normal settings */
+	lis3->write(lis3, CTRL_REG1, reg);
+	msleep(lis3->pwron_delay / lis3lv02d_get_odr());
+
+	results[0] = x - lis3->read_data(lis3, OUTX);
+	results[1] = y - lis3->read_data(lis3, OUTY);
+	results[2] = z - lis3->read_data(lis3, OUTZ);
+
+	ret = 0;
+	if (lis3->pdata) {
+		int i;
+		for (i = 0; i < 3; i++) {
+			/* Check against selftest acceptance limits */
+			if ((results[i] < lis3->pdata->st_min_limits[i]) ||
+			    (results[i] > lis3->pdata->st_max_limits[i])) {
+				ret = -EIO;
+				goto fail;
+			}
+		}
+	}
+
+	/* test passed */
+fail:
+	mutex_unlock(&lis3->mutex);
+	return ret;
+}
+
 void lis3lv02d_poweroff(struct lis3lv02d *lis3)
 {
 	/* disable X,Y,Z axis and power down */
@@ -365,6 +416,17 @@ void lis3lv02d_joystick_disable(void)
 EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable);
 
 /* Sysfs stuff */
+static ssize_t lis3lv02d_selftest_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int result;
+	s16 values[3];
+
+	result = lis3lv02d_selftest(&lis3_dev, values);
+	return sprintf(buf, "%s %d %d %d\n", result == 0 ? "OK" : "FAIL",
+		values[0], values[1], values[2]);
+}
+
 static ssize_t lis3lv02d_position_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -394,12 +456,14 @@ static ssize_t lis3lv02d_rate_show(struct device *dev,
 	return sprintf(buf, "%d\n", lis3lv02d_get_odr());
 }
 
+static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL);
 static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL);
 static DEVICE_ATTR(calibrate, S_IRUGO|S_IWUSR, lis3lv02d_calibrate_show,
 	lis3lv02d_calibrate_store);
 static DEVICE_ATTR(rate, S_IRUGO, lis3lv02d_rate_show, NULL);
 
 static struct attribute *lis3lv02d_attributes[] = {
+	&dev_attr_selftest.attr,
 	&dev_attr_position.attr,
 	&dev_attr_calibrate.attr,
 	&dev_attr_rate.attr,
@@ -455,6 +519,8 @@ int lis3lv02d_init_device(struct lis3lv02d *dev)
 		return -EINVAL;
 	}
 
+	mutex_init(&dev->mutex);
+
 	lis3lv02d_add_fs(dev);
 	lis3lv02d_poweron(dev);
 
@@ -507,4 +573,3 @@ EXPORT_SYMBOL_GPL(lis3lv02d_init_device);
 MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver");
 MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
 MODULE_LICENSE("GPL");
-
diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h
index c57f21f45676..166794cb91a3 100644
--- a/drivers/hwmon/lis3lv02d.h
+++ b/drivers/hwmon/lis3lv02d.h
@@ -98,7 +98,7 @@ enum lis3_who_am_i {
 	WAI_6B		= 0x52, /* 6 bits: LIS331DLF - not supported */
 };
 
-enum lis3lv02d_ctrl1 {
+enum lis3lv02d_ctrl1_12b {
 	CTRL1_Xen	= 0x01,
 	CTRL1_Yen	= 0x02,
 	CTRL1_Zen	= 0x04,
@@ -107,8 +107,17 @@ enum lis3lv02d_ctrl1 {
 	CTRL1_DF1	= 0x20,
 	CTRL1_PD0	= 0x40,
 	CTRL1_PD1	= 0x80,
-	CTRL1_DR	= 0x80, /* Data rate on 8 bits */
 };
+
+/* Delta to ctrl1_12b version */
+enum lis3lv02d_ctrl1_8b {
+	CTRL1_STM	= 0x08,
+	CTRL1_STP	= 0x10,
+	CTRL1_FS	= 0x20,
+	CTRL1_PD	= 0x40,
+	CTRL1_DR	= 0x80,
+};
+
 enum lis3lv02d_ctrl2 {
 	CTRL2_DAS	= 0x01,
 	CTRL2_SIM	= 0x02,
@@ -218,6 +227,7 @@ struct lis3lv02d {
 	unsigned long		misc_opened; /* bit0: whether the device is open */
 
 	struct lis3lv02d_platform_data *pdata;	/* for passing board config */
+	struct mutex		mutex;     /* Serialize poll and selftest */
 };
 
 int lis3lv02d_init_device(struct lis3lv02d *lis3);
diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index 89701355c74f..f1ca0dcc1628 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -55,6 +55,9 @@ struct lis3lv02d_platform_data {
 	s8 axis_z;
 	int (*setup_resources)(void);
 	int (*release_resources)(void);
+	/* Limits for selftest are specified in chip data sheet */
+	s16 st_min_limits[3]; /* min pass limit x, y, z */
+	s16 st_max_limits[3]; /* max pass limit x, y, z */
 };
 
 #endif /* __LIS3LV02D_H_ */
-- 
cgit v1.2.3


From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:53 -0800
Subject: rpc: add rpc_queue_empty function

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/sched.c           | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 1906782ec86b..9157405f9320 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -229,6 +229,7 @@ void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 void		rpc_wake_up_status(struct rpc_wait_queue *, int);
+int		rpc_queue_empty(struct rpc_wait_queue *);
 void		rpc_delay(struct rpc_task *, unsigned long);
 void *		rpc_malloc(struct rpc_task *, size_t);
 void		rpc_free(void *);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef74ba0666c..89ea8e69ec78 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
 		__rpc_do_wake_up_task(queue, task);
 }
 
+/*
+ * Tests whether rpc queue is empty
+ */
+int rpc_queue_empty(struct rpc_wait_queue *queue)
+{
+	int res;
+
+	spin_lock_bh(&queue->lock);
+	res = queue->qlen;
+	spin_unlock_bh(&queue->lock);
+	return (res == 0);
+}
+EXPORT_SYMBOL_GPL(rpc_queue_empty);
+
 /*
  * Wake up a task on a specific queue
  */
-- 
cgit v1.2.3


From cf3b01b54880debb01ea7d471123da5887a7c2cb Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:55 -0800
Subject: rpc: add a new priority in RPC task

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9157405f9320..7bc7fd5291ce 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -173,7 +173,8 @@ struct rpc_task_setup {
 #define RPC_PRIORITY_LOW	(-1)
 #define RPC_PRIORITY_NORMAL	(0)
 #define RPC_PRIORITY_HIGH	(1)
-#define RPC_NR_PRIORITY		(1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
+#define RPC_PRIORITY_PRIVILEGED	(2)
+#define RPC_NR_PRIORITY		(1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW)
 
 struct rpc_timer {
 	struct timer_list timer;
@@ -255,6 +256,16 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task)
 	return __rpc_wait_for_completion_task(task, NULL);
 }
 
+static inline void rpc_task_set_priority(struct rpc_task *task, unsigned char prio)
+{
+	task->tk_priority = prio - RPC_PRIORITY_LOW;
+}
+
+static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char prio)
+{
+	return (task->tk_priority + RPC_PRIORITY_LOW == prio);
+}
+
 #ifdef RPC_DEBUG
 static inline const char * rpc_qname(struct rpc_wait_queue *q)
 {
-- 
cgit v1.2.3


From eb4c86c6a5adec423c9e615d4937fdddd06a16c5 Mon Sep 17 00:00:00 2001
From: Steve Dickson <SteveD@redhat.com>
Date: Wed, 9 Sep 2009 14:58:22 -0400
Subject: nfsd: introduce export flag for v4 pseudoroot

NFSv4 differs from v2 and v3 in that it presents a single unified
filesystem tree, whereas v2 and v3 exported multiple filesystem (whose
roots could be found using a separate mount protocol).

Our original NFSv4 server implementation asked the administrator to
designate a single filesystem as the NFSv4 root, then to mount
filesystems they wished to export underneath.  (Often using bind mounts
of already-existing filesystems.)

This was conceptually simple, and allowed easy implementation, but
created a serious obstacle to upgrading between v2/v3: since the paths
to v4 filesystems were different, administrators would have to adjust
all the paths in client-side mount commands when switching to v4.

Various workarounds are possible.  For example, the administrator could
export "/" and designate it as the v4 root.  However, the security risks
of that approach are obvious, and in any case we shouldn't be requiring
the administrator to take extra steps to fix this problem; instead, the
server should present consistent paths across different versions by
default.

These patches take a modified version of that approach: we provide a new
export option which exports only a subset of a filesystem.  With this
flag, it becomes safe for mountd to export "/" by default, with no need
for additional configuration.

We begin just by defining the new flag.

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c            |  1 +
 include/linux/nfsd/export.h | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cb3dae2fcd86..c64d55f319bd 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1425,6 +1425,7 @@ static struct flags {
 	{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
 	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
 	{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+	{ NFSEXP_V4ROOT, {"v4root", ""}},
 #ifdef MSNFS
 	{ NFSEXP_MSNFS, {"msnfs", ""}},
 #endif
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 4cafbe1255f0..41f0d4e25374 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -39,6 +39,16 @@
 #define NFSEXP_FSID		0x2000
 #define	NFSEXP_CROSSMOUNT	0x4000
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
+/*
+ * The NFSEXP_V4ROOT flag causes the kernel to give access only to NFSv4
+ * clients, and only to the single directory that is the root of the
+ * export; further lookup and readdir operations are treated as if every
+ * subdirectory was a mountpoint, and ignored if they are not themselves
+ * exported.  This is used by nfsd and mountd to construct the NFSv4
+ * pseudofilesystem, which provides access only to paths leading to each
+ * exported filesystem.
+ */
+#define	NFSEXP_V4ROOT		0x10000
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
 #define NFSEXP_ALLFLAGS		0x7E3F
 
-- 
cgit v1.2.3


From f13c12c634e124d5d31f912b969d542a016d6105 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 15 Dec 2009 19:43:11 +0100
Subject: perf_events: Fix perf_event_attr layout

The miss-alignment of bp_addr created a 32bit hole, causing
different structure packings on 32 and 64 bit machines.

Fix that by moving __reserve_2 into that hole.

Further, remove the useless struct and redundant __bp_reserve
muck.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1260902591.8023.781.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 12 +++---------
 kernel/perf_event.c        |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 64a53f74c9a9..5fcbf7d2712a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,17 +211,11 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	struct { /* Hardware breakpoint info */
-		__u64		bp_addr;
-		__u32		bp_type;
-		__u32		bp_len;
-		__u64		__bp_reserved_1;
-		__u64		__bp_reserved_2;
-	};
-
 	__u32			__reserved_2;
 
-	__u64			__reserved_3;
+	__u64			bp_addr;
+	__u32			bp_type;
+	__u32			bp_len;
 };
 
 /*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 8823b0885183..0dd8e5d02c66 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4564,7 +4564,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->type >= PERF_TYPE_MAX)
 		return -EINVAL;
 
-	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+	if (attr->__reserved_1 || attr->__reserved_2)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-- 
cgit v1.2.3


From f2511774863487e61b56a97da07ebf8dd61d7836 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 13 Dec 2009 20:29:01 +0100
Subject: PM: Add initcall_debug style timing for suspend/resume

In order to diagnose overall suspend/resume times, we need
basic instrumentation to break down the total time into per
device timing, similar to initcall_debug.

This patch adds the basic timing instrumentation, needed
for a scritps/bootgraph.pl equivalent or humans.
The bootgraph.pl program is still a work in progress, but
is far enough along to know that this patch is sufficient.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c | 31 +++++++++++++++++++++++++++++++
 include/linux/init.h      |  2 ++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 8aa2443182d5..30f0ceebd36c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -25,6 +25,7 @@
 #include <linux/resume-trace.h>
 #include <linux/rwsem.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 
 #include "../base.h"
 #include "power.h"
@@ -172,6 +173,13 @@ static int pm_op(struct device *dev,
 		 pm_message_t state)
 {
 	int error = 0;
+	ktime_t calltime, delta, rettime;
+
+	if (initcall_debug) {
+		pr_info("calling  %s+ @ %i\n",
+				dev_name(dev), task_pid_nr(current));
+		calltime = ktime_get();
+	}
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
@@ -219,6 +227,14 @@ static int pm_op(struct device *dev,
 	default:
 		error = -EINVAL;
 	}
+
+	if (initcall_debug) {
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
+			error, (unsigned long long)ktime_to_ns(delta) >> 10);
+	}
+
 	return error;
 }
 
@@ -236,6 +252,13 @@ static int pm_noirq_op(struct device *dev,
 			pm_message_t state)
 {
 	int error = 0;
+	ktime_t calltime, delta, rettime;
+
+	if (initcall_debug) {
+		pr_info("calling  %s_i+ @ %i\n",
+				dev_name(dev), task_pid_nr(current));
+		calltime = ktime_get();
+	}
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
@@ -283,6 +306,14 @@ static int pm_noirq_op(struct device *dev,
 	default:
 		error = -EINVAL;
 	}
+
+	if (initcall_debug) {
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		printk("initcall %s_i+ returned %d after %Ld usecs\n", dev_name(dev),
+			error, (unsigned long long)ktime_to_ns(delta) >> 10);
+	}
+
 	return error;
 }
 
diff --git a/include/linux/init.h b/include/linux/init.h
index ff8bde520d03..ab1d31f9352b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -149,6 +149,8 @@ void prepare_namespace(void);
 
 extern void (*late_time_init)(void);
 
+extern int initcall_debug;
+
 #endif
   
 #ifndef MODULE
-- 
cgit v1.2.3


From 3d8986c7585457c45fd349b2c542c7c1ecd20843 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Dec 2009 14:09:03 -0500
Subject: nfsd: enable V4ROOT exports

With the v4root option now enforced everywhere it should be, it is safe
to advertise support for it to mountd.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 41f0d4e25374..8ae78a61eea4 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -50,7 +50,7 @@
  */
 #define	NFSEXP_V4ROOT		0x10000
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS		0x7E3F
+#define NFSEXP_ALLFLAGS		0x17E3F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3


From c7af6b0895229bd080b86afc91302b66f6df0378 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Dec 2009 18:29:33 -0500
Subject: nfsd: remove unused field rq_reffh

This field is never referenced anywhere else.  I don't know what it was
intended for.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d1567d627557..5a3085b9b394 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -273,10 +273,6 @@ struct svc_rqst {
 	struct auth_domain *	rq_client;	/* RPC peer info */
 	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
 	struct svc_cacherep *	rq_cacherep;	/* cache info */
-	struct knfsd_fh *	rq_reffh;	/* Referrence filehandle, used to
-						 * determine what device number
-						 * to report (real or virtual)
-						 */
 	int			rq_splice_ok;   /* turned off in gss privacy
 						 * to prevent encrypting page
 						 * cache pages */
-- 
cgit v1.2.3


From 1557aca7904ed6fadd22cdc3364754070bb3d3c3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Dec 2009 19:36:06 -0500
Subject: nfsd: move most of nfsfh.h to fs/nfsd

Most of this can be trivially moved to a private header as well.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c           |   1 +
 fs/nfsd/nfsfh.h            | 208 +++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/state.h            |   1 +
 fs/nfsd/vfs.h              |   2 +
 fs/nfsd/xdr.h              |   1 +
 include/linux/nfsd/nfsfh.h | 199 -------------------------------------------
 6 files changed, 213 insertions(+), 199 deletions(-)
 create mode 100644 fs/nfsd/nfsfh.h

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7d5ba1b0ffcf..b26a3644fbb9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
 #include <net/ipv6.h>
 
 #include "nfsd.h"
+#include "nfsfh.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 000000000000..cdfb8c6a4206
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
+/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+
+#ifndef _LINUX_NFSD_FH_INT_H
+#define _LINUX_NFSD_FH_INT_H
+
+#include <linux/nfsd/nfsfh.h>
+
+enum nfsd_fsid {
+	FSID_DEV = 0,
+	FSID_NUM,
+	FSID_MAJOR_MINOR,
+	FSID_ENCODE_DEV,
+	FSID_UUID4_INUM,
+	FSID_UUID8,
+	FSID_UUID16,
+	FSID_UUID16_INUM,
+};
+
+enum fsid_source {
+	FSIDSOURCE_DEV,
+	FSIDSOURCE_FSID,
+	FSIDSOURCE_UUID,
+};
+extern enum fsid_source fsid_source(struct svc_fh *fhp);
+
+
+/* This might look a little large to "inline" but in all calls except
+ * one, 'vers' is constant so moste of the function disappears.
+ */
+static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
+			   u32 fsid, unsigned char *uuid)
+{
+	u32 *up;
+	switch(vers) {
+	case FSID_DEV:
+		fsidv[0] = htonl((MAJOR(dev)<<16) |
+				 MINOR(dev));
+		fsidv[1] = ino_t_to_u32(ino);
+		break;
+	case FSID_NUM:
+		fsidv[0] = fsid;
+		break;
+	case FSID_MAJOR_MINOR:
+		fsidv[0] = htonl(MAJOR(dev));
+		fsidv[1] = htonl(MINOR(dev));
+		fsidv[2] = ino_t_to_u32(ino);
+		break;
+
+	case FSID_ENCODE_DEV:
+		fsidv[0] = new_encode_dev(dev);
+		fsidv[1] = ino_t_to_u32(ino);
+		break;
+
+	case FSID_UUID4_INUM:
+		/* 4 byte fsid and inode number */
+		up = (u32*)uuid;
+		fsidv[0] = ino_t_to_u32(ino);
+		fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
+		break;
+
+	case FSID_UUID8:
+		/* 8 byte fsid  */
+		up = (u32*)uuid;
+		fsidv[0] = up[0] ^ up[2];
+		fsidv[1] = up[1] ^ up[3];
+		break;
+
+	case FSID_UUID16:
+		/* 16 byte fsid - NFSv3+ only */
+		memcpy(fsidv, uuid, 16);
+		break;
+
+	case FSID_UUID16_INUM:
+		/* 8 byte inode and 16 byte fsid */
+		*(u64*)fsidv = (u64)ino;
+		memcpy(fsidv+2, uuid, 16);
+		break;
+	default: BUG();
+	}
+}
+
+static inline int key_len(int type)
+{
+	switch(type) {
+	case FSID_DEV:		return 8;
+	case FSID_NUM: 		return 4;
+	case FSID_MAJOR_MINOR:	return 12;
+	case FSID_ENCODE_DEV:	return 8;
+	case FSID_UUID4_INUM:	return 8;
+	case FSID_UUID8:	return 8;
+	case FSID_UUID16:	return 16;
+	case FSID_UUID16_INUM:	return 24;
+	default: return 0;
+	}
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+extern char * SVCFH_fmt(struct svc_fh *fhp);
+
+/*
+ * Function prototypes
+ */
+__be32	fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
+__be32	fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
+__be32	fh_update(struct svc_fh *);
+void	fh_put(struct svc_fh *);
+
+static __inline__ struct svc_fh *
+fh_copy(struct svc_fh *dst, struct svc_fh *src)
+{
+	WARN_ON(src->fh_dentry || src->fh_locked);
+			
+	*dst = *src;
+	return dst;
+}
+
+static inline void
+fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+{
+	dst->fh_size = src->fh_size;
+	memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
+}
+
+static __inline__ struct svc_fh *
+fh_init(struct svc_fh *fhp, int maxsize)
+{
+	memset(fhp, 0, sizeof(*fhp));
+	fhp->fh_maxsize = maxsize;
+	return fhp;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Fill in the pre_op attr for the wcc data
+ */
+static inline void
+fill_pre_wcc(struct svc_fh *fhp)
+{
+	struct inode    *inode;
+
+	inode = fhp->fh_dentry->d_inode;
+	if (!fhp->fh_pre_saved) {
+		fhp->fh_pre_mtime = inode->i_mtime;
+		fhp->fh_pre_ctime = inode->i_ctime;
+		fhp->fh_pre_size  = inode->i_size;
+		fhp->fh_pre_change = inode->i_version;
+		fhp->fh_pre_saved = 1;
+	}
+}
+
+extern void fill_post_wcc(struct svc_fh *);
+#else
+#define	fill_pre_wcc(ignored)
+#define fill_post_wcc(notused)
+#endif /* CONFIG_NFSD_V3 */
+
+
+/*
+ * Lock a file handle/inode
+ * NOTE: both fh_lock and fh_unlock are done "by hand" in
+ * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
+ * so, any changes here should be reflected there.
+ */
+
+static inline void
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+	struct inode	*inode;
+
+	BUG_ON(!dentry);
+
+	if (fhp->fh_locked) {
+		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name);
+		return;
+	}
+
+	inode = dentry->d_inode;
+	mutex_lock_nested(&inode->i_mutex, subclass);
+	fill_pre_wcc(fhp);
+	fhp->fh_locked = 1;
+}
+
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+	fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
+/*
+ * Unlock a file handle/inode
+ */
+static inline void
+fh_unlock(struct svc_fh *fhp)
+{
+	BUG_ON(!fhp->fh_dentry);
+
+	if (fhp->fh_locked) {
+		fill_post_wcc(fhp);
+		mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
+		fhp->fh_locked = 0;
+	}
+}
+
+#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2af75686e0d3..775b8d281d6a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -38,6 +38,7 @@
 #define _NFSD4_STATE_H
 
 #include <linux/nfsd/nfsfh.h>
+#include "nfsfh.h"
 
 typedef struct {
 	u32             cl_boot;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index f4fa6d351bbd..4b1de0a9ea75 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -5,6 +5,8 @@
 #ifndef LINUX_NFSD_VFS_H
 #define LINUX_NFSD_VFS_H
 
+#include "nfsfh.h"
+
 /*
  * Flags for nfsd_permission
  */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 235ee5c3be54..87fe6f64b8f7 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -9,6 +9,7 @@
 
 #include <linux/vfs.h>
 #include "nfsd.h"
+#include "nfsfh.h"
 
 struct nfsd_fhandle {
 	struct svc_fh		fh;
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 49523edbc510..65e333afaee4 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -162,205 +162,6 @@ typedef struct svc_fh {
 
 } svc_fh;
 
-enum nfsd_fsid {
-	FSID_DEV = 0,
-	FSID_NUM,
-	FSID_MAJOR_MINOR,
-	FSID_ENCODE_DEV,
-	FSID_UUID4_INUM,
-	FSID_UUID8,
-	FSID_UUID16,
-	FSID_UUID16_INUM,
-};
-
-enum fsid_source {
-	FSIDSOURCE_DEV,
-	FSIDSOURCE_FSID,
-	FSIDSOURCE_UUID,
-};
-extern enum fsid_source fsid_source(struct svc_fh *fhp);
-
-
-/* This might look a little large to "inline" but in all calls except
- * one, 'vers' is constant so moste of the function disappears.
- */
-static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
-			   u32 fsid, unsigned char *uuid)
-{
-	u32 *up;
-	switch(vers) {
-	case FSID_DEV:
-		fsidv[0] = htonl((MAJOR(dev)<<16) |
-				 MINOR(dev));
-		fsidv[1] = ino_t_to_u32(ino);
-		break;
-	case FSID_NUM:
-		fsidv[0] = fsid;
-		break;
-	case FSID_MAJOR_MINOR:
-		fsidv[0] = htonl(MAJOR(dev));
-		fsidv[1] = htonl(MINOR(dev));
-		fsidv[2] = ino_t_to_u32(ino);
-		break;
-
-	case FSID_ENCODE_DEV:
-		fsidv[0] = new_encode_dev(dev);
-		fsidv[1] = ino_t_to_u32(ino);
-		break;
-
-	case FSID_UUID4_INUM:
-		/* 4 byte fsid and inode number */
-		up = (u32*)uuid;
-		fsidv[0] = ino_t_to_u32(ino);
-		fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
-		break;
-
-	case FSID_UUID8:
-		/* 8 byte fsid  */
-		up = (u32*)uuid;
-		fsidv[0] = up[0] ^ up[2];
-		fsidv[1] = up[1] ^ up[3];
-		break;
-
-	case FSID_UUID16:
-		/* 16 byte fsid - NFSv3+ only */
-		memcpy(fsidv, uuid, 16);
-		break;
-
-	case FSID_UUID16_INUM:
-		/* 8 byte inode and 16 byte fsid */
-		*(u64*)fsidv = (u64)ino;
-		memcpy(fsidv+2, uuid, 16);
-		break;
-	default: BUG();
-	}
-}
-
-static inline int key_len(int type)
-{
-	switch(type) {
-	case FSID_DEV:		return 8;
-	case FSID_NUM: 		return 4;
-	case FSID_MAJOR_MINOR:	return 12;
-	case FSID_ENCODE_DEV:	return 8;
-	case FSID_UUID4_INUM:	return 8;
-	case FSID_UUID8:	return 8;
-	case FSID_UUID16:	return 16;
-	case FSID_UUID16_INUM:	return 24;
-	default: return 0;
-	}
-}
-
-/*
- * Shorthand for dprintk()'s
- */
-extern char * SVCFH_fmt(struct svc_fh *fhp);
-
-/*
- * Function prototypes
- */
-__be32	fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
-__be32	fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
-__be32	fh_update(struct svc_fh *);
-void	fh_put(struct svc_fh *);
-
-static __inline__ struct svc_fh *
-fh_copy(struct svc_fh *dst, struct svc_fh *src)
-{
-	WARN_ON(src->fh_dentry || src->fh_locked);
-			
-	*dst = *src;
-	return dst;
-}
-
-static inline void
-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
-{
-	dst->fh_size = src->fh_size;
-	memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
-}
-
-static __inline__ struct svc_fh *
-fh_init(struct svc_fh *fhp, int maxsize)
-{
-	memset(fhp, 0, sizeof(*fhp));
-	fhp->fh_maxsize = maxsize;
-	return fhp;
-}
-
-#ifdef CONFIG_NFSD_V3
-/*
- * Fill in the pre_op attr for the wcc data
- */
-static inline void
-fill_pre_wcc(struct svc_fh *fhp)
-{
-	struct inode    *inode;
-
-	inode = fhp->fh_dentry->d_inode;
-	if (!fhp->fh_pre_saved) {
-		fhp->fh_pre_mtime = inode->i_mtime;
-		fhp->fh_pre_ctime = inode->i_ctime;
-		fhp->fh_pre_size  = inode->i_size;
-		fhp->fh_pre_change = inode->i_version;
-		fhp->fh_pre_saved = 1;
-	}
-}
-
-extern void fill_post_wcc(struct svc_fh *);
-#else
-#define	fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
-
-
-/*
- * Lock a file handle/inode
- * NOTE: both fh_lock and fh_unlock are done "by hand" in
- * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
- * so, any changes here should be reflected there.
- */
-
-static inline void
-fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
-{
-	struct dentry	*dentry = fhp->fh_dentry;
-	struct inode	*inode;
-
-	BUG_ON(!dentry);
-
-	if (fhp->fh_locked) {
-		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
-		return;
-	}
-
-	inode = dentry->d_inode;
-	mutex_lock_nested(&inode->i_mutex, subclass);
-	fill_pre_wcc(fhp);
-	fhp->fh_locked = 1;
-}
-
-static inline void
-fh_lock(struct svc_fh *fhp)
-{
-	fh_lock_nested(fhp, I_MUTEX_NORMAL);
-}
-
-/*
- * Unlock a file handle/inode
- */
-static inline void
-fh_unlock(struct svc_fh *fhp)
-{
-	BUG_ON(!fhp->fh_dentry);
-
-	if (fhp->fh_locked) {
-		fill_post_wcc(fhp);
-		mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
-		fhp->fh_locked = 0;
-	}
-}
 #endif /* __KERNEL__ */
 
 
-- 
cgit v1.2.3


From c1e7c3ae59b065bf7ff24a05cb609b2f9e314db6 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Mon, 14 Dec 2009 21:45:19 +0000
Subject: bzip2/lzma/gzip: pre-boot malloc doesn't return NULL on failure

The trivial malloc implementation used in the pre-boot environment by the
decompressors returns a bad pointer on failure (falling through after
calling error).  This is doubly wrong - the callers expect malloc to
return NULL on failure, second the error function is intended to be
used by the decompressors to propagate errors to *their* callers.  The
decompressors have no access to any state set by the error function.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
LKML-Reference: <4b26b1ef.hIInb2AYPMtImAJO%phillip@lougher.demon.co.uk>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/decompress/mm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 12ff8c3f1d05..5032b9a31ae7 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -25,7 +25,7 @@ static void *malloc(int size)
 	void *p;
 
 	if (size < 0)
-		error("Malloc error");
+		return NULL;
 	if (!malloc_ptr)
 		malloc_ptr = free_mem_ptr;
 
@@ -35,7 +35,7 @@ static void *malloc(int size)
 	malloc_ptr += size;
 
 	if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
-		error("Out of memory");
+		return NULL;
 
 	malloc_count++;
 	return p;
-- 
cgit v1.2.3


From 9065ce4500085b9ca66b19d3c4d21a73cb410173 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 17 Nov 2009 17:05:19 -0700
Subject: PNP: add interface to retrieve ACPI device from a PNPACPI device

Add pnp_acpi_device(pnp_dev), which takes a PNP device and returns the
associated ACPI device (or NULL, if the device is not a PNPACPI device).

This allows us to write a PNP driver that can manage both traditional
PNPBIOS and ACPI devices, treating ACPI-only functionality as an optional
extension.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/pnp/pnpacpi/core.c |  3 ++-
 include/linux/pnp.h        | 13 +++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index b2348fc2378e..5314bf630bc4 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -144,7 +144,7 @@ static int pnpacpi_resume(struct pnp_dev *dev)
 }
 #endif
 
-static struct pnp_protocol pnpacpi_protocol = {
+struct pnp_protocol pnpacpi_protocol = {
 	.name	 = "Plug and Play ACPI",
 	.get	 = pnpacpi_get_resources,
 	.set	 = pnpacpi_set_resources,
@@ -154,6 +154,7 @@ static struct pnp_protocol pnpacpi_protocol = {
 	.resume = pnpacpi_resume,
 #endif
 };
+EXPORT_SYMBOL(pnpacpi_protocol);
 
 static int __init pnpacpi_add_device(struct acpi_device *device)
 {
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index fddfafaed024..7c4193eb0072 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -334,6 +334,19 @@ extern struct pnp_protocol pnpbios_protocol;
 #define pnp_device_is_pnpbios(dev) 0
 #endif
 
+#ifdef CONFIG_PNPACPI
+extern struct pnp_protocol pnpacpi_protocol;
+
+static inline struct acpi_device *pnp_acpi_device(struct pnp_dev *dev)
+{
+	if (dev->protocol == &pnpacpi_protocol)
+		return dev->data;
+	return NULL;
+}
+#else
+#define pnp_acpi_device(dev) 0
+#endif
+
 /* status */
 #define PNP_READY		0x0000
 #define PNP_ATTACHED		0x0001
-- 
cgit v1.2.3


From b6456c0cfe9d94e6d2bf684e6e6c031fc0b10031 Mon Sep 17 00:00:00 2001
From: Muralidharan Karicheri <m-karicheri2@ti.com>
Date: Thu, 19 Nov 2009 12:00:31 -0300
Subject: V4L/DVB (13571): v4l: Adding Digital Video Timings APIs

This adds the above APIs to the v4l2 core. This is based on version v1.2
of the RFC titled "V4L - Support for video timings at the input/output interface"
Following new ioctls are added:-

        - VIDIOC_ENUM_DV_PRESETS
        - VIDIOC_S_DV_PRESET
        - VIDIOC_G_DV_PRESET
        - VIDIOC_QUERY_DV_PRESET
        - VIDIOC_S_DV_TIMINGS
        - VIDIOC_G_DV_TIMINGS

Please refer to the RFC for the details. This code was tested using vpfe
capture driver on TI's DM365. Following is the test configuration used :-

Blu-Ray HD DVD source -> TVP7002 -> DM365 (VPFE) ->DDR

A draft version of the TVP7002 driver (currently being reviewed in the mailing
list) was used that supports V4L2_DV_1080I60 & V4L2_DV_720P60 presets.

A loopback video capture application was used for testing these APIs. This calls
following IOCTLS :-

 -  verify the new v4l2_input capabilities flag added
 -  Enumerate available presets using VIDIOC_ENUM_DV_PRESETS
 -  Set one of the supported preset using VIDIOC_S_DV_PRESET
 -  Get current preset using VIDIOC_G_DV_PRESET
 -  Detect current preset using VIDIOC_QUERY_DV_PRESET
 -  Using stub functions in tvp7002, verify VIDIOC_S_DV_TIMINGS
    and VIDIOC_G_DV_TIMINGS ioctls are received at the sub device.
 -  Tested on 64bit platform by Hans Verkuil

Signed-off-by: Muralidharan Karicheri <m-karicheri2@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c |   6 ++
 drivers/media/video/v4l2-ioctl.c          | 147 ++++++++++++++++++++++++++++++
 include/linux/videodev2.h                 | 116 ++++++++++++++++++++++-
 include/media/v4l2-ioctl.h                |  15 +++
 include/media/v4l2-subdev.h               |  21 +++++
 5 files changed, 303 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 997975d5e024..c4150bd26337 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1077,6 +1077,12 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_DBG_G_REGISTER:
 	case VIDIOC_DBG_G_CHIP_IDENT:
 	case VIDIOC_S_HW_FREQ_SEEK:
+	case VIDIOC_ENUM_DV_PRESETS:
+	case VIDIOC_S_DV_PRESET:
+	case VIDIOC_G_DV_PRESET:
+	case VIDIOC_QUERY_DV_PRESET:
+	case VIDIOC_S_DV_TIMINGS:
+	case VIDIOC_G_DV_TIMINGS:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
 
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 30cc3347ae52..4b11257c3184 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -284,6 +284,12 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_DBG_G_CHIP_IDENT)] = "VIDIOC_DBG_G_CHIP_IDENT",
 	[_IOC_NR(VIDIOC_S_HW_FREQ_SEEK)]   = "VIDIOC_S_HW_FREQ_SEEK",
 #endif
+	[_IOC_NR(VIDIOC_ENUM_DV_PRESETS)]  = "VIDIOC_ENUM_DV_PRESETS",
+	[_IOC_NR(VIDIOC_S_DV_PRESET)]	   = "VIDIOC_S_DV_PRESET",
+	[_IOC_NR(VIDIOC_G_DV_PRESET)]	   = "VIDIOC_G_DV_PRESET",
+	[_IOC_NR(VIDIOC_QUERY_DV_PRESET)]  = "VIDIOC_QUERY_DV_PRESET",
+	[_IOC_NR(VIDIOC_S_DV_TIMINGS)]     = "VIDIOC_S_DV_TIMINGS",
+	[_IOC_NR(VIDIOC_G_DV_TIMINGS)]     = "VIDIOC_G_DV_TIMINGS",
 };
 #define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls)
 
@@ -1135,6 +1141,19 @@ static long __video_do_ioctl(struct file *file,
 	{
 		struct v4l2_input *p = arg;
 
+		/*
+		 * We set the flags for CAP_PRESETS, CAP_CUSTOM_TIMINGS &
+		 * CAP_STD here based on ioctl handler provided by the
+		 * driver. If the driver doesn't support these
+		 * for a specific input, it must override these flags.
+		 */
+		if (ops->vidioc_s_std)
+			p->capabilities |= V4L2_IN_CAP_STD;
+		if (ops->vidioc_s_dv_preset)
+			p->capabilities |= V4L2_IN_CAP_PRESETS;
+		if (ops->vidioc_s_dv_timings)
+			p->capabilities |= V4L2_IN_CAP_CUSTOM_TIMINGS;
+
 		if (!ops->vidioc_enum_input)
 			break;
 
@@ -1179,6 +1198,19 @@ static long __video_do_ioctl(struct file *file,
 		if (!ops->vidioc_enum_output)
 			break;
 
+		/*
+		 * We set the flags for CAP_PRESETS, CAP_CUSTOM_TIMINGS &
+		 * CAP_STD here based on ioctl handler provided by the
+		 * driver. If the driver doesn't support these
+		 * for a specific output, it must override these flags.
+		 */
+		if (ops->vidioc_s_std)
+			p->capabilities |= V4L2_OUT_CAP_STD;
+		if (ops->vidioc_s_dv_preset)
+			p->capabilities |= V4L2_OUT_CAP_PRESETS;
+		if (ops->vidioc_s_dv_timings)
+			p->capabilities |= V4L2_OUT_CAP_CUSTOM_TIMINGS;
+
 		ret = ops->vidioc_enum_output(file, fh, p);
 		if (!ret)
 			dbgarg(cmd, "index=%d, name=%s, type=%d, "
@@ -1794,6 +1826,121 @@ static long __video_do_ioctl(struct file *file,
 		}
 		break;
 	}
+	case VIDIOC_ENUM_DV_PRESETS:
+	{
+		struct v4l2_dv_enum_preset *p = arg;
+
+		if (!ops->vidioc_enum_dv_presets)
+			break;
+
+		ret = ops->vidioc_enum_dv_presets(file, fh, p);
+		if (!ret)
+			dbgarg(cmd,
+				"index=%d, preset=%d, name=%s, width=%d,"
+				" height=%d ",
+				p->index, p->preset, p->name, p->width,
+				p->height);
+		break;
+	}
+	case VIDIOC_S_DV_PRESET:
+	{
+		struct v4l2_dv_preset *p = arg;
+
+		if (!ops->vidioc_s_dv_preset)
+			break;
+
+		dbgarg(cmd, "preset=%d\n", p->preset);
+		ret = ops->vidioc_s_dv_preset(file, fh, p);
+		break;
+	}
+	case VIDIOC_G_DV_PRESET:
+	{
+		struct v4l2_dv_preset *p = arg;
+
+		if (!ops->vidioc_g_dv_preset)
+			break;
+
+		ret = ops->vidioc_g_dv_preset(file, fh, p);
+		if (!ret)
+			dbgarg(cmd, "preset=%d\n", p->preset);
+		break;
+	}
+	case VIDIOC_QUERY_DV_PRESET:
+	{
+		struct v4l2_dv_preset *p = arg;
+
+		if (!ops->vidioc_query_dv_preset)
+			break;
+
+		ret = ops->vidioc_query_dv_preset(file, fh, p);
+		if (!ret)
+			dbgarg(cmd, "preset=%d\n", p->preset);
+		break;
+	}
+	case VIDIOC_S_DV_TIMINGS:
+	{
+		struct v4l2_dv_timings *p = arg;
+
+		if (!ops->vidioc_s_dv_timings)
+			break;
+
+		switch (p->type) {
+		case V4L2_DV_BT_656_1120:
+			dbgarg2("bt-656/1120:interlaced=%d, pixelclock=%lld,"
+				" width=%d, height=%d, polarities=%x,"
+				" hfrontporch=%d, hsync=%d, hbackporch=%d,"
+				" vfrontporch=%d, vsync=%d, vbackporch=%d,"
+				" il_vfrontporch=%d, il_vsync=%d,"
+				" il_vbackporch=%d\n",
+				p->bt.interlaced, p->bt.pixelclock,
+				p->bt.width, p->bt.height, p->bt.polarities,
+				p->bt.hfrontporch, p->bt.hsync,
+				p->bt.hbackporch, p->bt.vfrontporch,
+				p->bt.vsync, p->bt.vbackporch,
+				p->bt.il_vfrontporch, p->bt.il_vsync,
+				p->bt.il_vbackporch);
+			ret = ops->vidioc_s_dv_timings(file, fh, p);
+			break;
+		default:
+			dbgarg2("Unknown type %d!\n", p->type);
+			break;
+		}
+		break;
+	}
+	case VIDIOC_G_DV_TIMINGS:
+	{
+		struct v4l2_dv_timings *p = arg;
+
+		if (!ops->vidioc_g_dv_timings)
+			break;
+
+		ret = ops->vidioc_g_dv_timings(file, fh, p);
+		if (!ret) {
+			switch (p->type) {
+			case V4L2_DV_BT_656_1120:
+				dbgarg2("bt-656/1120:interlaced=%d,"
+					" pixelclock=%lld,"
+					" width=%d, height=%d, polarities=%x,"
+					" hfrontporch=%d, hsync=%d,"
+					" hbackporch=%d, vfrontporch=%d,"
+					" vsync=%d, vbackporch=%d,"
+					" il_vfrontporch=%d, il_vsync=%d,"
+					" il_vbackporch=%d\n",
+					p->bt.interlaced, p->bt.pixelclock,
+					p->bt.width, p->bt.height,
+					p->bt.polarities, p->bt.hfrontporch,
+					p->bt.hsync, p->bt.hbackporch,
+					p->bt.vfrontporch, p->bt.vsync,
+					p->bt.vbackporch, p->bt.il_vfrontporch,
+					p->bt.il_vsync, p->bt.il_vbackporch);
+				break;
+			default:
+				dbgarg2("Unknown type %d!\n", p->type);
+				break;
+			}
+		}
+		break;
+	}
 
 	default:
 	{
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 32b92298fd79..59047ebf452e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -731,6 +731,99 @@ struct v4l2_standard {
 	__u32		     reserved[4];
 };
 
+/*
+ *	V I D E O	T I M I N G S	D V	P R E S E T
+ */
+struct v4l2_dv_preset {
+	__u32	preset;
+	__u32	reserved[4];
+};
+
+/*
+ *	D V	P R E S E T S	E N U M E R A T I O N
+ */
+struct v4l2_dv_enum_preset {
+	__u32	index;
+	__u32	preset;
+	__u8	name[32]; /* Name of the preset timing */
+	__u32	width;
+	__u32	height;
+	__u32	reserved[4];
+};
+
+/*
+ * 	D V	P R E S E T	V A L U E S
+ */
+#define		V4L2_DV_INVALID		0
+#define		V4L2_DV_480P59_94	1 /* BT.1362 */
+#define		V4L2_DV_576P50		2 /* BT.1362 */
+#define		V4L2_DV_720P24		3 /* SMPTE 296M */
+#define		V4L2_DV_720P25		4 /* SMPTE 296M */
+#define		V4L2_DV_720P30		5 /* SMPTE 296M */
+#define		V4L2_DV_720P50		6 /* SMPTE 296M */
+#define		V4L2_DV_720P59_94	7 /* SMPTE 274M */
+#define		V4L2_DV_720P60		8 /* SMPTE 274M/296M */
+#define		V4L2_DV_1080I29_97	9 /* BT.1120/ SMPTE 274M */
+#define		V4L2_DV_1080I30		10 /* BT.1120/ SMPTE 274M */
+#define		V4L2_DV_1080I25		11 /* BT.1120 */
+#define		V4L2_DV_1080I50		12 /* SMPTE 296M */
+#define		V4L2_DV_1080I60		13 /* SMPTE 296M */
+#define		V4L2_DV_1080P24		14 /* SMPTE 296M */
+#define		V4L2_DV_1080P25		15 /* SMPTE 296M */
+#define		V4L2_DV_1080P30		16 /* SMPTE 296M */
+#define		V4L2_DV_1080P50		17 /* BT.1120 */
+#define		V4L2_DV_1080P60		18 /* BT.1120 */
+
+/*
+ *	D V 	B T	T I M I N G S
+ */
+
+/* BT.656/BT.1120 timing data */
+struct v4l2_bt_timings {
+	__u32	width;		/* width in pixels */
+	__u32	height;		/* height in lines */
+	__u32	interlaced;	/* Interlaced or progressive */
+	__u32	polarities;	/* Positive or negative polarity */
+	__u64	pixelclock;	/* Pixel clock in HZ. Ex. 74.25MHz->74250000 */
+	__u32	hfrontporch;	/* Horizpontal front porch in pixels */
+	__u32	hsync;		/* Horizontal Sync length in pixels */
+	__u32	hbackporch;	/* Horizontal back porch in pixels */
+	__u32	vfrontporch;	/* Vertical front porch in pixels */
+	__u32	vsync;		/* Vertical Sync length in lines */
+	__u32	vbackporch;	/* Vertical back porch in lines */
+	__u32	il_vfrontporch;	/* Vertical front porch for bottom field of
+				 * interlaced field formats
+				 */
+	__u32	il_vsync;	/* Vertical sync length for bottom field of
+				 * interlaced field formats
+				 */
+	__u32	il_vbackporch;	/* Vertical back porch for bottom field of
+				 * interlaced field formats
+				 */
+	__u32	reserved[16];
+} __attribute__ ((packed));
+
+/* Interlaced or progressive format */
+#define	V4L2_DV_PROGRESSIVE	0
+#define	V4L2_DV_INTERLACED	1
+
+/* Polarities. If bit is not set, it is assumed to be negative polarity */
+#define V4L2_DV_VSYNC_POS_POL	0x00000001
+#define V4L2_DV_HSYNC_POS_POL	0x00000002
+
+
+/* DV timings */
+struct v4l2_dv_timings {
+	__u32 type;
+	union {
+		struct v4l2_bt_timings	bt;
+		__u32	reserved[32];
+	};
+} __attribute__ ((packed));
+
+/* Values for the type field */
+#define V4L2_DV_BT_656_1120	0	/* BT.656/1120 timing type */
+
 /*
  *	V I D E O   I N P U T S
  */
@@ -742,7 +835,8 @@ struct v4l2_input {
 	__u32        tuner;             /*  Associated tuner */
 	v4l2_std_id  std;
 	__u32	     status;
-	__u32	     reserved[4];
+	__u32	     capabilities;
+	__u32	     reserved[3];
 };
 
 /*  Values for the 'type' field */
@@ -773,6 +867,11 @@ struct v4l2_input {
 #define V4L2_IN_ST_NO_ACCESS   0x02000000  /* Conditional access denied */
 #define V4L2_IN_ST_VTR         0x04000000  /* VTR time constant */
 
+/* capabilities flags */
+#define V4L2_IN_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
+#define V4L2_IN_CAP_CUSTOM_TIMINGS	0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_IN_CAP_STD			0x00000004 /* Supports S_STD */
+
 /*
  *	V I D E O   O U T P U T S
  */
@@ -783,13 +882,19 @@ struct v4l2_output {
 	__u32	     audioset;		/*  Associated audios (bitfield) */
 	__u32	     modulator;         /*  Associated modulator */
 	v4l2_std_id  std;
-	__u32	     reserved[4];
+	__u32	     capabilities;
+	__u32	     reserved[3];
 };
 /*  Values for the 'type' field */
 #define V4L2_OUTPUT_TYPE_MODULATOR		1
 #define V4L2_OUTPUT_TYPE_ANALOG			2
 #define V4L2_OUTPUT_TYPE_ANALOGVGAOVERLAY	3
 
+/* capabilities flags */
+#define V4L2_OUT_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
+#define V4L2_OUT_CAP_CUSTOM_TIMINGS	0x00000002 /* Supports S_DV_TIMINGS */
+#define V4L2_OUT_CAP_STD		0x00000004 /* Supports S_STD */
+
 /*
  *	C O N T R O L S
  */
@@ -1624,6 +1729,13 @@ struct v4l2_dbg_chip_ident {
 #endif
 
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
+#define	VIDIOC_ENUM_DV_PRESETS	_IOWR('V', 83, struct v4l2_dv_enum_preset)
+#define	VIDIOC_S_DV_PRESET	_IOWR('V', 84, struct v4l2_dv_preset)
+#define	VIDIOC_G_DV_PRESET	_IOWR('V', 85, struct v4l2_dv_preset)
+#define	VIDIOC_QUERY_DV_PRESET	_IOR('V',  86, struct v4l2_dv_preset)
+#define	VIDIOC_S_DV_TIMINGS	_IOWR('V', 87, struct v4l2_dv_timings)
+#define	VIDIOC_G_DV_TIMINGS	_IOWR('V', 88, struct v4l2_dv_timings)
+
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 7a4529defa88..e8ba0f2efbae 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -239,6 +239,21 @@ struct v4l2_ioctl_ops {
 	int (*vidioc_enum_frameintervals) (struct file *file, void *fh,
 					   struct v4l2_frmivalenum *fival);
 
+	/* DV Timings IOCTLs */
+	int (*vidioc_enum_dv_presets) (struct file *file, void *fh,
+				       struct v4l2_dv_enum_preset *preset);
+
+	int (*vidioc_s_dv_preset) (struct file *file, void *fh,
+				   struct v4l2_dv_preset *preset);
+	int (*vidioc_g_dv_preset) (struct file *file, void *fh,
+				   struct v4l2_dv_preset *preset);
+	int (*vidioc_query_dv_preset) (struct file *file, void *fh,
+					struct v4l2_dv_preset *qpreset);
+	int (*vidioc_s_dv_timings) (struct file *file, void *fh,
+				    struct v4l2_dv_timings *timings);
+	int (*vidioc_g_dv_timings) (struct file *file, void *fh,
+				    struct v4l2_dv_timings *timings);
+
 	/* For other private ioctls */
 	long (*vidioc_default)	       (struct file *file, void *fh,
 					int cmd, void *arg);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 00bf17608453..7a3408f9d0f2 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -217,6 +217,19 @@ struct v4l2_subdev_audio_ops {
 
    s_routing: see s_routing in audio_ops, except this version is for video
 	devices.
+
+   s_dv_preset: set dv (Digital Video) preset in the sub device. Similar to
+	s_std()
+
+   query_dv_preset: query dv preset in the sub device. This is similar to
+	querystd()
+
+   s_dv_timings(): Set custom dv timings in the sub device. This is used
+	when sub device is capable of setting detailed timing information
+	in the hardware to generate/detect the video signal.
+
+   g_dv_timings(): Get custom dv timings in the sub device.
+
  */
 struct v4l2_subdev_video_ops {
 	int (*s_routing)(struct v4l2_subdev *sd, u32 input, u32 output, u32 config);
@@ -240,6 +253,14 @@ struct v4l2_subdev_video_ops {
 	int (*s_parm)(struct v4l2_subdev *sd, struct v4l2_streamparm *param);
 	int (*enum_framesizes)(struct v4l2_subdev *sd, struct v4l2_frmsizeenum *fsize);
 	int (*enum_frameintervals)(struct v4l2_subdev *sd, struct v4l2_frmivalenum *fival);
+	int (*s_dv_preset)(struct v4l2_subdev *sd,
+			struct v4l2_dv_preset *preset);
+	int (*query_dv_preset)(struct v4l2_subdev *sd,
+			struct v4l2_dv_preset *preset);
+	int (*s_dv_timings)(struct v4l2_subdev *sd,
+			struct v4l2_dv_timings *timings);
+	int (*g_dv_timings)(struct v4l2_subdev *sd,
+			struct v4l2_dv_timings *timings);
 };
 
 /*
-- 
cgit v1.2.3


From bb5b7c11263dbbe78253cd05945a6bf8f55add8e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Dec 2009 20:56:42 -0800
Subject: tcp: Revert per-route SACK/DSACK/TIMESTAMP changes.

It creates a regression, triggering badness for SYN_RECV
sockets, for example:

[19148.022102] Badness at net/ipv4/inet_connection_sock.c:293
[19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000
[19148.023035] REGS: eeecbd30 TRAP: 0700   Not tainted  (2.6.32)
[19148.023496] MSR: 00029032 <EE,ME,CE,IR,DR>  CR: 24002442  XER: 00000000
[19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000

This is likely caused by the change in the 'estab' parameter
passed to tcp_parse_options() when invoked by the functions
in net/ipv4/tcp_minisocks.c

But even if that is fixed, the ->conn_request() changes made in
this patch series is fundamentally wrong.  They try to use the
listening socket's 'dst' to probe the route settings.  The
listening socket doesn't even have a route, and you can't
get the right route (the child request one) until much later
after we setup all of the state, and it must be done by hand.

This stuff really isn't ready, so the best thing to do is a
full revert.  This reverts the following commits:

f55017a93f1a74d50244b1254b9a2bd7ac9bbf7d
022c3f7d82f0f1c68018696f2f027b87b9bb45c2
1aba721eba1d84a2defce45b950272cee1e6c72a
cda42ebd67ee5fdf09d7057b5a4584d36fe8a335
345cda2fd695534be5a4494f1b59da9daed33663
dc343475ed062e13fc260acccaab91d7d80fd5b2
05eaade2782fb0c90d3034fd7a7d5a16266182bb
6a2a2d6bf8581216e08be15fcb563cfd6c430e1e

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  6 ++----
 include/net/dst.h         |  2 +-
 include/net/tcp.h         |  3 +--
 net/ipv4/syncookies.c     | 27 +++++++++++++--------------
 net/ipv4/tcp_input.c      | 24 ++++++++----------------
 net/ipv4/tcp_ipv4.c       | 21 +++++++++------------
 net/ipv4/tcp_minisocks.c  | 10 +++++-----
 net/ipv4/tcp_output.c     | 18 +++++-------------
 net/ipv6/syncookies.c     | 28 +++++++++++++---------------
 net/ipv6/tcp_ipv6.c       |  3 +--
 10 files changed, 58 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 14fc906ed602..05330fc5b436 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -368,11 +368,9 @@ enum {
 #define RTAX_MAX (__RTAX_MAX - 1)
 
 #define RTAX_FEATURE_ECN	0x00000001
-#define RTAX_FEATURE_NO_SACK	0x00000002
-#define RTAX_FEATURE_NO_TSTAMP	0x00000004
+#define RTAX_FEATURE_SACK	0x00000002
+#define RTAX_FEATURE_TIMESTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
-#define RTAX_FEATURE_NO_WSCALE	0x00000010
-#define RTAX_FEATURE_NO_DSACK	0x00000020
 
 struct rta_session {
 	__u8	proto;
diff --git a/include/net/dst.h b/include/net/dst.h
index 387cb3cfde7e..39c4a5963e12 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric)
 static inline u32
 dst_feature(const struct dst_entry *dst, u32 feature)
 {
-	return (dst ? dst_metric(dst, RTAX_FEATURES) & feature : 0);
+	return dst_metric(dst, RTAX_FEATURES) & feature;
 }
 
 static inline u32 dst_mtu(const struct dst_entry *dst)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1b6f7d348cee..34f5cc24d903 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -408,8 +408,7 @@ extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
 						  u8 **hvpp,
-						  int estab,
-						  struct dst_entry *dst);
+						  int estab);
 
 extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26399ad2a289..66fd80ef2473 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
@@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
 	/* Try to redo what tcp_v4_send_synack did. */
 	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12cab7d74dba..28e029632493 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3727,7 +3727,7 @@ old_ack:
  * the fast version below fails.
  */
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab,  struct dst_entry *dst)
+		       u8 **hvpp, int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_WINDOW:
 				if (opsize == TCPOLEN_WINDOW && th->syn &&
-				    !estab && sysctl_tcp_window_scaling &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
+				    !estab && sysctl_tcp_window_scaling) {
 					__u8 snd_wscale = *(__u8 *)ptr;
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
@@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			case TCPOPT_TIMESTAMP:
 				if ((opsize == TCPOLEN_TIMESTAMP) &&
 				    ((estab && opt_rx->tstamp_ok) ||
-				     (!estab && sysctl_tcp_timestamps &&
-				      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
+				     (!estab && sysctl_tcp_timestamps))) {
 					opt_rx->saw_tstamp = 1;
 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-				    !estab && sysctl_tcp_sack &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
+				    !estab && sysctl_tcp_sack) {
 					opt_rx->sack_ok = 1;
 					tcp_sack_reset(opt_rx);
 				}
@@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
 	}
-	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
 	return 1;
 }
 
@@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
 
 	if (th->ack) {
 		/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 15e96030ce47..65b8ebfd078a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
 #endif
 
-	ireq = inet_rsk(req);
-	ireq->loc_addr = daddr;
-	ireq->rmt_addr = saddr;
-	ireq->no_srccheck = inet_sk(sk)->transparent;
-	ireq->opt = tcp_v4_save_options(sk, skb);
-
-	dst = inet_csk_route_req(sk, req);
-	if(!dst)
-		goto drop_and_free;
-
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	ireq = inet_rsk(req);
+	ireq->loc_addr = daddr;
+	ireq->rmt_addr = saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
+	ireq->opt = tcp_v4_save_options(sk, skb);
+
 	if (security_inet_conn_request(sk, skb, req))
-		goto drop_and_release;
+		goto drop_and_free;
 
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
@@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 87accec8d097..f206ee5dda80 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
+	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
@@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 
-	if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+	tmp_opt.saw_tstamp = 0;
+	if (th->doff > (sizeof(struct tcphdr)>>2)) {
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 93316a96d820..383ce237640f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
-	struct dst_entry *dst = __sk_dst_get(sk);
 	unsigned remaining = MAX_TCP_OPTION_SPACE;
 	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
 			 tcp_cookie_size_check(cvp->cookie_desired) :
@@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	remaining -= TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sysctl_tcp_timestamps &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
-		   *md5 == NULL)) {
+	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = TCP_SKB_CB(skb)->when;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
-	if (likely(sysctl_tcp_window_scaling &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
+	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
 		opts->options |= OPTION_WSCALE;
 		remaining -= TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(sysctl_tcp_sack &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
+	if (likely(sysctl_tcp_sack)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
 		if (unlikely(!(OPTION_TS & opts->options)))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk)
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 	 */
 	tp->tcp_header_len = sizeof(struct tcphdr) +
-		(sysctl_tcp_timestamps &&
-		(!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
-		  TCPOLEN_TSTAMP_ALIGNED : 0));
+		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
@@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk)
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
-				  (sysctl_tcp_window_scaling &&
-				   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
+				  sysctl_tcp_window_scaling,
 				  &rcv_wscale);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5b9af508b8f2..7208a06576c6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -185,6 +185,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (!req)
@@ -218,6 +225,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->expires = 0UL;
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 
@@ -253,21 +266,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 			goto out_free;
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ee9cf62458d4..febfd595a40d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
@@ -1208,7 +1207,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
-- 
cgit v1.2.3


From 588f9ce6ca61ecb4663ee6ef2f75d2d96c73151e Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:19:57 +0100
Subject: HWPOISON: Be more aggressive at freeing non LRU caches

shake_page handles more types of page caches than lru_drain_all()

- per cpu page allocator pages
- per CPU LRU

Stops early when the page became free.

Used in followon patches.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/mm.h  |  1 +
 mm/memory-failure.c | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d65ae4ba0e0..68c84bb2ad3f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1335,6 +1335,7 @@ extern void memory_failure(unsigned long pfn, int trapno);
 extern int __memory_failure(unsigned long pfn, int trapno, int ref);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
+extern void shake_page(struct page *p);
 extern atomic_long_t mce_bad_pages;
 
 #endif /* __KERNEL__ */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 50d4f8d7024a..38fcbb22eab9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -82,6 +82,28 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
 	return ret;
 }
 
+/*
+ * When a unknown page type is encountered drain as many buffers as possible
+ * in the hope to turn the page into a LRU or free page, which we can handle.
+ */
+void shake_page(struct page *p)
+{
+	if (!PageSlab(p)) {
+		lru_add_drain_all();
+		if (PageLRU(p))
+			return;
+		drain_all_pages();
+		if (PageLRU(p) || is_free_buddy_page(p))
+			return;
+	}
+	/*
+	 * Could call shrink_slab here (which would also
+	 * shrink other caches). Unfortunately that might
+	 * also access the corrupted page, which could be fatal.
+	 */
+}
+EXPORT_SYMBOL_GPL(shake_page);
+
 /*
  * Kill all processes that have a poisoned page mapped and then isolate
  * the page.
-- 
cgit v1.2.3


From 82ba011b9041dd31c15e4f63797b08aa0a288e61 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:19:57 +0100
Subject: HWPOISON: Turn ref argument into flags argument

Now that "ref" is just a boolean turn it into
a flags argument. First step is only a single flag
that makes the code's intention more clear, but more
may follow.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/mm.h  | 5 ++++-
 mm/madvise.c        | 2 +-
 mm/memory-failure.c | 5 +++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 68c84bb2ad3f..135e19198cd3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1331,8 +1331,11 @@ extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
 				 size_t size);
 extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 
+enum mf_flags {
+	MF_COUNT_INCREASED = 1 << 0,
+};
 extern void memory_failure(unsigned long pfn, int trapno);
-extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int __memory_failure(unsigned long pfn, int trapno, int flags);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
diff --git a/mm/madvise.c b/mm/madvise.c
index 18970aec0d2f..6ca34f0cd4aa 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -237,7 +237,7 @@ static int madvise_hwpoison(unsigned long start, unsigned long end)
 		printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
 		       page_to_pfn(p), start);
 		/* Ignore return value for now */
-		__memory_failure(page_to_pfn(p), 0, 1);
+		__memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
 	}
 	return ret;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4253e14fa709..3338c443272c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -737,7 +737,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		      ret != SWAP_SUCCESS, pfn);
 }
 
-int __memory_failure(unsigned long pfn, int trapno, int ref)
+int __memory_failure(unsigned long pfn, int trapno, int flags)
 {
 	unsigned long lru_flag;
 	struct page_state *ps;
@@ -773,7 +773,8 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
 	 * In fact it's dangerous to directly bump up page count from 0,
 	 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
 	 */
-	if (!ref && !get_page_unless_zero(compound_head(p))) {
+	if (!(flags & MF_COUNT_INCREASED) &&
+		!get_page_unless_zero(compound_head(p))) {
 		action_result(pfn, "free or high order kernel", IGNORED);
 		return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
 	}
-- 
cgit v1.2.3


From 847ce401df392b0704369fd3f75df614ac1414b4 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:58 +0100
Subject: HWPOISON: Add unpoisoning support

The unpoisoning interface is useful for stress testing tools to
reclaim poisoned pages (to prevent OOM)

There is no hardware level unpoisioning, so this
cannot be used for real memory errors, only for software injected errors.

Note that it may leak pages silently - those who have been removed from
LRU cache, but not isolated from page cache/swap cache at hwpoison time.
Especially the stress test of dirty swap cache pages shall reboot system
before exhausting memory.

AK: Fix comments, add documentation, add printks, rename symbol

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Documentation/vm/hwpoison.txt | 16 ++++++++--
 include/linux/mm.h            |  1 +
 include/linux/page-flags.h    |  2 +-
 mm/hwpoison-inject.c          | 36 +++++++++++++++++++----
 mm/memory-failure.c           | 68 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index 3ffadf8da61f..f047e75acb23 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -98,10 +98,22 @@ madvise(MADV_POISON, ....)
 
 
 hwpoison-inject module through debugfs
-	/sys/debug/hwpoison/corrupt-pfn
 
-Inject hwpoison fault at PFN echoed into this file
+/sys/debug/hwpoison/
 
+corrupt-pfn
+
+Inject hwpoison fault at PFN echoed into this file.
+
+unpoison-pfn
+
+Software-unpoison page at PFN echoed into this file. This
+way a page can be reused again.
+This only works for Linux injected failures, not for real
+memory failures.
+
+Note these injection interfaces are not stable and might change between
+kernel versions
 
 Architecture specific MCE injector
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 135e19198cd3..8cdb941fc7b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1336,6 +1336,7 @@ enum mf_flags {
 };
 extern void memory_failure(unsigned long pfn, int trapno);
 extern int __memory_failure(unsigned long pfn, int trapno, int flags);
+extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 49e907bd067f..f9df6308af95 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -275,7 +275,7 @@ PAGEFLAG_FALSE(Uncached)
 
 #ifdef CONFIG_MEMORY_FAILURE
 PAGEFLAG(HWPoison, hwpoison)
-TESTSETFLAG(HWPoison, hwpoison)
+TESTSCFLAG(HWPoison, hwpoison)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
 #else
 PAGEFLAG_FALSE(HWPoison)
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index e1d85137f086..6e35e563bf50 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 
-static struct dentry *hwpoison_dir, *corrupt_pfn;
+static struct dentry *hwpoison_dir;
 
 static int hwpoison_inject(void *data, u64 val)
 {
@@ -14,7 +14,16 @@ static int hwpoison_inject(void *data, u64 val)
 	return __memory_failure(val, 18, 0);
 }
 
+static int hwpoison_unpoison(void *data, u64 val)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return unpoison_memory(val);
+}
+
 DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
 
 static void pfn_inject_exit(void)
 {
@@ -24,16 +33,31 @@ static void pfn_inject_exit(void)
 
 static int pfn_inject_init(void)
 {
+	struct dentry *dentry;
+
 	hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
 	if (hwpoison_dir == NULL)
 		return -ENOMEM;
-	corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+
+	/*
+	 * Note that the below poison/unpoison interfaces do not involve
+	 * hardware status change, hence do not require hardware support.
+	 * They are mainly for testing hwpoison in software level.
+	 */
+	dentry = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
 					  NULL, &hwpoison_fops);
-	if (corrupt_pfn == NULL) {
-		pfn_inject_exit();
-		return -ENOMEM;
-	}
+	if (!dentry)
+		goto fail;
+
+	dentry = debugfs_create_file("unpoison-pfn", 0600, hwpoison_dir,
+				     NULL, &unpoison_fops);
+	if (!dentry)
+		goto fail;
+
 	return 0;
+fail:
+	pfn_inject_exit();
+	return -ENOMEM;
 }
 
 module_init(pfn_inject_init);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5055b940df5f..ed6e91c87a54 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -838,6 +838,16 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * and in many cases impossible, so we just avoid it here.
 	 */
 	lock_page_nosync(p);
+
+	/*
+	 * unpoison always clear PG_hwpoison inside page lock
+	 */
+	if (!PageHWPoison(p)) {
+		action_result(pfn, "unpoisoned", IGNORED);
+		res = 0;
+		goto out;
+	}
+
 	wait_on_page_writeback(p);
 
 	/*
@@ -893,3 +903,61 @@ void memory_failure(unsigned long pfn, int trapno)
 {
 	__memory_failure(pfn, trapno, 0);
 }
+
+/**
+ * unpoison_memory - Unpoison a previously poisoned page
+ * @pfn: Page number of the to be unpoisoned page
+ *
+ * Software-unpoison a page that has been poisoned by
+ * memory_failure() earlier.
+ *
+ * This is only done on the software-level, so it only works
+ * for linux injected failures, not real hardware failures
+ *
+ * Returns 0 for success, otherwise -errno.
+ */
+int unpoison_memory(unsigned long pfn)
+{
+	struct page *page;
+	struct page *p;
+	int freeit = 0;
+
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+
+	p = pfn_to_page(pfn);
+	page = compound_head(p);
+
+	if (!PageHWPoison(p)) {
+		pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn);
+		return 0;
+	}
+
+	if (!get_page_unless_zero(page)) {
+		if (TestClearPageHWPoison(p))
+			atomic_long_dec(&mce_bad_pages);
+		pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
+		return 0;
+	}
+
+	lock_page_nosync(page);
+	/*
+	 * This test is racy because PG_hwpoison is set outside of page lock.
+	 * That's acceptable because that won't trigger kernel panic. Instead,
+	 * the PG_hwpoison page will be caught and isolated on the entrance to
+	 * the free buddy page pool.
+	 */
+	if (TestClearPageHWPoison(p)) {
+		pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
+		atomic_long_dec(&mce_bad_pages);
+		freeit = 1;
+	}
+	unlock_page(page);
+
+	put_page(page);
+	if (freeit)
+		put_page(page);
+
+	return 0;
+}
+EXPORT_SYMBOL(unpoison_memory);
-- 
cgit v1.2.3


From 1a9b5b7fe0c5dad8a635288882d36785dea742f9 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: mm: export stable page flags

Rename get_uflags() to stable_page_flags() and make it a global function
for use in the hwpoison page flags filter, which need to compare user
page flags with the value provided by user space.

Also move KPF_* to kernel-page-flags.h for use by user space tools.

Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
CC: Nick Piggin <npiggin@suse.de>
CC: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 fs/proc/page.c                    | 45 +++-----------------------------------
 include/linux/kernel-page-flags.h | 46 +++++++++++++++++++++++++++++++++++++++
 include/linux/page-flags.h        |  2 ++
 3 files changed, 51 insertions(+), 42 deletions(-)
 create mode 100644 include/linux/kernel-page-flags.h

(limited to 'include/linux')

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254b..180cf5a0bd67 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
+#include <linux/kernel-page-flags.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
  * physical page flags.
  */
 
-/* These macros are used to decouple internal flags from exported ones */
-
-#define KPF_LOCKED		0
-#define KPF_ERROR		1
-#define KPF_REFERENCED		2
-#define KPF_UPTODATE		3
-#define KPF_DIRTY		4
-#define KPF_LRU			5
-#define KPF_ACTIVE		6
-#define KPF_SLAB		7
-#define KPF_WRITEBACK		8
-#define KPF_RECLAIM		9
-#define KPF_BUDDY		10
-
-/* 11-20: new additions in 2.6.31 */
-#define KPF_MMAP		11
-#define KPF_ANON		12
-#define KPF_SWAPCACHE		13
-#define KPF_SWAPBACKED		14
-#define KPF_COMPOUND_HEAD	15
-#define KPF_COMPOUND_TAIL	16
-#define KPF_HUGE		17
-#define KPF_UNEVICTABLE		18
-#define KPF_HWPOISON		19
-#define KPF_NOPAGE		20
-
-#define KPF_KSM			21
-
-/* kernel hacking assistances
- * WARNING: subject to change, never rely on them!
- */
-#define KPF_RESERVED		32
-#define KPF_MLOCKED		33
-#define KPF_MAPPEDTODISK	34
-#define KPF_PRIVATE		35
-#define KPF_PRIVATE_2		36
-#define KPF_OWNER_PRIVATE	37
-#define KPF_ARCH		38
-#define KPF_UNCACHED		39
-
 static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
 {
 	return ((kflags >> kbit) & 1) << ubit;
 }
 
-static u64 get_uflags(struct page *page)
+u64 stable_page_flags(struct page *page)
 {
 	u64 k;
 	u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
 		else
 			ppage = NULL;
 
-		if (put_user(get_uflags(ppage), out)) {
+		if (put_user(stable_page_flags(ppage), out)) {
 			ret = -EFAULT;
 			break;
 		}
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
new file mode 100644
index 000000000000..bd92a89f4b0a
--- /dev/null
+++ b/include/linux/kernel-page-flags.h
@@ -0,0 +1,46 @@
+#ifndef LINUX_KERNEL_PAGE_FLAGS_H
+#define LINUX_KERNEL_PAGE_FLAGS_H
+
+/*
+ * Stable page flag bits exported to user space
+ */
+
+#define KPF_LOCKED		0
+#define KPF_ERROR		1
+#define KPF_REFERENCED		2
+#define KPF_UPTODATE		3
+#define KPF_DIRTY		4
+#define KPF_LRU			5
+#define KPF_ACTIVE		6
+#define KPF_SLAB		7
+#define KPF_WRITEBACK		8
+#define KPF_RECLAIM		9
+#define KPF_BUDDY		10
+
+/* 11-20: new additions in 2.6.31 */
+#define KPF_MMAP		11
+#define KPF_ANON		12
+#define KPF_SWAPCACHE		13
+#define KPF_SWAPBACKED		14
+#define KPF_COMPOUND_HEAD	15
+#define KPF_COMPOUND_TAIL	16
+#define KPF_HUGE		17
+#define KPF_UNEVICTABLE		18
+#define KPF_HWPOISON		19
+#define KPF_NOPAGE		20
+
+#define KPF_KSM			21
+
+/* kernel hacking assistances
+ * WARNING: subject to change, never rely on them!
+ */
+#define KPF_RESERVED		32
+#define KPF_MLOCKED		33
+#define KPF_MAPPEDTODISK	34
+#define KPF_PRIVATE		35
+#define KPF_PRIVATE_2		36
+#define KPF_OWNER_PRIVATE	37
+#define KPF_ARCH		38
+#define KPF_UNCACHED		39
+
+#endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f9df6308af95..feee2ba8d06a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -282,6 +282,8 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
+u64 stable_page_flags(struct page *page);
+
 static inline int PageUptodate(struct page *page)
 {
 	int ret = test_bit(PG_uptodate, &(page)->flags);
-- 
cgit v1.2.3


From e42d9d5d47961fb5db0be65b56dd52fe7b2421f1 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: memcg: rename and export try_get_mem_cgroup_from_page()

So that the hwpoison injector can get mem_cgroup for arbitrary page
and thus know whether it is owned by some mem_cgroup task(s).

[AK: Merged with latest git tree]

CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Hugh Dickins <hugh.dickins@tiscali.co.uk>
CC: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
CC: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 11 ++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bf9213b2db8f..fc9bae82ac42 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -68,6 +68,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
+extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 static inline
@@ -189,6 +190,11 @@ mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
 {
 }
 
+static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
 {
 	return 1;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e0c2066495e3..b5ac61ce7346 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1379,25 +1379,22 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 	return container_of(css, struct mem_cgroup, css);
 }
 
-static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
+struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem = NULL;
 	struct page_cgroup *pc;
 	unsigned short id;
 	swp_entry_t ent;
 
 	VM_BUG_ON(!PageLocked(page));
 
-	if (!PageSwapCache(page))
-		return NULL;
-
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		if (mem && !css_tryget(&mem->css))
 			mem = NULL;
-	} else {
+	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
 		id = lookup_swap_cgroup(ent);
 		rcu_read_lock();
@@ -1743,7 +1740,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	 */
 	if (!PageSwapCache(page))
 		goto charge_cur_mm;
-	mem = try_get_mem_cgroup_from_swapcache(page);
+	mem = try_get_mem_cgroup_from_page(page);
 	if (!mem)
 		goto charge_cur_mm;
 	*ptr = mem;
-- 
cgit v1.2.3


From d324236b3333e87c8825b35f2104184734020d35 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: memcg: add accessor to mem_cgroup.css

So that an outside user can free the reference count grabbed by
try_get_mem_cgroup_from_page().

CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Hugh Dickins <hugh.dickins@tiscali.co.uk>
CC: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
CC: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/memcontrol.h | 7 +++++++
 mm/memcontrol.c            | 5 +++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fc9bae82ac42..2c30a1116d84 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -81,6 +81,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 	return cgroup == mem;
 }
 
+extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
+
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
 extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
@@ -206,6 +208,11 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 	return 1;
 }
 
+static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+{
+	return NULL;
+}
+
 static inline int
 mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b5ac61ce7346..9eee80d6d490 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -282,6 +282,11 @@ mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
 	return &mem->info.nodeinfo[nid]->zoneinfo[zid];
 }
 
+struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+{
+	return &mem->css;
+}
+
 static struct mem_cgroup_per_zone *
 page_cgroup_zoneinfo(struct page_cgroup *pc)
 {
-- 
cgit v1.2.3


From facb6011f3993947283fa15d039dacb4ad140230 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:20:00 +0100
Subject: HWPOISON: Add soft page offline support

This is a simpler, gentler variant of memory_failure() for soft page
offlining controlled from user space.  It doesn't kill anything, just
tries to invalidate and if that doesn't work migrate the
page away.

This is useful for predictive failure analysis, where a page has
a high rate of corrected errors, but hasn't gone bad yet. Instead
it can be offlined early and avoided.

The offlining is controlled from sysfs, including a new generic
entry point for hard page offlining for symmetry too.

We use the page isolate facility to prevent re-allocation
race. Normally this is only used by memory hotplug. To avoid
races with memory allocation I am using lock_system_sleep().
This avoids the situation where memory hotplug is about
to isolate a page range and then hwpoison undoes that work.
This is a big hammer currently, but the simplest solution
currently.

When the page is not free or LRU we try to free pages
from slab and other caches. The slab freeing is currently
quite dumb and does not try to focus on the specific slab
cache which might own the page. This could be potentially
improved later.

Thanks to Fengguang Wu and Haicheng Li for some fixes.

[Added fix from Andrew Morton to adapt to new migrate_pages prototype]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 .../ABI/testing/sysfs-memory-page-offline          |  44 +++++
 drivers/base/memory.c                              |  61 +++++++
 include/linux/mm.h                                 |   3 +-
 mm/hwpoison-inject.c                               |   2 +-
 mm/memory-failure.c                                | 194 ++++++++++++++++++++-
 5 files changed, 297 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-memory-page-offline

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline
new file mode 100644
index 000000000000..e14703f12fdf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-memory-page-offline
@@ -0,0 +1,44 @@
+What:		/sys/devices/system/memory/soft_offline_page
+Date:		Sep 2009
+KernelVersion:	2.6.33
+Contact:	andi@firstfloor.org
+Description:
+		Soft-offline the memory page containing the physical address
+		written into this file. Input is a hex number specifying the
+		physical address of the page. The kernel will then attempt
+		to soft-offline it, by moving the contents elsewhere or
+		dropping it if possible. The kernel will then be placed
+		on the bad page list and never be reused.
+
+		The offlining is done in kernel specific granuality.
+		Normally it's the base page size of the kernel, but
+		this might change.
+
+		The page must be still accessible, not poisoned. The
+		kernel will never kill anything for this, but rather
+		fail the offline.  Return value is the size of the
+		number, or a error when the offlining failed.  Reading
+		the file is not allowed.
+
+What:		/sys/devices/system/memory/hard_offline_page
+Date:		Sep 2009
+KernelVersion:	2.6.33
+Contact:	andi@firstfloor.org
+Description:
+		Hard-offline the memory page containing the physical
+		address written into this file. Input is a hex number
+		specifying the physical address of the page. The
+		kernel will then attempt to hard-offline the page, by
+		trying to drop the page or killing any owner or
+		triggering IO errors if needed.  Note this may kill
+		any processes owning the page. The kernel will avoid
+		to access this page assuming it's poisoned by the
+		hardware.
+
+		The offlining is done in kernel specific granuality.
+		Normally it's the base page size of the kernel, but
+		this might change.
+
+		Return value is the size of the number, or a error when
+		the offlining failed.
+		Reading the file is not allowed.
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 989429cfed88..c4c8f2e1dd15 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -341,6 +341,64 @@ static inline int memory_probe_init(void)
 }
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for offlining pages of memory
+ */
+
+/* Soft offline a page */
+static ssize_t
+store_soft_offline_page(struct class *class, const char *buf, size_t count)
+{
+	int ret;
+	u64 pfn;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (strict_strtoull(buf, 0, &pfn) < 0)
+		return -EINVAL;
+	pfn >>= PAGE_SHIFT;
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+	ret = soft_offline_page(pfn_to_page(pfn), 0);
+	return ret == 0 ? count : ret;
+}
+
+/* Forcibly offline a page, including killing processes. */
+static ssize_t
+store_hard_offline_page(struct class *class, const char *buf, size_t count)
+{
+	int ret;
+	u64 pfn;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (strict_strtoull(buf, 0, &pfn) < 0)
+		return -EINVAL;
+	pfn >>= PAGE_SHIFT;
+	ret = __memory_failure(pfn, 0, 0);
+	return ret ? ret : count;
+}
+
+static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
+static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
+
+static __init int memory_fail_init(void)
+{
+	int err;
+
+	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				&class_attr_soft_offline_page.attr);
+	if (!err)
+		err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				&class_attr_hard_offline_page.attr);
+	return err;
+}
+#else
+static inline int memory_fail_init(void)
+{
+	return 0;
+}
+#endif
+
 /*
  * Note that phys_device is optional.  It is here to allow for
  * differentiation between which *physical* devices each
@@ -471,6 +529,9 @@ int __init memory_dev_init(void)
 	}
 
 	err = memory_probe_init();
+	if (!ret)
+		ret = err;
+	err = memory_fail_init();
 	if (!ret)
 		ret = err;
 	err = block_size_init();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8cdb941fc7b5..849b4a61bd8f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1339,8 +1339,9 @@ extern int __memory_failure(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
-extern void shake_page(struct page *p);
+extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
+extern int soft_offline_page(struct page *page, int flags);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c597f46ac18a..a77fe3f9e211 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -29,7 +29,7 @@ static int hwpoison_inject(void *data, u64 val)
 		return 0;
 
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	/*
 	 * This implies unable to support non-LRU pages.
 	 */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b5c3b6bd511f..bcce28755832 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -41,6 +41,9 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/backing-dev.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/suspend.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -201,7 +204,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
  * When a unknown page type is encountered drain as many buffers as possible
  * in the hope to turn the page into a LRU or free page, which we can handle.
  */
-void shake_page(struct page *p)
+void shake_page(struct page *p, int access)
 {
 	if (!PageSlab(p)) {
 		lru_add_drain_all();
@@ -211,11 +214,19 @@ void shake_page(struct page *p)
 		if (PageLRU(p) || is_free_buddy_page(p))
 			return;
 	}
+
 	/*
-	 * Could call shrink_slab here (which would also
-	 * shrink other caches). Unfortunately that might
-	 * also access the corrupted page, which could be fatal.
+	 * Only all shrink_slab here (which would also
+	 * shrink other caches) if access is not potentially fatal.
 	 */
+	if (access) {
+		int nr;
+		do {
+			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			if (page_count(p) == 0)
+				break;
+		} while (nr > 10);
+	}
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
@@ -949,7 +960,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	if (!PageLRU(p)) {
 		/*
 		 * shake_page could have turned it free.
@@ -1099,3 +1110,176 @@ int unpoison_memory(unsigned long pfn)
 	return 0;
 }
 EXPORT_SYMBOL(unpoison_memory);
+
+static struct page *new_page(struct page *p, unsigned long private, int **x)
+{
+	return alloc_pages(GFP_HIGHUSER_MOVABLE, 0);
+}
+
+/*
+ * Safely get reference count of an arbitrary page.
+ * Returns 0 for a free page, -EIO for a zero refcount page
+ * that is not free, and 1 for any other page type.
+ * For 1 the page is returned with increased page count, otherwise not.
+ */
+static int get_any_page(struct page *p, unsigned long pfn, int flags)
+{
+	int ret;
+
+	if (flags & MF_COUNT_INCREASED)
+		return 1;
+
+	/*
+	 * The lock_system_sleep prevents a race with memory hotplug,
+	 * because the isolation assumes there's only a single user.
+	 * This is a big hammer, a better would be nicer.
+	 */
+	lock_system_sleep();
+
+	/*
+	 * Isolate the page, so that it doesn't get reallocated if it
+	 * was free.
+	 */
+	set_migratetype_isolate(p);
+	if (!get_page_unless_zero(compound_head(p))) {
+		if (is_free_buddy_page(p)) {
+			pr_debug("get_any_page: %#lx free buddy page\n", pfn);
+			/* Set hwpoison bit while page is still isolated */
+			SetPageHWPoison(p);
+			ret = 0;
+		} else {
+			pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
+				pfn, p->flags);
+			ret = -EIO;
+		}
+	} else {
+		/* Not a free page */
+		ret = 1;
+	}
+	unset_migratetype_isolate(p);
+	unlock_system_sleep();
+	return ret;
+}
+
+/**
+ * soft_offline_page - Soft offline a page.
+ * @page: page to offline
+ * @flags: flags. Same as memory_failure().
+ *
+ * Returns 0 on success, otherwise negated errno.
+ *
+ * Soft offline a page, by migration or invalidation,
+ * without killing anything. This is for the case when
+ * a page is not corrupted yet (so it's still valid to access),
+ * but has had a number of corrected errors and is better taken
+ * out.
+ *
+ * The actual policy on when to do that is maintained by
+ * user space.
+ *
+ * This should never impact any application or cause data loss,
+ * however it might take some time.
+ *
+ * This is not a 100% solution for all memory, but tries to be
+ * ``good enough'' for the majority of memory.
+ */
+int soft_offline_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	/*
+	 * Page cache page we can handle?
+	 */
+	if (!PageLRU(page)) {
+		/*
+		 * Try to free it.
+		 */
+		put_page(page);
+		shake_page(page, 1);
+
+		/*
+		 * Did it turn free?
+		 */
+		ret = get_any_page(page, pfn, 0);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			goto done;
+	}
+	if (!PageLRU(page)) {
+		pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
+				pfn, page->flags);
+		return -EIO;
+	}
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+
+	/*
+	 * Synchronized using the page lock with memory_failure()
+	 */
+	if (PageHWPoison(page)) {
+		unlock_page(page);
+		put_page(page);
+		pr_debug("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/*
+	 * Try to invalidate first. This should work for
+	 * non dirty unmapped page cache pages.
+	 */
+	ret = invalidate_inode_page(page);
+	unlock_page(page);
+
+	/*
+	 * Drop count because page migration doesn't like raised
+	 * counts. The page could get re-allocated, but if it becomes
+	 * LRU the isolation will just fail.
+	 * RED-PEN would be better to keep it isolated here, but we
+	 * would need to fix isolation locking first.
+	 */
+	put_page(page);
+	if (ret == 1) {
+		ret = 0;
+		pr_debug("soft_offline: %#lx: invalidated\n", pfn);
+		goto done;
+	}
+
+	/*
+	 * Simple invalidation didn't work.
+	 * Try to migrate to a new page instead. migrate.c
+	 * handles a large number of cases for us.
+	 */
+	ret = isolate_lru_page(page);
+	if (!ret) {
+		LIST_HEAD(pagelist);
+
+		list_add(&page->lru, &pagelist);
+		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+		if (ret) {
+			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+				pfn, ret, page->flags);
+			if (ret > 0)
+				ret = -EIO;
+		}
+	} else {
+		pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
+				pfn, ret, page_count(page), page->flags);
+	}
+	if (ret)
+		return ret;
+
+done:
+	atomic_long_add(1, &mce_bad_pages);
+	SetPageHWPoison(page);
+	/* keep elevated page count for bad page */
+	return ret;
+}
-- 
cgit v1.2.3


From ee81152ff007a94f90327809566a4b7ff771ffd3 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 11 Dec 2009 11:15:06 -0300
Subject: V4L/DVB (13647): v4l: Add a 10-bit monochrome and missing 8- and
 10-bit Bayer fourcc codes

The 16-bit monochrome fourcc code has been previously abused for a 10-bit
format, add a new 10-bit code instead. Also add missing 8- and 10-bit Bayer
fourcc codes for completeness.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 59047ebf452e..d4962a782b8a 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -294,6 +294,7 @@ struct v4l2_pix_format {
 
 /* Grey formats */
 #define V4L2_PIX_FMT_GREY    v4l2_fourcc('G', 'R', 'E', 'Y') /*  8  Greyscale     */
+#define V4L2_PIX_FMT_Y10     v4l2_fourcc('Y', '1', '0', ' ') /* 10  Greyscale     */
 #define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y', '1', '6', ' ') /* 16  Greyscale     */
 
 /* Palette formats */
@@ -329,7 +330,11 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
 #define V4L2_PIX_FMT_SGBRG8  v4l2_fourcc('G', 'B', 'R', 'G') /*  8  GBGB.. RGRG.. */
 #define V4L2_PIX_FMT_SGRBG8  v4l2_fourcc('G', 'R', 'B', 'G') /*  8  GRGR.. BGBG.. */
-#define V4L2_PIX_FMT_SGRBG10 v4l2_fourcc('B', 'A', '1', '0') /* 10bit raw bayer */
+#define V4L2_PIX_FMT_SRGGB8  v4l2_fourcc('R', 'G', 'G', 'B') /*  8  RGRG.. GBGB.. */
+#define V4L2_PIX_FMT_SBGGR10 v4l2_fourcc('B', 'G', '1', '0') /* 10  BGBG.. GRGR.. */
+#define V4L2_PIX_FMT_SGBRG10 v4l2_fourcc('G', 'B', '1', '0') /* 10  GBGB.. RGRG.. */
+#define V4L2_PIX_FMT_SGRBG10 v4l2_fourcc('B', 'A', '1', '0') /* 10  GRGR.. BGBG.. */
+#define V4L2_PIX_FMT_SRGGB10 v4l2_fourcc('R', 'G', '1', '0') /* 10  RGRG.. GBGB.. */
 	/* 10bit raw bayer DPCM compressed to 8 bits */
 #define V4L2_PIX_FMT_SGRBG10DPCM8 v4l2_fourcc('B', 'D', '1', '0')
 	/*
-- 
cgit v1.2.3


From 9905a43b2d563e6f89e4c63c4278ada03f2ebb14 Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Mon, 14 Dec 2009 00:58:57 +0100
Subject: backlight: Constify struct backlight_ops

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/adp5520_bl.c    |  2 +-
 drivers/video/backlight/adx_bl.c        |  2 +-
 drivers/video/backlight/atmel-pwm-bl.c  |  2 +-
 drivers/video/backlight/backlight.c     |  2 +-
 drivers/video/backlight/corgi_lcd.c     |  2 +-
 drivers/video/backlight/cr_bllcd.c      |  2 +-
 drivers/video/backlight/da903x_bl.c     |  2 +-
 drivers/video/backlight/generic_bl.c    |  2 +-
 drivers/video/backlight/hp680_bl.c      |  2 +-
 drivers/video/backlight/jornada720_bl.c |  2 +-
 drivers/video/backlight/kb3886_bl.c     |  2 +-
 drivers/video/backlight/locomolcd.c     |  2 +-
 drivers/video/backlight/mbp_nvidia_bl.c |  2 +-
 drivers/video/backlight/omap1_bl.c      |  2 +-
 drivers/video/backlight/progear_bl.c    |  2 +-
 drivers/video/backlight/pwm_bl.c        |  2 +-
 drivers/video/backlight/tosa_bl.c       |  2 +-
 drivers/video/backlight/wm831x_bl.c     |  2 +-
 include/linux/backlight.h               | 12 ++++++------
 19 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index 4c10edecfb66..86d95c228adb 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -85,7 +85,7 @@ static int adp5520_bl_get_brightness(struct backlight_device *bl)
 	return error ? data->current_brightness : reg_val;
 }
 
-static struct backlight_ops adp5520_bl_ops = {
+static const struct backlight_ops adp5520_bl_ops = {
 	.update_status	= adp5520_bl_update_status,
 	.get_brightness	= adp5520_bl_get_brightness,
 };
diff --git a/drivers/video/backlight/adx_bl.c b/drivers/video/backlight/adx_bl.c
index 2c3bdfc620b7..d769b0bab21a 100644
--- a/drivers/video/backlight/adx_bl.c
+++ b/drivers/video/backlight/adx_bl.c
@@ -61,7 +61,7 @@ static int adx_backlight_check_fb(struct fb_info *fb)
 	return 1;
 }
 
-static struct backlight_ops adx_backlight_ops = {
+static const struct backlight_ops adx_backlight_ops = {
 	.options = 0,
 	.update_status = adx_backlight_update_status,
 	.get_brightness = adx_backlight_get_brightness,
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index 2cf7ba52f67c..f625ffc69ad3 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -113,7 +113,7 @@ static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl)
 	return pwm_channel_enable(&pwmbl->pwmc);
 }
 
-static struct backlight_ops atmel_pwm_bl_ops = {
+static const struct backlight_ops atmel_pwm_bl_ops = {
 	.get_brightness = atmel_pwm_bl_get_intensity,
 	.update_status  = atmel_pwm_bl_set_intensity,
 };
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 6615ac7fa60a..18829cf68b1b 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(backlight_force_update);
  * ERR_PTR() or a pointer to the newly allocated device.
  */
 struct backlight_device *backlight_device_register(const char *name,
-		struct device *parent, void *devdata, struct backlight_ops *ops)
+		struct device *parent, void *devdata, const struct backlight_ops *ops)
 {
 	struct backlight_device *new_bd;
 	int rc;
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index 96774949cd30..b4bcf8043797 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -451,7 +451,7 @@ void corgi_lcd_limit_intensity(int limit)
 }
 EXPORT_SYMBOL(corgi_lcd_limit_intensity);
 
-static struct backlight_ops corgi_bl_ops = {
+static const struct backlight_ops corgi_bl_ops = {
 	.get_brightness	= corgi_bl_get_intensity,
 	.update_status  = corgi_bl_update_status,
 };
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index b9fe62b475c6..2914bf104adf 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -108,7 +108,7 @@ static int cr_backlight_get_intensity(struct backlight_device *bd)
 	return intensity;
 }
 
-static struct backlight_ops cr_backlight_ops = {
+static const struct backlight_ops cr_backlight_ops = {
 	.get_brightness = cr_backlight_get_intensity,
 	.update_status = cr_backlight_set_intensity,
 };
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index f2d76dae1eb3..74cdc640173d 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -95,7 +95,7 @@ static int da903x_backlight_get_brightness(struct backlight_device *bl)
 	return data->current_brightness;
 }
 
-static struct backlight_ops da903x_backlight_ops = {
+static const struct backlight_ops da903x_backlight_ops = {
 	.update_status	= da903x_backlight_update_status,
 	.get_brightness	= da903x_backlight_get_brightness,
 };
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index 6d27f62fdcd0..e6d348e63596 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -70,7 +70,7 @@ void corgibl_limit_intensity(int limit)
 }
 EXPORT_SYMBOL(corgibl_limit_intensity);
 
-static struct backlight_ops genericbl_ops = {
+static const struct backlight_ops genericbl_ops = {
 	.options = BL_CORE_SUSPENDRESUME,
 	.get_brightness = genericbl_get_intensity,
 	.update_status  = genericbl_send_intensity,
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 7fb4eefff80d..f7cc528d5be7 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -98,7 +98,7 @@ static int hp680bl_get_intensity(struct backlight_device *bd)
 	return current_intensity;
 }
 
-static struct backlight_ops hp680bl_ops = {
+static const struct backlight_ops hp680bl_ops = {
 	.get_brightness = hp680bl_get_intensity,
 	.update_status  = hp680bl_set_intensity,
 };
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index 7aed2565c1bd..db9071fc5665 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -93,7 +93,7 @@ out:
 	return ret;
 }
 
-static struct backlight_ops jornada_bl_ops = {
+static const struct backlight_ops jornada_bl_ops = {
 	.get_brightness = jornada_bl_get_brightness,
 	.update_status = jornada_bl_update_status,
 	.options = BL_CORE_SUSPENDRESUME,
diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c
index a38fda1742dd..939e7b830cf3 100644
--- a/drivers/video/backlight/kb3886_bl.c
+++ b/drivers/video/backlight/kb3886_bl.c
@@ -134,7 +134,7 @@ static int kb3886bl_get_intensity(struct backlight_device *bd)
 	return kb3886bl_intensity;
 }
 
-static struct backlight_ops kb3886bl_ops = {
+static const struct backlight_ops kb3886bl_ops = {
 	.get_brightness = kb3886bl_get_intensity,
 	.update_status  = kb3886bl_send_intensity,
 };
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 6b488b8a7eee..00a9591b0003 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -141,7 +141,7 @@ static int locomolcd_get_intensity(struct backlight_device *bd)
 	return current_intensity;
 }
 
-static struct backlight_ops locomobl_data = {
+static const struct backlight_ops locomobl_data = {
 	.get_brightness = locomolcd_get_intensity,
 	.update_status  = locomolcd_set_intensity,
 };
diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c
index 9edb8d7c295f..581246894733 100644
--- a/drivers/video/backlight/mbp_nvidia_bl.c
+++ b/drivers/video/backlight/mbp_nvidia_bl.c
@@ -33,7 +33,7 @@ struct dmi_match_data {
 	unsigned long iostart;
 	unsigned long iolen;
 	/* Backlight operations structure. */
-	struct backlight_ops backlight_ops;
+	const struct backlight_ops backlight_ops;
 };
 
 /* Module parameters. */
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index 8693e5fcd2eb..409ca9643528 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -125,7 +125,7 @@ static int omapbl_get_intensity(struct backlight_device *dev)
 	return bl->current_intensity;
 }
 
-static struct backlight_ops omapbl_ops = {
+static const struct backlight_ops omapbl_ops = {
 	.get_brightness = omapbl_get_intensity,
 	.update_status  = omapbl_update_status,
 };
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 9edaf24fd82d..075786e05034 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -54,7 +54,7 @@ static int progearbl_get_intensity(struct backlight_device *bd)
 	return intensity - HW_LEVEL_MIN;
 }
 
-static struct backlight_ops progearbl_ops = {
+static const struct backlight_ops progearbl_ops = {
 	.get_brightness = progearbl_get_intensity,
 	.update_status = progearbl_set_intensity,
 };
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 887166267443..df9e0b32cf39 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -56,7 +56,7 @@ static int pwm_backlight_get_brightness(struct backlight_device *bl)
 	return bl->props.brightness;
 }
 
-static struct backlight_ops pwm_backlight_ops = {
+static const struct backlight_ops pwm_backlight_ops = {
 	.update_status	= pwm_backlight_update_status,
 	.get_brightness	= pwm_backlight_get_brightness,
 };
diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c
index 43edbada12d1..e14ce4d469f5 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -72,7 +72,7 @@ static int tosa_bl_get_brightness(struct backlight_device *dev)
 	return props->brightness;
 }
 
-static struct backlight_ops bl_ops = {
+static const struct backlight_ops bl_ops = {
 	.get_brightness		= tosa_bl_get_brightness,
 	.update_status		= tosa_bl_update_status,
 };
diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c
index 467bdb7efb23..e32add37a203 100644
--- a/drivers/video/backlight/wm831x_bl.c
+++ b/drivers/video/backlight/wm831x_bl.c
@@ -112,7 +112,7 @@ static int wm831x_backlight_get_brightness(struct backlight_device *bl)
 	return data->current_brightness;
 }
 
-static struct backlight_ops wm831x_backlight_ops = {
+static const struct backlight_ops wm831x_backlight_ops = {
 	.options = BL_CORE_SUSPENDRESUME,
 	.update_status	= wm831x_backlight_update_status,
 	.get_brightness	= wm831x_backlight_get_brightness,
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 0f5f57858a23..8c4f884db6b4 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -36,18 +36,18 @@ struct backlight_device;
 struct fb_info;
 
 struct backlight_ops {
-	unsigned int options;
+	const unsigned int options;
 
 #define BL_CORE_SUSPENDRESUME	(1 << 0)
 
 	/* Notify the backlight driver some property has changed */
-	int (*update_status)(struct backlight_device *);
+	int (* const update_status)(struct backlight_device *);
 	/* Return the current backlight brightness (accounting for power,
 	   fb_blank etc.) */
-	int (*get_brightness)(struct backlight_device *);
+	int (* const get_brightness)(struct backlight_device *);
 	/* Check if given framebuffer device is the one bound to this backlight;
 	   return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. */
-	int (*check_fb)(struct fb_info *);
+	int (* const check_fb)(struct fb_info *);
 };
 
 /* This structure defines all the properties of a backlight */
@@ -86,7 +86,7 @@ struct backlight_device {
 	   registered this device has been unloaded, and if class_get_devdata()
 	   points to something in the body of that driver, it is also invalid. */
 	struct mutex ops_lock;
-	struct backlight_ops *ops;
+	const struct backlight_ops *ops;
 
 	/* The framebuffer notifier block */
 	struct notifier_block fb_notif;
@@ -103,7 +103,7 @@ static inline void backlight_update_status(struct backlight_device *bd)
 }
 
 extern struct backlight_device *backlight_device_register(const char *name,
-	struct device *dev, void *devdata, struct backlight_ops *ops);
+	struct device *dev, void *devdata, const struct backlight_ops *ops);
 extern void backlight_device_unregister(struct backlight_device *bd);
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
-- 
cgit v1.2.3


From f42647acc4eab1befa9e290691ed7a40f9a7d3cc Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Wed, 16 Dec 2009 08:56:57 +0000
Subject: fix ksm.h breakage of nommu build

Commit 5ad6468801d28c4d4ac9f48ec19297817c915f6a "ksm: let shared pages
be swappable" breaks the build on m68knommu and I suspect on any nommu:

  In file included from kernel/fork.c:52:
  include/linux/ksm.h:129: warning: 'enum ttu_flags' declared inside parameter list
  include/linux/ksm.h:129: warning: its scope is only this definition or declaration, which is probably not what you want
  include/linux/ksm.h:129: error: parameter 2 ('flags') has incomplete type
  make[1]: *** [kernel/fork.o] Error 1
  make[1]: *** Waiting for unfinished jobs....

Let's fix that with CONFIG_MMU around most of the !CONFIG_KSM declarations.

Reported-by: Steven King <sfking@fdwdc.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Tested-by: Steven King <sfking@fdwdc.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index bed5f16ba827..43bdab769fc3 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -94,12 +94,6 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else  /* !CONFIG_KSM */
 
-static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
-		unsigned long end, int advice, unsigned long *vm_flags)
-{
-	return 0;
-}
-
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	return 0;
@@ -114,6 +108,13 @@ static inline int PageKsm(struct page *page)
 	return 0;
 }
 
+#ifdef CONFIG_MMU
+static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	return 0;
+}
+
 static inline struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
@@ -140,6 +141,7 @@ static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
 static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
 }
+#endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
 
 #endif /* __LINUX_KSM_H */
-- 
cgit v1.2.3


From 4365a5676fa3aa1d5ae6c90c22a0044f09ba584e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
Date: Tue, 15 Dec 2009 16:45:33 -0800
Subject: oom-kill: fix NUMA constraint check with nodemask

Fix node-oriented allocation handling in oom-kill.c I myself think of this
as a bugfix not as an ehnancement.

In these days, things are changed as
  - alloc_pages() eats nodemask as its arguments, __alloc_pages_nodemask().
  - mempolicy don't maintain its own private zonelists.
  (And cpuset doesn't use nodemask for __alloc_pages_nodemask())

So, current oom-killer's check function is wrong.

This patch does
  - check nodemask, if nodemask && nodemask doesn't cover all
    node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY.
  - Scan all zonelist under nodemask, if it hits cpuset's wall
    this faiulre is from cpuset.
And
  - modifies the caller of out_of_memory not to call oom if __GFP_THISNODE.
    This doesn't change "current" behavior. If callers use __GFP_THISNODE
    it should handle "page allocation failure" by itself.

  - handle __GFP_NOFAIL+__GFP_THISNODE path.
    This is something like a FIXME but this gfpmask is not used now.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/sysrq.c |  2 +-
 include/linux/oom.h  |  4 +++-
 mm/oom_kill.c        | 46 +++++++++++++++++++++++++++++++++-------------
 mm/page_alloc.c      | 22 ++++++++++++++++------
 4 files changed, 53 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 44203ff599da..1ae2de7d8b4f 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -339,7 +339,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-	out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0);
+	out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL);
 }
 
 static DECLARE_WORK(moom_work, moom_callback);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6aac5fe4f6f1..537662315627 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -10,6 +10,7 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <linux/nodemask.h>
 
 struct zonelist;
 struct notifier_block;
@@ -26,7 +27,8 @@ enum oom_constraint {
 extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
-extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *mask);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6bb8a7a7ec9a..25c679e0288a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 /*
  * Determine the type of allocation constraint.
  */
-static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-						    gfp_t gfp_mask)
-{
 #ifdef CONFIG_NUMA
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+				    gfp_t gfp_mask, nodemask_t *nodemask)
+{
 	struct zone *zone;
 	struct zoneref *z;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		if (cpuset_zone_allowed_softwall(zone, gfp_mask))
-			node_clear(zone_to_nid(zone), nodes);
-		else
-			return CONSTRAINT_CPUSET;
+	/*
+	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
+	 * to kill current.We have to random task kill in this case.
+	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
+	 */
+	if (gfp_mask & __GFP_THISNODE)
+		return CONSTRAINT_NONE;
 
-	if (!nodes_empty(nodes))
+	/*
+	 * The nodemask here is a nodemask passed to alloc_pages(). Now,
+	 * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
+	 * feature. mempolicy is an only user of nodemask here.
+	 * check mempolicy's nodemask contains all N_HIGH_MEMORY
+	 */
+	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
 		return CONSTRAINT_MEMORY_POLICY;
-#endif
+
+	/* Check this allocation failure is caused by cpuset's wall function */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+			high_zoneidx, nodemask)
+		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
+			return CONSTRAINT_CPUSET;
 
 	return CONSTRAINT_NONE;
 }
+#else
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+				gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	return CONSTRAINT_NONE;
+}
+#endif
 
 /*
  * Simple selection loop. We chose the process with the highest
@@ -613,7 +632,8 @@ rest_and_return:
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask)
 {
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
@@ -632,7 +652,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask);
+	constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
 	read_lock(&tasklist_lock);
 
 	switch (constraint) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 59d2e88fb47c..850c4a7e2fe5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1654,12 +1654,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto out;
 
-	/* The OOM killer will not help higher order allocs */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-		goto out;
-
+	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* The OOM killer will not help higher order allocs */
+		if (order > PAGE_ALLOC_COSTLY_ORDER)
+			goto out;
+		/*
+		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+		 * The caller should handle page allocation failure by itself if
+		 * it specifies __GFP_THISNODE.
+		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+		 */
+		if (gfp_mask & __GFP_THISNODE)
+			goto out;
+	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
@@ -3123,7 +3133,7 @@ static int __cpuinit process_zones(int cpu)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			 	(zone->present_pages / percpu_pagelist_fraction));
+			    (zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 35570ac6039ef490b9c5abde1fee4803a39bf4e1 Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@mocean-labs.com>
Date: Tue, 15 Dec 2009 16:46:18 -0800
Subject: gpio: add GPIO driver for the Timberdale FPGA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A GPIO driver for the Timberdale FPGA found on the Intel Atom board
Russellville.

The GPIO driver also has an IRQ-chip to support interrupts on the pins.

Signed-off-by: Richard Röjfors <richard.rojfors@mocean-labs.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig      |   6 +
 drivers/gpio/Makefile     |   1 +
 drivers/gpio/timbgpio.c   | 342 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/timb_gpio.h |  37 +++++
 4 files changed, 386 insertions(+)
 create mode 100644 drivers/gpio/timbgpio.c
 create mode 100644 include/linux/timb_gpio.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 57ca339924ef..a019b49ecc9b 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -206,6 +206,12 @@ config GPIO_LANGWELL
 	help
 	  Say Y here to support Intel Moorestown platform GPIO.
 
+config GPIO_TIMBERDALE
+	bool "Support for timberdale GPIO IP"
+	depends on MFD_TIMBERDALE && GPIOLIB && HAS_IOMEM
+	---help---
+	Add support for the GPIO IP in the timberdale FPGA.
+
 comment "SPI GPIO expanders:"
 
 config GPIO_MAX7301
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 270b6d7839f5..52fe4cf734c7 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
 obj-$(CONFIG_GPIO_PL061)	+= pl061.o
+obj-$(CONFIG_GPIO_TIMBERDALE)	+= timbgpio.o
 obj-$(CONFIG_GPIO_TWL4030)	+= twl4030-gpio.o
 obj-$(CONFIG_GPIO_UCB1400)	+= ucb1400_gpio.o
 obj-$(CONFIG_GPIO_XILINX)	+= xilinx_gpio.o
diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c
new file mode 100644
index 000000000000..a4d344ba8e5c
--- /dev/null
+++ b/drivers/gpio/timbgpio.c
@@ -0,0 +1,342 @@
+/*
+ * timbgpio.c timberdale FPGA GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA GPIO
+ */
+
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/timb_gpio.h>
+#include <linux/interrupt.h>
+
+#define DRIVER_NAME "timb-gpio"
+
+#define TGPIOVAL	0x00
+#define TGPIODIR	0x04
+#define TGPIO_IER	0x08
+#define TGPIO_ISR	0x0c
+#define TGPIO_IPR	0x10
+#define TGPIO_ICR	0x14
+#define TGPIO_FLR	0x18
+#define TGPIO_LVR	0x1c
+
+struct timbgpio {
+	void __iomem		*membase;
+	spinlock_t		lock; /* mutual exclusion */
+	struct gpio_chip	gpio;
+	int			irq_base;
+};
+
+static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index,
+	unsigned offset, bool enabled)
+{
+	struct timbgpio *tgpio = container_of(gpio, struct timbgpio, gpio);
+	u32 reg;
+
+	spin_lock(&tgpio->lock);
+	reg = ioread32(tgpio->membase + offset);
+
+	if (enabled)
+		reg |= (1 << index);
+	else
+		reg &= ~(1 << index);
+
+	iowrite32(reg, tgpio->membase + offset);
+	spin_unlock(&tgpio->lock);
+
+	return 0;
+}
+
+static int timbgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
+{
+	return timbgpio_update_bit(gpio, nr, TGPIODIR, true);
+}
+
+static int timbgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
+{
+	struct timbgpio *tgpio = container_of(gpio, struct timbgpio, gpio);
+	u32 value;
+
+	value = ioread32(tgpio->membase + TGPIOVAL);
+	return (value & (1 << nr)) ? 1 : 0;
+}
+
+static int timbgpio_gpio_direction_output(struct gpio_chip *gpio,
+						unsigned nr, int val)
+{
+	return timbgpio_update_bit(gpio, nr, TGPIODIR, false);
+}
+
+static void timbgpio_gpio_set(struct gpio_chip *gpio,
+				unsigned nr, int val)
+{
+	timbgpio_update_bit(gpio, nr, TGPIOVAL, val != 0);
+}
+
+static int timbgpio_to_irq(struct gpio_chip *gpio, unsigned offset)
+{
+	struct timbgpio *tgpio = container_of(gpio, struct timbgpio, gpio);
+
+	if (tgpio->irq_base <= 0)
+		return -EINVAL;
+
+	return tgpio->irq_base + offset;
+}
+
+/*
+ * GPIO IRQ
+ */
+static void timbgpio_irq_disable(unsigned irq)
+{
+	struct timbgpio *tgpio = get_irq_chip_data(irq);
+	int offset = irq - tgpio->irq_base;
+
+	timbgpio_update_bit(&tgpio->gpio, offset, TGPIO_IER, 0);
+}
+
+static void timbgpio_irq_enable(unsigned irq)
+{
+	struct timbgpio *tgpio = get_irq_chip_data(irq);
+	int offset = irq - tgpio->irq_base;
+
+	timbgpio_update_bit(&tgpio->gpio, offset, TGPIO_IER, 1);
+}
+
+static int timbgpio_irq_type(unsigned irq, unsigned trigger)
+{
+	struct timbgpio *tgpio = get_irq_chip_data(irq);
+	int offset = irq - tgpio->irq_base;
+	unsigned long flags;
+	u32 lvr, flr;
+
+	if (offset < 0 || offset > tgpio->gpio.ngpio)
+		return -EINVAL;
+
+	spin_lock_irqsave(&tgpio->lock, flags);
+
+	lvr = ioread32(tgpio->membase + TGPIO_LVR);
+	flr = ioread32(tgpio->membase + TGPIO_FLR);
+
+	if (trigger & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) {
+		flr &= ~(1 << offset);
+		if (trigger & IRQ_TYPE_LEVEL_HIGH)
+			lvr |= 1 << offset;
+		else
+			lvr &= ~(1 << offset);
+	}
+
+	if ((trigger & IRQ_TYPE_EDGE_BOTH) == IRQ_TYPE_EDGE_BOTH)
+		return -EINVAL;
+	else {
+		flr |= 1 << offset;
+		/* opposite compared to the datasheet, but it mirrors the
+		 * reality
+		 */
+		if (trigger & IRQ_TYPE_EDGE_FALLING)
+			lvr |= 1 << offset;
+		else
+			lvr &= ~(1 << offset);
+	}
+
+	iowrite32(lvr, tgpio->membase + TGPIO_LVR);
+	iowrite32(flr, tgpio->membase + TGPIO_FLR);
+	iowrite32(1 << offset, tgpio->membase + TGPIO_ICR);
+	spin_unlock_irqrestore(&tgpio->lock, flags);
+
+	return 0;
+}
+
+static void timbgpio_irq(unsigned int irq, struct irq_desc *desc)
+{
+	struct timbgpio *tgpio = get_irq_data(irq);
+	unsigned long ipr;
+	int offset;
+
+	desc->chip->ack(irq);
+	ipr = ioread32(tgpio->membase + TGPIO_IPR);
+	iowrite32(ipr, tgpio->membase + TGPIO_ICR);
+
+	for_each_bit(offset, &ipr, tgpio->gpio.ngpio)
+		generic_handle_irq(timbgpio_to_irq(&tgpio->gpio, offset));
+}
+
+static struct irq_chip timbgpio_irqchip = {
+	.name		= "GPIO",
+	.enable		= timbgpio_irq_enable,
+	.disable	= timbgpio_irq_disable,
+	.set_type	= timbgpio_irq_type,
+};
+
+static int __devinit timbgpio_probe(struct platform_device *pdev)
+{
+	int err, i;
+	struct gpio_chip *gc;
+	struct timbgpio *tgpio;
+	struct resource *iomem;
+	struct timbgpio_platform_data *pdata = pdev->dev.platform_data;
+	int irq = platform_get_irq(pdev, 0);
+
+	if (!pdata || pdata->nr_pins > 32) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	tgpio = kzalloc(sizeof(*tgpio), GFP_KERNEL);
+	if (!tgpio) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+	tgpio->irq_base = pdata->irq_base;
+
+	spin_lock_init(&tgpio->lock);
+
+	if (!request_mem_region(iomem->start, resource_size(iomem),
+		DRIVER_NAME)) {
+		err = -EBUSY;
+		goto err_request;
+	}
+
+	tgpio->membase = ioremap(iomem->start, resource_size(iomem));
+	if (!tgpio->membase) {
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	gc = &tgpio->gpio;
+
+	gc->label = dev_name(&pdev->dev);
+	gc->owner = THIS_MODULE;
+	gc->dev = &pdev->dev;
+	gc->direction_input = timbgpio_gpio_direction_input;
+	gc->get = timbgpio_gpio_get;
+	gc->direction_output = timbgpio_gpio_direction_output;
+	gc->set = timbgpio_gpio_set;
+	gc->to_irq = (irq >= 0 && tgpio->irq_base > 0) ? timbgpio_to_irq : NULL;
+	gc->dbg_show = NULL;
+	gc->base = pdata->gpio_base;
+	gc->ngpio = pdata->nr_pins;
+	gc->can_sleep = 0;
+
+	err = gpiochip_add(gc);
+	if (err)
+		goto err_chipadd;
+
+	platform_set_drvdata(pdev, tgpio);
+
+	/* make sure to disable interrupts */
+	iowrite32(0x0, tgpio->membase + TGPIO_IER);
+
+	if (irq < 0 || tgpio->irq_base <= 0)
+		return 0;
+
+	for (i = 0; i < pdata->nr_pins; i++) {
+		set_irq_chip_and_handler_name(tgpio->irq_base + i,
+			&timbgpio_irqchip, handle_simple_irq, "mux");
+		set_irq_chip_data(tgpio->irq_base + i, tgpio);
+#ifdef CONFIG_ARM
+		set_irq_flags(tgpio->irq_base + i, IRQF_VALID | IRQF_PROBE);
+#endif
+	}
+
+	set_irq_data(irq, tgpio);
+	set_irq_chained_handler(irq, timbgpio_irq);
+
+	return 0;
+
+err_chipadd:
+	iounmap(tgpio->membase);
+err_ioremap:
+	release_mem_region(iomem->start, resource_size(iomem));
+err_request:
+	kfree(tgpio);
+err_mem:
+	printk(KERN_ERR DRIVER_NAME": Failed to register GPIOs: %d\n", err);
+
+	return err;
+}
+
+static int __devexit timbgpio_remove(struct platform_device *pdev)
+{
+	int err;
+	struct timbgpio_platform_data *pdata = pdev->dev.platform_data;
+	struct timbgpio *tgpio = platform_get_drvdata(pdev);
+	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+
+	if (irq >= 0 && tgpio->irq_base > 0) {
+		int i;
+		for (i = 0; i < pdata->nr_pins; i++) {
+			set_irq_chip(tgpio->irq_base + i, NULL);
+			set_irq_chip_data(tgpio->irq_base + i, NULL);
+		}
+
+		set_irq_handler(irq, NULL);
+		set_irq_data(irq, NULL);
+	}
+
+	err = gpiochip_remove(&tgpio->gpio);
+	if (err)
+		printk(KERN_ERR DRIVER_NAME": failed to remove gpio_chip\n");
+
+	iounmap(tgpio->membase);
+	release_mem_region(iomem->start, resource_size(iomem));
+	kfree(tgpio);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver timbgpio_platform_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= timbgpio_probe,
+	.remove		= timbgpio_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __init timbgpio_init(void)
+{
+	return platform_driver_register(&timbgpio_platform_driver);
+}
+
+static void __exit timbgpio_exit(void)
+{
+	platform_driver_unregister(&timbgpio_platform_driver);
+}
+
+module_init(timbgpio_init);
+module_exit(timbgpio_exit);
+
+MODULE_DESCRIPTION("Timberdale GPIO driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mocean Laboratories");
+MODULE_ALIAS("platform:"DRIVER_NAME);
+
diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h
new file mode 100644
index 000000000000..ce456eaae861
--- /dev/null
+++ b/include/linux/timb_gpio.h
@@ -0,0 +1,37 @@
+/*
+ * timb_gpio.h timberdale FPGA GPIO driver, platform data definition
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_TIMB_GPIO_H
+#define _LINUX_TIMB_GPIO_H
+
+/**
+ * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver
+ * @gpio_base		The number of the first GPIO pin, set to -1 for
+ *			dynamic number allocation.
+ * @nr_pins		Number of pins that is supported by the hardware (1-32)
+ * @irq_base		If IRQ is supported by the hardware, this is the base
+ *			number of IRQ:s. One IRQ per pin will be used. Set to
+ *			-1 if IRQ:s is not supported.
+ */
+struct timbgpio_platform_data {
+	int gpio_base;
+	int nr_pins;
+	int irq_base;
+};
+
+#endif
-- 
cgit v1.2.3


From 0769746183caff9d4334be48c7b0e7d2ec8716c4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <ext-jani.1.nikula@nokia.com>
Date: Tue, 15 Dec 2009 16:46:20 -0800
Subject: gpiolib: add support for changing value polarity in sysfs

Drivers may use gpiolib sysfs as part of their public user space
interface. The GPIO number and polarity might change from board to
board. The gpio_export_link() call can be used to hide the GPIO number
from user space. Add support for also hiding the GPIO line polarity
changes from user space.

Signed-off-by: Jani Nikula <ext-jani.1.nikula@nokia.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt     |  15 +++++
 drivers/gpio/gpiolib.c     | 161 +++++++++++++++++++++++++++++++++++++++++----
 include/asm-generic/gpio.h |   6 ++
 include/linux/gpio.h       |   6 ++
 4 files changed, 176 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index e4e7daed2ba8..1866c27eec69 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -531,6 +531,13 @@ and have the following read/write attributes:
 		This file exists only if the pin can be configured as an
 		interrupt generating input pin.
 
+	"active_low" ... reads as either 0 (false) or 1 (true).  Write
+		any nonzero value to invert the value attribute both
+		for reading and writing.  Existing and subsequent
+		poll(2) support configuration via the edge attribute
+		for "rising" and "falling" edges will follow this
+		setting.
+
 GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
 controller implementing GPIOs starting at #42) and have the following
 read-only attributes:
@@ -566,6 +573,8 @@ requested using gpio_request():
 	int gpio_export_link(struct device *dev, const char *name,
 		unsigned gpio)
 
+	/* change the polarity of a GPIO node in sysfs */
+	int gpio_sysfs_set_active_low(unsigned gpio, int value);
 
 After a kernel driver requests a GPIO, it may only be made available in
 the sysfs interface by gpio_export().  The driver can control whether the
@@ -580,3 +589,9 @@ After the GPIO has been exported, gpio_export_link() allows creating
 symlinks from elsewhere in sysfs to the GPIO sysfs node.  Drivers can
 use this to provide the interface under their own device in sysfs with
 a descriptive name.
+
+Drivers can use gpio_sysfs_set_active_low() to hide GPIO line polarity
+differences between boards from user space.  This only affects the
+sysfs interface.  Polarity change can be done both before and after
+gpio_export(), and previously enabled poll(2) support for either
+rising or falling edge will be reconfigured to follow this setting.
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 50de0f5750d8..a25ad284a272 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -53,6 +53,7 @@ struct gpio_desc {
 #define FLAG_SYSFS	4	/* exported via /sys/class/gpio/control */
 #define FLAG_TRIG_FALL	5	/* trigger on falling edge */
 #define FLAG_TRIG_RISE	6	/* trigger on rising edge */
+#define FLAG_ACTIVE_LOW	7	/* sysfs value has active low */
 
 #define PDESC_ID_SHIFT	16	/* add new flags before this one */
 
@@ -210,6 +211,11 @@ static DEFINE_MUTEX(sysfs_lock);
  *      * configures behavior of poll(2) on /value
  *      * available only if pin can generate IRQs on input
  *      * is read/write as "none", "falling", "rising", or "both"
+ *   /active_low
+ *      * configures polarity of /value
+ *      * is read/write as zero/nonzero
+ *      * also affects existing and subsequent "falling" and "rising"
+ *        /edge configuration
  */
 
 static ssize_t gpio_direction_show(struct device *dev,
@@ -255,7 +261,7 @@ static ssize_t gpio_direction_store(struct device *dev,
 	return status ? : size;
 }
 
-static const DEVICE_ATTR(direction, 0644,
+static /* const */ DEVICE_ATTR(direction, 0644,
 		gpio_direction_show, gpio_direction_store);
 
 static ssize_t gpio_value_show(struct device *dev,
@@ -267,10 +273,17 @@ static ssize_t gpio_value_show(struct device *dev,
 
 	mutex_lock(&sysfs_lock);
 
-	if (!test_bit(FLAG_EXPORT, &desc->flags))
+	if (!test_bit(FLAG_EXPORT, &desc->flags)) {
 		status = -EIO;
-	else
-		status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
+	} else {
+		int value;
+
+		value = !!gpio_get_value_cansleep(gpio);
+		if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+			value = !value;
+
+		status = sprintf(buf, "%d\n", value);
+	}
 
 	mutex_unlock(&sysfs_lock);
 	return status;
@@ -294,6 +307,8 @@ static ssize_t gpio_value_store(struct device *dev,
 
 		status = strict_strtol(buf, 0, &value);
 		if (status == 0) {
+			if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+				value = !value;
 			gpio_set_value_cansleep(gpio, value != 0);
 			status = size;
 		}
@@ -303,7 +318,7 @@ static ssize_t gpio_value_store(struct device *dev,
 	return status;
 }
 
-static /*const*/ DEVICE_ATTR(value, 0644,
+static const DEVICE_ATTR(value, 0644,
 		gpio_value_show, gpio_value_store);
 
 static irqreturn_t gpio_sysfs_irq(int irq, void *priv)
@@ -352,9 +367,11 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 
 	irq_flags = IRQF_SHARED;
 	if (test_bit(FLAG_TRIG_FALL, &gpio_flags))
-		irq_flags |= IRQF_TRIGGER_FALLING;
+		irq_flags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ?
+			IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
 	if (test_bit(FLAG_TRIG_RISE, &gpio_flags))
-		irq_flags |= IRQF_TRIGGER_RISING;
+		irq_flags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ?
+			IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
 
 	if (!pdesc) {
 		pdesc = kmalloc(sizeof(*pdesc), GFP_KERNEL);
@@ -475,9 +492,79 @@ found:
 
 static DEVICE_ATTR(edge, 0644, gpio_edge_show, gpio_edge_store);
 
+static int sysfs_set_active_low(struct gpio_desc *desc, struct device *dev,
+				int value)
+{
+	int			status = 0;
+
+	if (!!test_bit(FLAG_ACTIVE_LOW, &desc->flags) == !!value)
+		return 0;
+
+	if (value)
+		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+	else
+		clear_bit(FLAG_ACTIVE_LOW, &desc->flags);
+
+	/* reconfigure poll(2) support if enabled on one edge only */
+	if (dev != NULL && (!!test_bit(FLAG_TRIG_RISE, &desc->flags) ^
+				!!test_bit(FLAG_TRIG_FALL, &desc->flags))) {
+		unsigned long trigger_flags = desc->flags & GPIO_TRIGGER_MASK;
+
+		gpio_setup_irq(desc, dev, 0);
+		status = gpio_setup_irq(desc, dev, trigger_flags);
+	}
+
+	return status;
+}
+
+static ssize_t gpio_active_low_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else
+		status = sprintf(buf, "%d\n",
+				!!test_bit(FLAG_ACTIVE_LOW, &desc->flags));
+
+	mutex_unlock(&sysfs_lock);
+
+	return status;
+}
+
+static ssize_t gpio_active_low_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = -EIO;
+	} else {
+		long		value;
+
+		status = strict_strtol(buf, 0, &value);
+		if (status == 0)
+			status = sysfs_set_active_low(desc, dev, value != 0);
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+	return status ? : size;
+}
+
+static const DEVICE_ATTR(active_low, 0644,
+		gpio_active_low_show, gpio_active_low_store);
+
 static const struct attribute *gpio_attrs[] = {
-	&dev_attr_direction.attr,
 	&dev_attr_value.attr,
+	&dev_attr_active_low.attr,
 	NULL,
 };
 
@@ -662,12 +749,12 @@ int gpio_export(unsigned gpio, bool direction_may_change)
 		dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
 				desc, ioname ? ioname : "gpio%d", gpio);
 		if (!IS_ERR(dev)) {
-			if (direction_may_change)
-				status = sysfs_create_group(&dev->kobj,
+			status = sysfs_create_group(&dev->kobj,
 						&gpio_attr_group);
-			else
+
+			if (!status && direction_may_change)
 				status = device_create_file(dev,
-						&dev_attr_value);
+						&dev_attr_direction);
 
 			if (!status && gpio_to_irq(gpio) >= 0
 					&& (direction_may_change
@@ -744,6 +831,55 @@ done:
 }
 EXPORT_SYMBOL_GPL(gpio_export_link);
 
+
+/**
+ * gpio_sysfs_set_active_low - set the polarity of gpio sysfs value
+ * @gpio: gpio to change
+ * @value: non-zero to use active low, i.e. inverted values
+ *
+ * Set the polarity of /sys/class/gpio/gpioN/value sysfs attribute.
+ * The GPIO does not have to be exported yet.  If poll(2) support has
+ * been enabled for either rising or falling edge, it will be
+ * reconfigured to follow the new polarity.
+ *
+ * Returns zero on success, else an error.
+ */
+int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	struct gpio_desc	*desc;
+	struct device		*dev = NULL;
+	int			status = -EINVAL;
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	desc = &gpio_desc[gpio];
+
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		struct device *dev;
+
+		dev = class_find_device(&gpio_class, NULL, desc, match_export);
+		if (dev == NULL) {
+			status = -ENODEV;
+			goto unlock;
+		}
+	}
+
+	status = sysfs_set_active_low(desc, dev, value);
+
+unlock:
+	mutex_unlock(&sysfs_lock);
+
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_sysfs_set_active_low);
+
 /**
  * gpio_unexport - reverse effect of gpio_export()
  * @gpio: gpio to make unavailable
@@ -1094,6 +1230,7 @@ void gpio_free(unsigned gpio)
 		}
 		desc_set_label(desc, NULL);
 		module_put(desc->chip->owner);
+		clear_bit(FLAG_ACTIVE_LOW, &desc->flags);
 		clear_bit(FLAG_REQUESTED, &desc->flags);
 	} else
 		WARN_ON(extra_checks);
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 204bed37e82d..485eeb6c4ef3 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -145,6 +145,7 @@ extern int __gpio_to_irq(unsigned gpio);
 extern int gpio_export(unsigned gpio, bool direction_may_change);
 extern int gpio_export_link(struct device *dev, const char *name,
 			unsigned gpio);
+extern int gpio_sysfs_set_active_low(unsigned gpio, int value);
 extern void gpio_unexport(unsigned gpio);
 
 #endif	/* CONFIG_GPIO_SYSFS */
@@ -197,6 +198,11 @@ static inline int gpio_export_link(struct device *dev, const char *name,
 	return -ENOSYS;
 }
 
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	return -ENOSYS;
+}
+
 static inline void gpio_unexport(unsigned gpio)
 {
 }
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 059bd189d35d..4e949a5b5b85 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -99,6 +99,12 @@ static inline int gpio_export_link(struct device *dev, const char *name,
 	return -EINVAL;
 }
 
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -EINVAL;
+}
 
 static inline void gpio_unexport(unsigned gpio)
 {
-- 
cgit v1.2.3


From 9265576daeab1a884b11cc4c1087b72b488ca2e3 Mon Sep 17 00:00:00 2001
From: Vincent Sanders <vince@simtec.co.uk>
Date: Tue, 15 Dec 2009 16:46:35 -0800
Subject: sm501: implement acceleration features

This patch provides the acceleration entry points for the SM501
framebuffer driver.

This patch provides the sync, copyarea and fillrect entry points, using
the SM501's 2D acceleration engine to perform the operations in-chip
rather than across the bus.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Vincent Sanders <vince@simtec.co.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c    | 237 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/sm501-regs.h |   2 +
 2 files changed, 226 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index d1c8ea83b41f..35370d0ecf03 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -66,6 +66,7 @@ struct sm501fb_info {
 	struct fb_info		*fb[2];		/* fb info for both heads */
 	struct resource		*fbmem_res;	/* framebuffer resource */
 	struct resource		*regs_res;	/* registers resource */
+	struct resource		*regs2d_res;	/* 2d registers resource */
 	struct sm501_platdata_fb *pdata;	/* our platform data */
 
 	unsigned long		 pm_crt_ctrl;	/* pm: crt ctrl save */
@@ -73,6 +74,7 @@ struct sm501fb_info {
 	int			 irq;
 	int			 swap_endian;	/* set to swap rgb=>bgr */
 	void __iomem		*regs;		/* remapped registers */
+	void __iomem		*regs2d;	/* 2d remapped registers */
 	void __iomem		*fbmem;		/* remapped framebuffer */
 	size_t			 fbmem_len;	/* length of remapped region */
 };
@@ -123,9 +125,9 @@ static inline void sm501fb_sync_regs(struct sm501fb_info *info)
  * This is an attempt to lay out memory for the two framebuffers and
  * everything else
  *
- * |fbmem_res->start	                                       fbmem_res->end|
- * |                                                                         |
- * |fb[0].fix.smem_start    |         |fb[1].fix.smem_start    |     2K      |
+ * |fbmem_res->start					       fbmem_res->end|
+ * |									     |
+ * |fb[0].fix.smem_start    |	      |fb[1].fix.smem_start    |     2K	     |
  * |-> fb[0].fix.smem_len <-| spare   |-> fb[1].fix.smem_len <-|-> cursors <-|
  *
  * The "spare" space is for the 2d engine data
@@ -1246,7 +1248,173 @@ static ssize_t sm501fb_debug_show_pnl(struct device *dev,
 
 static DEVICE_ATTR(fbregs_pnl, 0444, sm501fb_debug_show_pnl, NULL);
 
-/* framebuffer ops */
+/* acceleration operations */
+static int sm501fb_sync(struct fb_info *info)
+{
+	int count = 1000000;
+	struct sm501fb_par  *par = info->par;
+	struct sm501fb_info *fbi = par->info;
+
+	/* wait for the 2d engine to be ready */
+	while ((count > 0) &&
+	       (readl(fbi->regs + SM501_SYSTEM_CONTROL) &
+		SM501_SYSCTRL_2D_ENGINE_STATUS) != 0)
+		count--;
+
+	if (count <= 0) {
+		dev_err(info->dev, "Timeout waiting for 2d engine sync\n");
+		return 1;
+	}
+	return 0;
+}
+
+static void sm501fb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	struct sm501fb_par  *par = info->par;
+	struct sm501fb_info *fbi = par->info;
+	int width = area->width;
+	int height = area->height;
+	int sx = area->sx;
+	int sy = area->sy;
+	int dx = area->dx;
+	int dy = area->dy;
+	unsigned long rtl = 0;
+
+	/* source clip */
+	if ((sx >= info->var.xres_virtual) ||
+	    (sy >= info->var.yres_virtual))
+		/* source Area not within virtual screen, skipping */
+		return;
+	if ((sx + width) >= info->var.xres_virtual)
+		width = info->var.xres_virtual - sx - 1;
+	if ((sy + height) >= info->var.yres_virtual)
+		height = info->var.yres_virtual - sy - 1;
+
+	/* dest clip */
+	if ((dx >= info->var.xres_virtual) ||
+	    (dy >= info->var.yres_virtual))
+		/* Destination Area not within virtual screen, skipping */
+		return;
+	if ((dx + width) >= info->var.xres_virtual)
+		width = info->var.xres_virtual - dx - 1;
+	if ((dy + height) >= info->var.yres_virtual)
+		height = info->var.yres_virtual - dy - 1;
+
+	if ((sx < dx) || (sy < dy)) {
+		rtl = 1 << 27;
+		sx += width - 1;
+		dx += width - 1;
+		sy += height - 1;
+		dy += height - 1;
+	}
+
+	if (sm501fb_sync(info))
+		return;
+
+	/* set the base addresses */
+	writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
+	writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
+
+	/* set the window width */
+	writel((info->var.xres << 16) | info->var.xres,
+	       fbi->regs2d + SM501_2D_WINDOW_WIDTH);
+
+	/* set window stride */
+	writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
+	       fbi->regs2d + SM501_2D_PITCH);
+
+	/* set data format */
+	switch (info->var.bits_per_pixel) {
+	case 8:
+		writel(0, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	case 16:
+		writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	case 32:
+		writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	}
+
+	/* 2d compare mask */
+	writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
+
+	/* 2d mask */
+	writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
+
+	/* source and destination x y */
+	writel((sx << 16) | sy, fbi->regs2d + SM501_2D_SOURCE);
+	writel((dx << 16) | dy, fbi->regs2d + SM501_2D_DESTINATION);
+
+	/* w/h */
+	writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
+
+	/* do area move */
+	writel(0x800000cc | rtl, fbi->regs2d + SM501_2D_CONTROL);
+}
+
+static void sm501fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+	struct sm501fb_par  *par = info->par;
+	struct sm501fb_info *fbi = par->info;
+	int width = rect->width, height = rect->height;
+
+	if ((rect->dx >= info->var.xres_virtual) ||
+	    (rect->dy >= info->var.yres_virtual))
+		/* Rectangle not within virtual screen, skipping */
+		return;
+	if ((rect->dx + width) >= info->var.xres_virtual)
+		width = info->var.xres_virtual - rect->dx - 1;
+	if ((rect->dy + height) >= info->var.yres_virtual)
+		height = info->var.yres_virtual - rect->dy - 1;
+
+	if (sm501fb_sync(info))
+		return;
+
+	/* set the base addresses */
+	writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_SOURCE_BASE);
+	writel(par->screen.sm_addr, fbi->regs2d + SM501_2D_DESTINATION_BASE);
+
+	/* set the window width */
+	writel((info->var.xres << 16) | info->var.xres,
+	       fbi->regs2d + SM501_2D_WINDOW_WIDTH);
+
+	/* set window stride */
+	writel((info->var.xres_virtual << 16) | info->var.xres_virtual,
+	       fbi->regs2d + SM501_2D_PITCH);
+
+	/* set data format */
+	switch (info->var.bits_per_pixel) {
+	case 8:
+		writel(0, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	case 16:
+		writel(0x00100000, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	case 32:
+		writel(0x00200000, fbi->regs2d + SM501_2D_STRETCH);
+		break;
+	}
+
+	/* 2d compare mask */
+	writel(0xffffffff, fbi->regs2d + SM501_2D_COLOR_COMPARE_MASK);
+
+	/* 2d mask */
+	writel(0xffffffff, fbi->regs2d + SM501_2D_MASK);
+
+	/* colour */
+	writel(rect->color, fbi->regs2d + SM501_2D_FOREGROUND);
+
+	/* x y */
+	writel((rect->dx << 16) | rect->dy, fbi->regs2d + SM501_2D_DESTINATION);
+
+	/* w/h */
+	writel((width << 16) | height, fbi->regs2d + SM501_2D_DIMENSION);
+
+	/* do rectangle fill */
+	writel(0x800100cc, fbi->regs2d + SM501_2D_CONTROL);
+}
+
 
 static struct fb_ops sm501fb_ops_crt = {
 	.owner		= THIS_MODULE,
@@ -1256,9 +1424,10 @@ static struct fb_ops sm501fb_ops_crt = {
 	.fb_setcolreg	= sm501fb_setcolreg,
 	.fb_pan_display	= sm501fb_pan_crt,
 	.fb_cursor	= sm501fb_cursor,
-	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
+	.fb_fillrect	= sm501fb_fillrect,
+	.fb_copyarea	= sm501fb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
+	.fb_sync	= sm501fb_sync,
 };
 
 static struct fb_ops sm501fb_ops_pnl = {
@@ -1269,9 +1438,10 @@ static struct fb_ops sm501fb_ops_pnl = {
 	.fb_blank	= sm501fb_blank_pnl,
 	.fb_setcolreg	= sm501fb_setcolreg,
 	.fb_cursor	= sm501fb_cursor,
-	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
+	.fb_fillrect	= sm501fb_fillrect,
+	.fb_copyarea	= sm501fb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
+	.fb_sync	= sm501fb_sync,
 };
 
 /* sm501_init_cursor
@@ -1329,7 +1499,8 @@ static int sm501fb_start(struct sm501fb_info *info,
 		dev_warn(dev, "no irq for device\n");
 	}
 
-	/* allocate, reserve and remap resources for registers */
+	/* allocate, reserve and remap resources for display
+	 * controller registers */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(dev, "no resource definition for registers\n");
@@ -1354,12 +1525,38 @@ static int sm501fb_start(struct sm501fb_info *info,
 		goto err_regs_res;
 	}
 
+	/* allocate, reserve and remap resources for 2d
+	 * controller registers */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res == NULL) {
+		dev_err(dev, "no resource definition for 2d registers\n");
+		ret = -ENOENT;
+		goto err_regs_map;
+	}
+
+	info->regs2d_res = request_mem_region(res->start,
+					      resource_size(res),
+					      pdev->name);
+
+	if (info->regs2d_res == NULL) {
+		dev_err(dev, "cannot claim registers\n");
+		ret = -ENXIO;
+		goto err_regs_map;
+	}
+
+	info->regs2d = ioremap(res->start, resource_size(res));
+	if (info->regs2d == NULL) {
+		dev_err(dev, "cannot remap registers\n");
+		ret = -ENXIO;
+		goto err_regs2d_res;
+	}
+
 	/* allocate, reserve resources for framebuffer */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
 	if (res == NULL) {
 		dev_err(dev, "no memory resource defined\n");
 		ret = -ENXIO;
-		goto err_regs_map;
+		goto err_regs2d_map;
 	}
 
 	info->fbmem_res = request_mem_region(res->start,
@@ -1368,7 +1565,7 @@ static int sm501fb_start(struct sm501fb_info *info,
 	if (info->fbmem_res == NULL) {
 		dev_err(dev, "cannot claim framebuffer\n");
 		ret = -ENXIO;
-		goto err_regs_map;
+		goto err_regs2d_map;
 	}
 
 	info->fbmem = ioremap(res->start, resource_size(res));
@@ -1389,8 +1586,10 @@ static int sm501fb_start(struct sm501fb_info *info,
 	/* enable display controller */
 	sm501_unit_power(dev->parent, SM501_GATE_DISPLAY, 1);
 
-	/* setup cursors */
+	/* enable 2d controller */
+	sm501_unit_power(dev->parent, SM501_GATE_2D_ENGINE, 1);
 
+	/* setup cursors */
 	sm501_init_cursor(info->fb[HEAD_CRT], SM501_DC_CRT_HWC_ADDR);
 	sm501_init_cursor(info->fb[HEAD_PANEL], SM501_DC_PANEL_HWC_ADDR);
 
@@ -1400,6 +1599,13 @@ static int sm501fb_start(struct sm501fb_info *info,
 	release_resource(info->fbmem_res);
 	kfree(info->fbmem_res);
 
+ err_regs2d_map:
+	iounmap(info->regs2d);
+
+ err_regs2d_res:
+	release_resource(info->regs2d_res);
+	kfree(info->regs2d_res);
+
  err_regs_map:
 	iounmap(info->regs);
 
@@ -1420,6 +1626,10 @@ static void sm501fb_stop(struct sm501fb_info *info)
 	release_resource(info->fbmem_res);
 	kfree(info->fbmem_res);
 
+	iounmap(info->regs2d);
+	release_resource(info->regs2d_res);
+	kfree(info->regs2d_res);
+
 	iounmap(info->regs);
 	release_resource(info->regs_res);
 	kfree(info->regs_res);
@@ -1486,7 +1696,8 @@ static int sm501fb_init_fb(struct fb_info *fb,
 		par->ops.fb_cursor = NULL;
 
 	fb->fbops = &par->ops;
-	fb->flags = FBINFO_FLAG_DEFAULT |
+	fb->flags = FBINFO_FLAG_DEFAULT | FBINFO_READS_FAST |
+		FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT |
 		FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN;
 
 	/* fixed data */
diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h
index d53642d2d899..67ed2c542831 100644
--- a/include/linux/sm501-regs.h
+++ b/include/linux/sm501-regs.h
@@ -31,6 +31,8 @@
 #define SM501_SYSCTRL_PCI_SUBSYS_LOCK	(1<<11)
 #define SM501_SYSCTRL_PCI_BURST_READ_EN	(1<<15)
 
+#define SM501_SYSCTRL_2D_ENGINE_STATUS	(1<<19)
+
 /* miscellaneous control */
 
 #define SM501_MISC_CONTROL		(0x000004)
-- 
cgit v1.2.3


From 904e812931f001b984912b2d2f653ea69520313c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 15 Dec 2009 16:46:52 -0800
Subject: reiserfs: remove /proc/fs/reiserfs/version

/proc/fs/reiserfs/version is on the way of removing ->read_proc interface.
 It's empty however, so simply remove it instead of doing dummy
conversion.  It's hard to see what information userspace can extract from
empty file.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/procfs.c        | 39 ---------------------------------------
 fs/reiserfs/super.c         |  4 ----
 include/linux/reiserfs_fs.h |  5 -----
 3 files changed, 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9229e5514a4e..0ebb11646526 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -48,14 +48,6 @@ static int show_version(struct seq_file *m, struct super_block *sb)
 	return 0;
 }
 
-int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
-				    int count, int *eof, void *data)
-{
-	*start = buffer;
-	*eof = 1;
-	return 0;
-}
-
 #define SF( x ) ( r -> x )
 #define SFP( x ) SF( s_proc_info_data.x )
 #define SFPL( x ) SFP( x[ level ] )
@@ -538,19 +530,6 @@ int reiserfs_proc_info_done(struct super_block *sb)
 	return 0;
 }
 
-struct proc_dir_entry *reiserfs_proc_register_global(char *name,
-						     read_proc_t * func)
-{
-	return (proc_info_root) ? create_proc_read_entry(name, 0,
-							 proc_info_root,
-							 func, NULL) : NULL;
-}
-
-void reiserfs_proc_unregister_global(const char *name)
-{
-	remove_proc_entry(name, proc_info_root);
-}
-
 int reiserfs_proc_info_global_init(void)
 {
 	if (proc_info_root == NULL) {
@@ -585,16 +564,6 @@ int reiserfs_proc_info_done(struct super_block *sb)
 	return 0;
 }
 
-struct proc_dir_entry *reiserfs_proc_register_global(char *name,
-						     read_proc_t * func)
-{
-	return NULL;
-}
-
-void reiserfs_proc_unregister_global(const char *name)
-{;
-}
-
 int reiserfs_proc_info_global_init(void)
 {
 	return 0;
@@ -603,14 +572,6 @@ int reiserfs_proc_info_global_done(void)
 {
 	return 0;
 }
-
-int reiserfs_global_version_in_proc(char *buffer, char **start,
-				    off_t offset,
-				    int count, int *eof, void *data)
-{
-	return 0;
-}
-
 /* REISERFS_PROC_INFO */
 #endif
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 339b0baf2af6..b4a7dd03bdb9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2222,8 +2222,6 @@ static int __init init_reiserfs_fs(void)
 	}
 
 	reiserfs_proc_info_global_init();
-	reiserfs_proc_register_global("version",
-				      reiserfs_global_version_in_proc);
 
 	ret = register_filesystem(&reiserfs_fs_type);
 
@@ -2231,7 +2229,6 @@ static int __init init_reiserfs_fs(void)
 		return 0;
 	}
 
-	reiserfs_proc_unregister_global("version");
 	reiserfs_proc_info_global_done();
 	destroy_inodecache();
 
@@ -2240,7 +2237,6 @@ static int __init init_reiserfs_fs(void)
 
 static void __exit exit_reiserfs_fs(void)
 {
-	reiserfs_proc_unregister_global("version");
 	reiserfs_proc_info_global_done();
 	unregister_filesystem(&reiserfs_fs_type);
 	destroy_inodecache();
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a05b4a20768d..f65ed0d3a920 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2061,13 +2061,8 @@ struct dentry *reiserfs_get_parent(struct dentry *);
 
 int reiserfs_proc_info_init(struct super_block *sb);
 int reiserfs_proc_info_done(struct super_block *sb);
-struct proc_dir_entry *reiserfs_proc_register_global(char *name,
-						     read_proc_t * func);
-void reiserfs_proc_unregister_global(const char *name);
 int reiserfs_proc_info_global_init(void);
 int reiserfs_proc_info_global_done(void);
-int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
-				    int count, int *eof, void *data);
 
 #if defined( REISERFS_PROC_INFO )
 
-- 
cgit v1.2.3


From e3c96f53ac132743fda1384910feb863a2eab916 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 15 Dec 2009 16:46:54 -0800
Subject: reiserfs: don't compile procfs.o at all if no support

* small define cleanup in header
* fix #ifdeffery in procfs.c via Kconfig

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/Makefile        |  6 +++++-
 fs/reiserfs/procfs.c        | 26 --------------------------
 include/linux/reiserfs_fs.h | 30 +++++++++++++++++++++---------
 3 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 6a9e30c041dd..792b3cb2cd18 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,11 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
 reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
 		 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
 		 hashes.o tail_conversion.o journal.o resize.o \
-		 item_ops.o ioctl.o procfs.o xattr.o lock.o
+		 item_ops.o ioctl.o xattr.o lock.o
+
+ifeq ($(CONFIG_REISERFS_PROC_INFO),y)
+reiserfs-objs += procfs.o
+endif
 
 ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
 reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 0ebb11646526..7a9981196c1c 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -17,8 +17,6 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 
-#ifdef CONFIG_REISERFS_PROC_INFO
-
 /*
  * LOCKING:
  *
@@ -551,30 +549,6 @@ int reiserfs_proc_info_global_done(void)
 	}
 	return 0;
 }
-
-/* REISERFS_PROC_INFO */
-#else
-
-int reiserfs_proc_info_init(struct super_block *sb)
-{
-	return 0;
-}
-int reiserfs_proc_info_done(struct super_block *sb)
-{
-	return 0;
-}
-
-int reiserfs_proc_info_global_init(void)
-{
-	return 0;
-}
-int reiserfs_proc_info_global_done(void)
-{
-	return 0;
-}
-/* REISERFS_PROC_INFO */
-#endif
-
 /*
  * Revision 1.1.8.2  2001/07/15 17:08:42  god
  *  . use get_super() in procfs.c
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index f65ed0d3a920..c96c1858fe2c 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2051,21 +2051,13 @@ void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
 int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
 			struct treepath *path, struct reiserfs_dir_entry *de);
 struct dentry *reiserfs_get_parent(struct dentry *);
-/* procfs.c */
-
-#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
-#define REISERFS_PROC_INFO
-#else
-#undef REISERFS_PROC_INFO
-#endif
 
+#ifdef CONFIG_REISERFS_PROC_INFO
 int reiserfs_proc_info_init(struct super_block *sb);
 int reiserfs_proc_info_done(struct super_block *sb);
 int reiserfs_proc_info_global_init(void);
 int reiserfs_proc_info_global_done(void);
 
-#if defined( REISERFS_PROC_INFO )
-
 #define PROC_EXP( e )   e
 
 #define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
@@ -2079,6 +2071,26 @@ int reiserfs_proc_info_global_done(void);
     PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) );	\
     PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
 #else
+static inline int reiserfs_proc_info_init(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_done(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_init(void)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_done(void)
+{
+	return 0;
+}
+
 #define PROC_EXP( e )
 #define VOID_V ( ( void ) 0 )
 #define PROC_INFO_MAX( sb, field, value ) VOID_V
-- 
cgit v1.2.3


From 569b846df54ffb2827b83ce3244c5f032394cba4 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 15 Dec 2009 16:47:03 -0800
Subject: memcg: coalesce uncharge during unmap/truncate

In massive parallel enviroment, res_counter can be a performance
bottleneck.  One strong techinque to reduce lock contention is reducing
calls by coalescing some amount of calls into one.

Considering charge/uncharge chatacteristic,
	- charge is done one by one via demand-paging.
	- uncharge is done by
		- in chunk at munmap, truncate, exit, execve...
		- one by one via vmscan/paging.

It seems we have a chance to coalesce uncharges for improving scalability
at unmap/truncation.

This patch is a for coalescing uncharge.  For avoiding scattering memcg's
structure to functions under /mm, this patch adds memcg batch uncharge
information to the task.  A reason for per-task batching is for making use
of caller's context information.  We do batched uncharge (deleyed
uncharge) when truncation/unmap occurs but do direct uncharge when
uncharge is called by memory reclaim (vmscan.c).

The degree of coalescing depends on callers
  - at invalidate/trucate... pagevec size
  - at unmap ....ZAP_BLOCK_SIZE
(memory itself will be freed in this degree.)
Then, we'll not coalescing too much.

On x86-64 8cpu server, I tested overheads of memcg at page fault by
running a program which does map/fault/unmap in a loop. Running
a task per a cpu by taskset and see sum of the number of page faults
in 60secs.

[without memcg config]
  40156968  page-faults              #      0.085 M/sec   ( +-   0.046% )
  27.67 cache-miss/faults
[root cgroup]
  36659599  page-faults              #      0.077 M/sec   ( +-   0.247% )
  31.58 miss/faults
[in a child cgroup]
  18444157  page-faults              #      0.039 M/sec   ( +-   0.133% )
  69.96 miss/faults
[child with this patch]
  27133719  page-faults              #      0.057 M/sec   ( +-   0.155% )
  47.16 miss/faults

We can see some amounts of improvement.
(root cgroup doesn't affected by this patch)
Another patch for "charge" will follow this and above will be improved more.

Changelog(since 2009/10/02):
 - renamed filed of memcg_batch (as pages to bytes, memsw to memsw_bytes)
 - some clean up and commentary/description updates.
 - added initialize code to copy_process(). (possible bug fix)

Changelog(old):
 - fixed !CONFIG_MEM_CGROUP case.
 - rebased onto the latest mmotm + softlimit fix patches.
 - unified patch for callers
 - added commetns.
 - make ->do_batch as bool.
 - removed css_get() at el. We don't need it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 13 +++++++
 include/linux/sched.h      |  8 ++++
 kernel/fork.c              |  4 ++
 mm/memcontrol.c            | 96 +++++++++++++++++++++++++++++++++++++++++++---
 mm/memory.c                |  2 +
 mm/truncate.c              |  6 +++
 6 files changed, 123 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bf9213b2db8f..91300c972e76 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,11 @@ extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
 extern void mem_cgroup_del_lru(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page,
 				  enum lru_list from, enum lru_list to);
+
+/* For coalescing uncharge for reducing memcg' overhead*/
+extern void mem_cgroup_uncharge_start(void);
+extern void mem_cgroup_uncharge_end(void);
+
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern int mem_cgroup_shmem_charge_fallback(struct page *page,
@@ -151,6 +156,14 @@ static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
 {
 }
 
+static inline void mem_cgroup_uncharge_start(void)
+{
+}
+
+static inline void mem_cgroup_uncharge_end(void)
+{
+}
+
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c858f38e81a..f4c145410a8d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1544,6 +1544,14 @@ struct task_struct {
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 	unsigned long stack_start;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
+	struct memcg_batch_info {
+		int do_batch;	/* incremented when batch uncharge started */
+		struct mem_cgroup *memcg; /* target memcg of uncharge */
+		unsigned long bytes; 		/* uncharged usage */
+		unsigned long memsw_bytes; /* uncharged mem+swap usage */
+	} memcg_batch;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/fork.c b/kernel/fork.c
index 9bd91447e052..b6cbd33dde80 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1127,6 +1127,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	p->memcg_batch.do_batch = 0;
+	p->memcg_batch.memcg = NULL;
+#endif
 
 	p->bts = NULL;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7b5b108c1c6b..a730c91b8e69 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1827,6 +1827,50 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 	css_put(&mem->css);
 }
 
+static void
+__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
+{
+	struct memcg_batch_info *batch = NULL;
+	bool uncharge_memsw = true;
+	/* If swapout, usage of swap doesn't decrease */
+	if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+		uncharge_memsw = false;
+	/*
+	 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
+	 * In those cases, all pages freed continously can be expected to be in
+	 * the same cgroup and we have chance to coalesce uncharges.
+	 * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
+	 * because we want to do uncharge as soon as possible.
+	 */
+	if (!current->memcg_batch.do_batch || test_thread_flag(TIF_MEMDIE))
+		goto direct_uncharge;
+
+	batch = &current->memcg_batch;
+	/*
+	 * In usual, we do css_get() when we remember memcg pointer.
+	 * But in this case, we keep res->usage until end of a series of
+	 * uncharges. Then, it's ok to ignore memcg's refcnt.
+	 */
+	if (!batch->memcg)
+		batch->memcg = mem;
+	/*
+	 * In typical case, batch->memcg == mem. This means we can
+	 * merge a series of uncharges to an uncharge of res_counter.
+	 * If not, we uncharge res_counter ony by one.
+	 */
+	if (batch->memcg != mem)
+		goto direct_uncharge;
+	/* remember freed charge and uncharge it later */
+	batch->bytes += PAGE_SIZE;
+	if (uncharge_memsw)
+		batch->memsw_bytes += PAGE_SIZE;
+	return;
+direct_uncharge:
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	if (uncharge_memsw)
+		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+	return;
+}
 
 /*
  * uncharge if !page_mapped(page)
@@ -1875,12 +1919,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		if (do_swap_account &&
-				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
-	}
+	if (!mem_cgroup_is_root(mem))
+		__do_uncharge(mem, ctype);
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
 		mem_cgroup_swap_statistics(mem, true);
 	mem_cgroup_charge_statistics(mem, pc, false);
@@ -1926,6 +1966,50 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
+/*
+ * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
+ * In that cases, pages are freed continuously and we can expect pages
+ * are in the same memcg. All these calls itself limits the number of
+ * pages freed at once, then uncharge_start/end() is called properly.
+ * This may be called prural(2) times in a context,
+ */
+
+void mem_cgroup_uncharge_start(void)
+{
+	current->memcg_batch.do_batch++;
+	/* We can do nest. */
+	if (current->memcg_batch.do_batch == 1) {
+		current->memcg_batch.memcg = NULL;
+		current->memcg_batch.bytes = 0;
+		current->memcg_batch.memsw_bytes = 0;
+	}
+}
+
+void mem_cgroup_uncharge_end(void)
+{
+	struct memcg_batch_info *batch = &current->memcg_batch;
+
+	if (!batch->do_batch)
+		return;
+
+	batch->do_batch--;
+	if (batch->do_batch) /* If stacked, do nothing. */
+		return;
+
+	if (!batch->memcg)
+		return;
+	/*
+	 * This "batch->memcg" is valid without any css_get/put etc...
+	 * bacause we hide charges behind us.
+	 */
+	if (batch->bytes)
+		res_counter_uncharge(&batch->memcg->res, batch->bytes);
+	if (batch->memsw_bytes)
+		res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes);
+	/* forget this pointer (for sanity check) */
+	batch->memcg = NULL;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
diff --git a/mm/memory.c b/mm/memory.c
index a54b2c498444..aed45eaf8ac9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -956,6 +956,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 		details = NULL;
 
 	BUG_ON(addr >= end);
+	mem_cgroup_uncharge_start();
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -968,6 +969,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 						zap_work, details);
 	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
+	mem_cgroup_uncharge_end();
 
 	return addr;
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 2c147a7e5f2c..342deee22684 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -272,6 +272,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			pagevec_release(&pvec);
 			break;
 		}
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -286,6 +287,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 	}
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -327,6 +329,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	pagevec_init(&pvec, 0);
 	while (next <= end &&
 			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t index;
@@ -354,6 +357,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 				break;
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
 	return ret;
@@ -428,6 +432,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (next <= end && !wrapped &&
 		pagevec_lookup(&pvec, mapping, next,
 			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t page_index;
@@ -477,6 +482,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
+		mem_cgroup_uncharge_end();
 		cond_resched();
 	}
 	return ret;
-- 
cgit v1.2.3


From d8046582d5ee24448800e71c6933fdb6813aa062 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 15 Dec 2009 16:47:09 -0800
Subject: memcg: make memcg's file mapped consistent with global VM

In global VM, FILE_MAPPED is used but memcg uses MAPPED_FILE.  This makes
grep difficult.  Replace memcg's MAPPED_FILE with FILE_MAPPED

And in global VM, mapped shared memory is accounted into FILE_MAPPED.
But memcg doesn't. fix it.
Note:
  page_is_file_cache() just checks SwapBacked or not.
  So, we need to check PageAnon.

Cc: Balbir Singh <balbir@in.ibm.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++--
 mm/memcontrol.c            | 21 +++++++++------------
 mm/rmap.c                  |  4 ++--
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 91300c972e76..0b46c2068b96 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -122,7 +122,7 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
-void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
+void mem_cgroup_update_file_mapped(struct page *page, int val);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask, int nid,
 						int zid);
@@ -287,7 +287,7 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
+static inline void mem_cgroup_update_file_mapped(struct page *page,
 							int val)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6587f657d57c..0b3efb843a87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -67,7 +67,7 @@ enum mem_cgroup_stat_index {
 	 */
 	MEM_CGROUP_STAT_CACHE, 	   /* # of pages charged as cache */
 	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as anon rss */
-	MEM_CGROUP_STAT_MAPPED_FILE,  /* # of pages charged as file rss */
+	MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 	MEM_CGROUP_STAT_EVENTS,	/* sum of pagein + pageout for internal use */
@@ -1227,7 +1227,7 @@ static void record_last_oom(struct mem_cgroup *mem)
  * Currently used to update mapped file statistics, but the routine can be
  * generalized to update other statistics as well.
  */
-void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
+void mem_cgroup_update_file_mapped(struct page *page, int val)
 {
 	struct mem_cgroup *mem;
 	struct mem_cgroup_stat *stat;
@@ -1235,9 +1235,6 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
 	int cpu;
 	struct page_cgroup *pc;
 
-	if (!page_is_file_cache(page))
-		return;
-
 	pc = lookup_page_cgroup(page);
 	if (unlikely(!pc))
 		return;
@@ -1257,7 +1254,7 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
 	stat = &mem->stat;
 	cpustat = &stat->cpustat[cpu];
 
-	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val);
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val);
 done:
 	unlock_page_cgroup(pc);
 }
@@ -1654,18 +1651,18 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
-	if (page_is_file_cache(page) && page_mapped(page)) {
+	if (page_mapped(page) && !PageAnon(page)) {
 		cpu = smp_processor_id();
 		/* Update mapped_file data for mem_cgroup "from" */
 		stat = &from->stat;
 		cpustat = &stat->cpustat[cpu];
-		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
 						-1);
 
 		/* Update mapped_file data for mem_cgroup "to" */
 		stat = &to->stat;
 		cpustat = &stat->cpustat[cpu];
-		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
 						1);
 	}
 
@@ -2889,7 +2886,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 enum {
 	MCS_CACHE,
 	MCS_RSS,
-	MCS_MAPPED_FILE,
+	MCS_FILE_MAPPED,
 	MCS_PGPGIN,
 	MCS_PGPGOUT,
 	MCS_SWAP,
@@ -2933,8 +2930,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
 	s->stat[MCS_CACHE] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	s->stat[MCS_RSS] += val * PAGE_SIZE;
-	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE);
-	s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE;
+	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED);
+	s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
 	s->stat[MCS_PGPGIN] += val;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
diff --git a/mm/rmap.c b/mm/rmap.c
index 98135dbd25ba..278cd277bdec 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -721,7 +721,7 @@ void page_add_file_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount)) {
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_update_mapped_file_stat(page, 1);
+		mem_cgroup_update_file_mapped(page, 1);
 	}
 }
 
@@ -753,8 +753,8 @@ void page_remove_rmap(struct page *page)
 		__dec_zone_page_state(page, NR_ANON_PAGES);
 	} else {
 		__dec_zone_page_state(page, NR_FILE_MAPPED);
+		mem_cgroup_update_file_mapped(page, -1);
 	}
-	mem_cgroup_update_mapped_file_stat(page, -1);
 	/*
 	 * It would be tidy to reset the PageAnon mapping here,
 	 * but that might overwrite a racing page_add_anon_rmap
-- 
cgit v1.2.3


From 57f9fd7d25ac9a0d7e3a4ced580e780ab4524e3b Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Tue, 15 Dec 2009 16:47:11 -0800
Subject: memcg: cleanup mem_cgroup_move_parent()

mem_cgroup_move_parent() calls try_charge first and cancel_charge on
failure.  IMHO, charge/uncharge(especially charge) is high cost operation,
so we should avoid it as far as possible.

This patch tries to delay try_charge in mem_cgroup_move_parent() by
re-ordering checks it does.

And this patch renames mem_cgroup_move_account() to
__mem_cgroup_move_account(), changes the return value of
__mem_cgroup_move_account() from int to void, and adds a new
wrapper(mem_cgroup_move_account()), which checks whether a @pc is valid
for moving account and calls __mem_cgroup_move_account().

This patch removes the last caller of trylock_page_cgroup(), so removes
its definition too.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h |  7 ++--
 mm/memcontrol.c             | 84 +++++++++++++++++++--------------------------
 2 files changed, 37 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 4b938d4f3ac2..b0e4eb126236 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -57,6 +57,8 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
 static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
 	{ return test_and_clear_bit(PCG_##lname, &pc->flags);  }
 
+TESTPCGFLAG(Locked, LOCK)
+
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
 CLEARPCGFLAG(Cache, CACHE)
@@ -86,11 +88,6 @@ static inline void lock_page_cgroup(struct page_cgroup *pc)
 	bit_spin_lock(PCG_LOCK, &pc->flags);
 }
 
-static inline int trylock_page_cgroup(struct page_cgroup *pc)
-{
-	return bit_spin_trylock(PCG_LOCK, &pc->flags);
-}
-
 static inline void unlock_page_cgroup(struct page_cgroup *pc)
 {
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d6b4a912a6d..6273984f2e34 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1613,27 +1613,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 }
 
 /**
- * mem_cgroup_move_account - move account of the page
+ * __mem_cgroup_move_account - move account of the page
  * @pc:	page_cgroup of the page.
  * @from: mem_cgroup which the page is moved from.
  * @to:	mem_cgroup which the page is moved to. @from != @to.
  *
  * The caller must confirm following.
  * - page is not on LRU (isolate_page() is useful.)
- *
- * returns 0 at success,
- * returns -EBUSY when lock is busy or "pc" is unstable.
+ * - the pc is locked, used, and ->mem_cgroup points to @from.
  *
  * This function does "uncharge" from old cgroup but doesn't do "charge" to
  * new cgroup. It should be done by a caller.
  */
 
-static int mem_cgroup_move_account(struct page_cgroup *pc,
+static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup *from, struct mem_cgroup *to)
 {
-	struct mem_cgroup_per_zone *from_mz, *to_mz;
-	int nid, zid;
-	int ret = -EBUSY;
 	struct page *page;
 	int cpu;
 	struct mem_cgroup_stat *stat;
@@ -1641,20 +1636,9 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
-
-	nid = page_cgroup_nid(pc);
-	zid = page_cgroup_zid(pc);
-	from_mz =  mem_cgroup_zoneinfo(from, nid, zid);
-	to_mz =  mem_cgroup_zoneinfo(to, nid, zid);
-
-	if (!trylock_page_cgroup(pc))
-		return ret;
-
-	if (!PageCgroupUsed(pc))
-		goto out;
-
-	if (pc->mem_cgroup != from)
-		goto out;
+	VM_BUG_ON(!PageCgroupLocked(pc));
+	VM_BUG_ON(!PageCgroupUsed(pc));
+	VM_BUG_ON(pc->mem_cgroup != from);
 
 	if (!mem_cgroup_is_root(from))
 		res_counter_uncharge(&from->res, PAGE_SIZE);
@@ -1683,15 +1667,28 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	css_get(&to->css);
 	pc->mem_cgroup = to;
 	mem_cgroup_charge_statistics(to, pc, true);
-	ret = 0;
-out:
-	unlock_page_cgroup(pc);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
 	 * this function is just force_empty() and it's garanteed that
 	 * "to" is never removed. So, we don't check rmdir status here.
 	 */
+}
+
+/*
+ * check whether the @pc is valid for moving account and call
+ * __mem_cgroup_move_account()
+ */
+static int mem_cgroup_move_account(struct page_cgroup *pc,
+				struct mem_cgroup *from, struct mem_cgroup *to)
+{
+	int ret = -EINVAL;
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
+		__mem_cgroup_move_account(pc, from, to);
+		ret = 0;
+	}
+	unlock_page_cgroup(pc);
 	return ret;
 }
 
@@ -1713,38 +1710,27 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (!pcg)
 		return -EINVAL;
 
+	ret = -EBUSY;
+	if (!get_page_unless_zero(page))
+		goto out;
+	if (isolate_lru_page(page))
+		goto put;
 
 	parent = mem_cgroup_from_cont(pcg);
-
-
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
 	if (ret || !parent)
-		return ret;
-
-	if (!get_page_unless_zero(page)) {
-		ret = -EBUSY;
-		goto uncharge;
-	}
-
-	ret = isolate_lru_page(page);
-
-	if (ret)
-		goto cancel;
+		goto put_back;
 
 	ret = mem_cgroup_move_account(pc, child, parent);
-
+	if (!ret)
+		css_put(&parent->css);	/* drop extra refcnt by try_charge() */
+	else
+		mem_cgroup_cancel_charge(parent);	/* does css_put */
+put_back:
 	putback_lru_page(page);
-	if (!ret) {
-		put_page(page);
-		/* drop extra refcnt by try_charge() */
-		css_put(&parent->css);
-		return 0;
-	}
-
-cancel:
+put:
 	put_page(page);
-uncharge:
-	mem_cgroup_cancel_charge(parent);
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3


From c6a47cc2ccf9649ee09eeddd70a6d061bde69568 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 15 Dec 2009 16:47:15 -0800
Subject: ptrace: cleanup ptrace_init_task()->ptrace_link() path

No functional changes.

ptrace_init_task() looks confusing, as if we always auto-attach when "bool
ptrace" argument is true, while in fact we attach only if current is
traced.

Make the code more explicit and kill now unused ptrace_link().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 7456d7d87a19..1951805df63a 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -105,12 +105,7 @@ static inline int ptrace_reparented(struct task_struct *child)
 {
 	return child->real_parent != child->parent;
 }
-static inline void ptrace_link(struct task_struct *child,
-			       struct task_struct *new_parent)
-{
-	if (unlikely(child->ptrace))
-		__ptrace_link(child, new_parent);
-}
+
 static inline void ptrace_unlink(struct task_struct *child)
 {
 	if (unlikely(child->ptrace))
@@ -169,9 +164,9 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 	INIT_LIST_HEAD(&child->ptraced);
 	child->parent = child->real_parent;
 	child->ptrace = 0;
-	if (unlikely(ptrace)) {
+	if (unlikely(ptrace) && (current->ptrace & PT_PTRACED)) {
 		child->ptrace = current->ptrace;
-		ptrace_link(child, current->parent);
+		__ptrace_link(child, current->parent);
 	}
 }
 
-- 
cgit v1.2.3


From 85ec7fd9f8e528c4f61d595cfe4df7681a19f252 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 15 Dec 2009 16:47:17 -0800
Subject: ptrace: introduce user_single_step_siginfo() helper

Suggested by Roland.

Currently there is no way to synthesize a single-stepping trap in the
arch-independent manner.  This patch adds the default helper which fills
siginfo_t, arch/ can can override it.

Architetures which implement user_enable_single_step() should add
user_single_step_siginfo() also.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: <linux-arch@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1951805df63a..56f2d63a5cbb 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -273,6 +273,18 @@ static inline void user_enable_block_step(struct task_struct *task)
 }
 #endif	/* arch_has_block_step */
 
+#ifdef ARCH_HAS_USER_SINGLE_STEP_INFO
+extern void user_single_step_siginfo(struct task_struct *tsk,
+				struct pt_regs *regs, siginfo_t *info);
+#else
+static inline void user_single_step_siginfo(struct task_struct *tsk,
+				struct pt_regs *regs, siginfo_t *info)
+{
+	memset(info, 0, sizeof(*info));
+	info->si_signo = SIGTRAP;
+}
+#endif
+
 #ifndef arch_ptrace_stop_needed
 /**
  * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
-- 
cgit v1.2.3


From 2f0edac5555983dc28033acce8a355f588fd01b2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 15 Dec 2009 16:47:19 -0800
Subject: ptrace: change tracehook_report_syscall_exit() to handle stepping

Suggested by Roland.

Change tracehook_report_syscall_exit() to look at step flag and send the
trap signal if needed.

This change affects ia64, microblaze, parisc, powerpc, sh.  They pass
nonzero "step" argument to tracehook but since it was ignored the tracee
reports via ptrace_notify(), this is not right and not consistent.

	- PTRACE_SETSIGINFO doesn't work

	- if the tracer resumes the tracee with signr != 0 the new signal
	  is generated rather than delivering it

	- If PT_TRACESYSGOOD is set the tracee reports the wrong exit_code

I don't have a powerpc machine, but I think this test-case should see the
difference:

	#include <unistd.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		int pid, status;

		if (!(pid = fork())) {
			assert(ptrace(PTRACE_TRACEME) == 0);
			kill(getpid(), SIGSTOP);

			getppid();

			return 0;
		}

		assert(pid == wait(&status));
		assert(ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESYSGOOD) == 0);

		assert(ptrace(PTRACE_SYSCALL, pid, 0,0) == 0);
		assert(pid == wait(&status));

		assert(ptrace(PTRACE_SINGLESTEP, pid, 0,0) == 0);
		assert(pid == wait(&status));

		if (status == 0x57F)
			return 0;

		printf("kernel bug: status=%X shouldn't have 0x80\n", status);
		return 1;
	}

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: <linux-arch@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracehook.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1eb44a924e56..10db0102a890 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -134,6 +134,13 @@ static inline __must_check int tracehook_report_syscall_entry(
  */
 static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
 {
+	if (step) {
+		siginfo_t info;
+		user_single_step_siginfo(current, regs, &info);
+		force_sig_info(SIGTRAP, &info, current);
+		return;
+	}
+
 	ptrace_report_syscall(regs);
 }
 
-- 
cgit v1.2.3


From 614c517d7c00af1b26ded20646b329397d6f51a1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 15 Dec 2009 16:47:22 -0800
Subject: signals: SEND_SIG_NOINFO should be considered as SI_FROMUSER()

No changes in compiled code. The patch adds the new helper, si_fromuser()
and changes check_kill_permission() to use this helper.

The real effect of this patch is that from now we "officially" consider
SEND_SIG_NOINFO signal as "from user-space" signals. This is already true
if we look at the code which uses SEND_SIG_NOINFO, except __send_signal()
has another opinion - see the next patch.

The naming of these special SEND_SIG_XXX siginfo's is really bad
imho.  From __send_signal()'s pov they mean

	SEND_SIG_NOINFO		from user
	SEND_SIG_PRIV		from kernel
	SEND_SIG_FORCED		no info

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Reviewed-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  5 -----
 kernel/signal.c       | 16 +++++++++++++---
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f4c145410a8d..57b3516f055b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2102,11 +2102,6 @@ static inline int kill_cad_pid(int sig, int priv)
 #define SEND_SIG_PRIV	((struct siginfo *) 1)
 #define SEND_SIG_FORCED	((struct siginfo *) 2)
 
-static inline int is_si_special(const struct siginfo *info)
-{
-	return info <= SEND_SIG_FORCED;
-}
-
 /*
  * True if we are on the alternate signal stack.
  */
diff --git a/kernel/signal.c b/kernel/signal.c
index 6b982f2cf524..a0ba428954b6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -607,6 +607,17 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
 	return 1;
 }
 
+static inline int is_si_special(const struct siginfo *info)
+{
+	return info <= SEND_SIG_FORCED;
+}
+
+static inline bool si_fromuser(const struct siginfo *info)
+{
+	return info == SEND_SIG_NOINFO ||
+		(!is_si_special(info) && SI_FROMUSER(info));
+}
+
 /*
  * Bad permissions for sending the signal
  * - the caller must hold at least the RCU read lock
@@ -621,7 +632,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	if (!valid_signal(sig))
 		return -EINVAL;
 
-	if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
+	if (!si_fromuser(info))
 		return 0;
 
 	error = audit_signal_info(sig, t); /* Let audit system see the signal */
@@ -1186,8 +1197,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
 		goto out_unlock;
 	}
 	pcred = __task_cred(p);
-	if ((info == SEND_SIG_NOINFO ||
-	     (!is_si_special(info) && SI_FROMUSER(info))) &&
+	if (si_fromuser(info) &&
 	    euid != pcred->suid && euid != pcred->uid &&
 	    uid  != pcred->suid && uid  != pcred->uid) {
 		ret = -EPERM;
-- 
cgit v1.2.3


From ad09750b51150ca87531b8790a379214a974c167 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 15 Dec 2009 16:47:25 -0800
Subject: signals: kill force_sig_specific()

Kill force_sig_specific(), this trivial wrapper has no callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 kernel/signal.c       | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 57b3516f055b..244c287a5ac1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2083,7 +2083,6 @@ extern int kill_proc_info(int, struct siginfo *, pid_t);
 extern int do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
-extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
 extern void zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
diff --git a/kernel/signal.c b/kernel/signal.c
index d7c7f3cd4da8..4a9d763f8922 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1062,12 +1062,6 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 	return ret;
 }
 
-void
-force_sig_specific(int sig, struct task_struct *t)
-{
-	force_sig_info(sig, SEND_SIG_FORCED, t);
-}
-
 /*
  * Nuke all other threads in the group.
  */
-- 
cgit v1.2.3


From b97e820ffffbf49e94ed60c9c26f1a54bccae924 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Tue, 15 Dec 2009 16:47:32 -0800
Subject: ipc/sem.c: add a per-semaphore pending list

Based on Nick's findings:

sysv sem has the concept of semaphore arrays that consist out of multiple
semaphores.  Atomic operations that affect multiple semaphores are
supported.

The patch is the first step for optimizing simple, single semaphore
operations: In addition to the global list of all pending operations, a
2nd, per-semaphore list with the simple operations is added.

Note: this patch does not make sense by itself, the new list is used
nowhere.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h |  5 ++++-
 ipc/sem.c           | 37 ++++++++++++++++++++++++++++++++-----
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 1b191c176bcd..8a4adbef8a0f 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -86,6 +86,7 @@ struct task_struct;
 struct sem {
 	int	semval;		/* current value */
 	int	sempid;		/* pid of last operation */
+	struct list_head sem_pending; /* pending single-sop operations */
 };
 
 /* One sem_array data structure for each set of semaphores in the system. */
@@ -96,11 +97,13 @@ struct sem_array {
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct list_head	sem_pending;	/* pending operations to be processed */
 	struct list_head	list_id;	/* undo requests on this array */
-	unsigned long		sem_nsems;	/* no. of semaphores in array */
+	int			sem_nsems;	/* no. of semaphores in array */
+	int			complex_count;	/* pending complex operations */
 };
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
+	struct list_head	simple_list; /* queue of pending operations */
 	struct list_head	list;	 /* queue of pending operations */
 	struct task_struct	*sleeper; /* this process */
 	struct sem_undo		*undo;	 /* undo structure */
diff --git a/ipc/sem.c b/ipc/sem.c
index eac3f46a5968..4252a8eb77e2 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -241,6 +241,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int nsems = params->u.nsems;
 	int semflg = params->flg;
+	int i;
 
 	if (!nsems)
 		return -EINVAL;
@@ -273,6 +274,11 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	ns->used_sems += nsems;
 
 	sma->sem_base = (struct sem *) &sma[1];
+
+	for (i = 0; i < nsems; i++)
+		INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+
+	sma->complex_count = 0;
 	INIT_LIST_HEAD(&sma->sem_pending);
 	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
@@ -419,6 +425,15 @@ static void wake_up_sem_queue(struct sem_queue *q, int error)
 	preempt_enable();
 }
 
+static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
+{
+	list_del(&q->list);
+	if (q->nsops == 1)
+		list_del(&q->simple_list);
+	else
+		sma->complex_count--;
+}
+
 /* Go through the pending queue for the indicated semaphore
  * looking for tasks that can be completed.
  */
@@ -438,7 +453,7 @@ again:
 		if (error > 0)
 			continue;
 
-		list_del(&q->list);
+		unlink_queue(sma, q);
 
 		/*
 		 * The next operation that must be checked depends on the type
@@ -532,8 +547,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
 	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
-		list_del(&q->list);
-
+		unlink_queue(sma, q);
 		wake_up_sem_queue(q, -EIDRM);
 	}
 
@@ -1191,6 +1205,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	else
 		list_add(&queue.list, &sma->sem_pending);
 
+	if (nsops == 1) {
+		struct sem *curr;
+		curr = &sma->sem_base[sops->sem_num];
+
+		if (alter)
+			list_add_tail(&queue.simple_list, &curr->sem_pending);
+		else
+			list_add(&queue.simple_list, &curr->sem_pending);
+	} else {
+		INIT_LIST_HEAD(&queue.simple_list);
+		sma->complex_count++;
+	}
+
 	queue.status = -EINTR;
 	queue.sleeper = current;
 	current->state = TASK_INTERRUPTIBLE;
@@ -1232,7 +1259,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 */
 	if (timeout && jiffies_left == 0)
 		error = -EAGAIN;
-	list_del(&queue.list);
+	unlink_queue(sma, &queue);
 
 out_unlock_free:
 	sem_unlock(sma);
@@ -1375,7 +1402,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 
 	return seq_printf(s,
-			  "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
+			  "%10d %10d  %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
 			  sma->sem_perm.key,
 			  sma->sem_perm.id,
 			  sma->sem_perm.mode,
-- 
cgit v1.2.3


From 9cf18e1dd74cd0061d58ac55029784ca3dd88f6a Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 15 Dec 2009 16:47:36 -0800
Subject: ipc: HARD_MSGMAX should be higher not lower on 64bit

We have HARD_MSGMAX lower on 64bit than on 32bit, since usually 64bit
machines have more memory than 32bit machines.

Making it higher on 64bit seems reasonable, and keep the original number
on 32bit.

Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e408722a84c7..07baa38bce37 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -87,7 +87,7 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 /* default values */
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
-#define HARD_MSGMAX    (131072/sizeof(void *))
+#define HARD_MSGMAX    (32768*sizeof(void *)/4)
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
-- 
cgit v1.2.3


From 06a7f711246b081afc21fff859f1003f1f2a0fbc Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 15 Dec 2009 16:47:46 -0800
Subject: kexec: premit reduction of the reserved memory size

Implement shrinking the reserved memory for crash kernel, if it is more
than enough.

For example, if you have already reserved 128M, now you just want 100M,
you can do:

# echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size

Note, you can only do this before loading the crash kernel.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@redhat.com>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h |  2 ++
 kernel/kexec.c        | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/ksysfs.c       | 21 ++++++++++++++++++
 3 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index adc34f2c6eff..c356b6914ffd 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -206,6 +206,8 @@ extern size_t vmcoreinfo_max_size;
 
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
+int crash_shrink_memory(unsigned long new_size);
+size_t crash_get_memory_size(void);
 
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..433e9fcc1fc5 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -31,6 +31,7 @@
 #include <linux/cpu.h>
 #include <linux/console.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1082,6 +1083,64 @@ void crash_kexec(struct pt_regs *regs)
 	}
 }
 
+size_t crash_get_memory_size(void)
+{
+	size_t size;
+	mutex_lock(&kexec_mutex);
+	size = crashk_res.end - crashk_res.start + 1;
+	mutex_unlock(&kexec_mutex);
+	return size;
+}
+
+static void free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+		init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+		free_page((unsigned long)__va(addr));
+		totalram_pages++;
+	}
+}
+
+int crash_shrink_memory(unsigned long new_size)
+{
+	int ret = 0;
+	unsigned long start, end;
+
+	mutex_lock(&kexec_mutex);
+
+	if (kexec_crash_image) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+	start = crashk_res.start;
+	end = crashk_res.end;
+
+	if (new_size >= end - start + 1) {
+		ret = -EINVAL;
+		if (new_size == end - start + 1)
+			ret = 0;
+		goto unlock;
+	}
+
+	start = roundup(start, PAGE_SIZE);
+	end = roundup(start + new_size, PAGE_SIZE);
+
+	free_reserved_phys_range(end, crashk_res.end);
+
+	if (start == end) {
+		crashk_res.end = end;
+		release_resource(&crashk_res);
+	} else
+		crashk_res.end = end - 1;
+
+unlock:
+	mutex_unlock(&kexec_mutex);
+	return ret;
+}
+
 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
 			    size_t data_len)
 {
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 528dd78e7e7e..3feaf5a74514 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -100,6 +100,26 @@ static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
 }
 KERNEL_ATTR_RO(kexec_crash_loaded);
 
+static ssize_t kexec_crash_size_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%zu\n", crash_get_memory_size());
+}
+static ssize_t kexec_crash_size_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned long cnt;
+	int ret;
+
+	if (strict_strtoul(buf, 0, &cnt))
+		return -EINVAL;
+
+	ret = crash_shrink_memory(cnt);
+	return ret < 0 ? ret : count;
+}
+KERNEL_ATTR_RW(kexec_crash_size);
+
 static ssize_t vmcoreinfo_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf)
 {
@@ -147,6 +167,7 @@ static struct attribute * kernel_attrs[] = {
 #ifdef CONFIG_KEXEC
 	&kexec_loaded_attr.attr,
 	&kexec_crash_loaded_attr.attr,
+	&kexec_crash_size_attr.attr,
 	&vmcoreinfo_attr.attr,
 #endif
 	NULL
-- 
cgit v1.2.3


From fac046ad0b1ee2c4244ebf43a26433ef0ea29ae4 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Tue, 15 Dec 2009 16:47:47 -0800
Subject: aio: remove unused field

Don't know the reason, but it appears ki_wait field of iocb never gets used.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c            | 40 ++--------------------------------------
 include/linux/aio.h |  4 ----
 2 files changed, 2 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index c30dfc006108..1cf12b3dd83a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -711,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
-		BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
 		aio_complete(iocb, ret, 0);
-	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
@@ -866,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 	unsigned long flags;
 	int run = 0;
 
-	/* We're supposed to be the only path putting the iocb back on the run
-	 * list.  If we find that the iocb is *back* on a wait queue already
-	 * than retry has happened before we could queue the iocb.  This also
-	 * means that the retry could have completed and freed our iocb, no
-	 * good. */
-	BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
-
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	/* set this inside the lock so that we can't race with aio_run_iocb()
 	 * testing it and putting the iocb on the run list under the lock */
@@ -886,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 /*
  * kick_iocb:
  *      Called typically from a wait queue callback context
- *      (aio_wake_function) to trigger a retry of the iocb.
+ *      to trigger a retry of the iocb.
  *      The retry is usually executed by aio workqueue
  *      threads (See aio_kick_handler).
  */
@@ -1520,31 +1511,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 	return 0;
 }
 
-/*
- * aio_wake_function:
- * 	wait queue callback function for aio notification,
- * 	Simply triggers a retry of the operation via kick_iocb.
- *
- * 	This callback is specified in the wait queue entry in
- *	a kiocb.
- *
- * Note:
- * This routine is executed with the wait queue lock held.
- * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests
- * the ioctx lock inside the wait queue lock. This is safe
- * because this callback isn't used for wait queues which
- * are nested inside ioctx lock (i.e. ctx->wait)
- */
-static int aio_wake_function(wait_queue_t *wait, unsigned mode,
-			     int sync, void *key)
-{
-	struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
-
-	list_del_init(&wait->task_list);
-	kick_iocb(iocb);
-	return 1;
-}
-
 static void aio_batch_add(struct address_space *mapping,
 			  struct hlist_head *batch_hash)
 {
@@ -1642,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
 	req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
 	req->ki_opcode = iocb->aio_lio_opcode;
-	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
-	INIT_LIST_HEAD(&req->ki_wait.task_list);
 
 	ret = aio_setup_iocb(req);
 
diff --git a/include/linux/aio.h b/include/linux/aio.h
index aea219d7d8d1..811dbb369379 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -102,7 +102,6 @@ struct kiocb {
 	} ki_obj;
 
 	__u64			ki_user_data;	/* user's data for completion */
-	wait_queue_t		ki_wait;
 	loff_t			ki_pos;
 
 	void			*private;
@@ -140,7 +139,6 @@ struct kiocb {
 		(x)->ki_dtor = NULL;			\
 		(x)->ki_obj.tsk = tsk;			\
 		(x)->ki_user_data = 0;                  \
-		init_wait((&(x)->ki_wait));             \
 	} while (0)
 
 #define AIO_RING_MAGIC			0xa10a10a1
@@ -223,8 +221,6 @@ struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 #endif /* CONFIG_AIO */
 
-#define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait)
-
 static inline struct kiocb *list_kiocb(struct list_head *h)
 {
 	return list_entry(h, struct kiocb, ki_list);
-- 
cgit v1.2.3


From 5fe878ae7f82fbf0830dbfaee4c5ca18f3aee442 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Dec 2009 16:47:50 -0800
Subject: direct-io: cleanup blockdev_direct_IO locking

Currently the locking in blockdev_direct_IO is a mess, we have three
different locking types and very confusing checks for some of them.  The
most complicated one is DIO_OWN_LOCKING for reads, which happens to not
actually be used.

This patch gets rid of the DIO_OWN_LOCKING - as mentioned above the read
case is unused anyway, and the write side is almost identical to
DIO_NO_LOCKING.  The difference is that DIO_NO_LOCKING always sets the
create argument for the get_blocks callback to zero, but we can easily
move that to the actual get_blocks callbacks.  There are four users of the
DIO_NO_LOCKING mode: gfs already ignores the create argument and thus is
fine with the new version, ocfs2 only errors out if create were ever set,
and we can remove this dead code now, the block device code only ever uses
create for an error message if we are fully beyond the device which can
never happen, and last but not least XFS will need the new behavour for
writes.

Now we can replace the lock_type variable with a flags one, where no flag
means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first
flag.  Separate out the check for not allowing to fill holes into a
separate flag, although for now both flags always get set at the same
time.

Also revamp the documentation of the locking scheme to actually make
sense.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Alex Elder <aelder@sgi.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/direct-io.c              | 129 ++++++++++++++++++--------------------------
 fs/ocfs2/aops.c             |  34 ++----------
 fs/xfs/linux-2.6/xfs_aops.c |  20 +++----
 include/linux/fs.h          |  22 +++-----
 4 files changed, 71 insertions(+), 134 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9f34bb9b1ecb..4012885d027f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
  */
 
 struct dio {
@@ -68,7 +61,7 @@ struct dio {
 	struct inode *inode;
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
-	int lock_type;			/* doesn't change */
+	int flags;			/* doesn't change */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -246,7 +239,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, transferred,
 			    dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
+
+	if (dio->flags & DIO_LOCKING)
 		/* lockdep: non-owner release */
 		up_read_non_owner(&dio->inode->i_alloc_sem);
 
@@ -521,21 +515,24 @@ static int get_more_blocks(struct dio *dio)
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
+		/*
+		 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
+		 * forbid block creations: only overwrites are permitted.
+		 * We will return early to the caller once we see an
+		 * unmapped buffer head returned, and the caller will fall
+		 * back to buffered I/O.
+		 *
+		 * Otherwise the decision is left to the get_blocks method,
+		 * which may decide to handle it or also return an unmapped
+		 * buffer head.
+		 */
 		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
+		if (dio->flags & DIO_SKIP_HOLES) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
 				create = 0;
-		} else if (dio->lock_type == DIO_NO_LOCKING) {
-			create = 0;
 		}
 
-		/*
-		 * For writes inside i_size we forbid block creations: only
-		 * overwrites are permitted.  We fall back to buffered writes
-		 * at a higher level for inside-i_size block-instantiating
-		 * writes.
-		 */
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
@@ -1045,7 +1042,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
-	if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
+	if (rw == READ && (dio->flags & DIO_LOCKING))
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
@@ -1092,30 +1089,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 /*
  * This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
  *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
  *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- * For reads, i_mutex is not held on entry, but it is taken and dropped before
- * returning.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- *	uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
  */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+	int flags)
 {
 	int seg;
 	size_t size;
@@ -1126,8 +1121,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	int release_i_mutex = 0;
-	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT_PLUG;
@@ -1168,43 +1161,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 */
 	memset(dio, 0, offsetof(struct dio, pages));
 
-	/*
-	 * For block device access DIO_NO_LOCKING is used,
-	 *	neither readers nor writers do any locking at all
-	 * For regular files using DIO_LOCKING,
-	 *	readers need to grab i_mutex and i_alloc_sem
-	 *	writers need to grab i_alloc_sem only (i_mutex is already held)
-	 * For regular files using DIO_OWN_LOCKING,
-	 *	neither readers nor writers take any locks here
-	 */
-	dio->lock_type = dio_lock_type;
-	if (dio_lock_type != DIO_NO_LOCKING) {
+	dio->flags = flags;
+	if (dio->flags & DIO_LOCKING) {
 		/* watch out for a 0 len io from a tricksy fs */
 		if (rw == READ && end > offset) {
-			struct address_space *mapping;
+			struct address_space *mapping =
+					iocb->ki_filp->f_mapping;
 
-			mapping = iocb->ki_filp->f_mapping;
-			if (dio_lock_type != DIO_OWN_LOCKING) {
-				mutex_lock(&inode->i_mutex);
-				release_i_mutex = 1;
-			}
+			/* will be released by direct_io_worker */
+			mutex_lock(&inode->i_mutex);
 
 			retval = filemap_write_and_wait_range(mapping, offset,
 							      end - 1);
 			if (retval) {
+				mutex_unlock(&inode->i_mutex);
 				kfree(dio);
 				goto out;
 			}
-
-			if (dio_lock_type == DIO_OWN_LOCKING) {
-				mutex_unlock(&inode->i_mutex);
-				acquire_i_mutex = 1;
-			}
 		}
 
-		if (dio_lock_type == DIO_LOCKING)
-			/* lockdep: not the owner will release it */
-			down_read_non_owner(&inode->i_alloc_sem);
+		/*
+		 * Will be released at I/O completion, possibly in a
+		 * different thread.
+		 */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
@@ -1222,24 +1202,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 *
+	 * NOTE: filesystems with their own locking have to handle this
+	 * on their own.
 	 */
-	if (unlikely(retval < 0 && (rw & WRITE))) {
-		loff_t isize = i_size_read(inode);
-
-		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+	if (dio->flags & DIO_LOCKING) {
+		if (unlikely((rw & WRITE) && retval < 0)) {
+			loff_t isize = i_size_read(inode);
+			if (end > isize)
+				vmtruncate(inode, isize);
+		}
 	}
 
-	if (rw == READ && dio_lock_type == DIO_LOCKING)
-		release_i_mutex = 0;
-
 out:
-	if (release_i_mutex)
-		mutex_unlock(&inode->i_mutex);
-	else if (acquire_i_mutex)
-		mutex_lock(&inode->i_mutex);
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
  *
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  * 					fs_count, map_bh, dio->rw == WRITE);
+ *
+ * Note that we never bother to allocate blocks here, and thus ignore the
+ * create argument.
  */
 static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 				     struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
 	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
-	/*
-	 * Any write past EOF is not allowed because we'd be extending.
-	 */
-	if (create && (iblock + max_blocks) > inode_blocks) {
-		ret = -EIO;
-		goto bail;
-	}
-
 	/* This figures out the size of the next contiguous block, and
 	 * our logical offset */
 	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has a hole at block %llu\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			    (unsigned long long)iblock);
-		ret = -EROFS;
-		goto bail;
-	}
-
 	/* We should already CoW the refcounted extent. */
 	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	 */
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
 		map_bh(bh_result, inode->i_sb, p_blkno);
-	else {
-		/*
-		 * ocfs2_prepare_inode_for_write() should have caught
-		 * the case where we'd be filling a hole and triggered
-		 * a buffered write instead.
-		 */
-		if (create) {
-			ret = -EIO;
-			mlog_errno(ret);
-			goto bail;
-		}
-
+	else
 		clear_buffer_mapped(bh_result);
-	}
 
 	/* make sure we don't map more than max_blocks blocks here as
 	   that's all the kernel will handle at this point. */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d798c54296eb..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1474,19 +1474,13 @@ xfs_vm_direct_IO(
 
 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a057f48eb156..b23a7018eb90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2264,9 +2264,11 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int lock_type);
 
 enum {
-	DIO_LOCKING = 1, /* need locking between buffered and direct access */
-	DIO_NO_LOCKING,  /* bdev; no locking at all between buffered/direct */
-	DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
+	/* need locking between buffered and direct access */
+	DIO_LOCKING	= 0x01,
+
+	/* filesystem does not support filling holes */
+	DIO_SKIP_HOLES	= 0x02,
 };
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
@@ -2275,7 +2277,8 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_LOCKING);
+				    nr_segs, get_block, end_io,
+				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 
 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
@@ -2284,16 +2287,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_NO_LOCKING);
-}
-
-static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
-	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs, get_block_t get_block,
-	dio_iodone_t end_io)
-{
-	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
+				nr_segs, get_block, end_io, 0);
 }
 #endif
 
-- 
cgit v1.2.3


From f65380c07b64b0e45ad5c7d70ca54a9cde1c00db Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 15 Dec 2009 16:48:21 -0800
Subject: resource: constify arg to resource_size() and resource_type()

resource_size() doesn't change the resource it operates on, so the res
parameter can be marked const.  Same for resource_type().

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ioport.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 83aa81297ea3..7129504e053d 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -126,11 +126,11 @@ extern int allocate_resource(struct resource *root, struct resource *new,
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
-static inline resource_size_t resource_size(struct resource *res)
+static inline resource_size_t resource_size(const struct resource *res)
 {
 	return res->end - res->start + 1;
 }
-static inline unsigned long resource_type(struct resource *res)
+static inline unsigned long resource_type(const struct resource *res)
 {
 	return res->flags & IORESOURCE_TYPE_BITS;
 }
-- 
cgit v1.2.3


From c1a2a962a2ad103846e7950b4591471fabecece7 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 15 Dec 2009 16:48:25 -0800
Subject: bitmap: introduce bitmap_set, bitmap_clear,
 bitmap_find_next_zero_area

This introduces new bitmap functions:

bitmap_set: Set specified bit area
bitmap_clear: Clear specified bit area
bitmap_find_next_zero_area: Find free bit area

These are mostly stolen from iommu helper. The differences are:

- Use find_next_bit instead of doing test_bit for each bit

- Rewrite bitmap_set and bitmap_clear

  Instead of setting or clearing for each bit.

- Check the last bit of the limit

  iommu-helper doesn't want to find such area

- The return value if there is no zero area

  find_next_zero_area in iommu helper: returns -1
  bitmap_find_next_zero_area: return >= bitmap size

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Lothar Wassmann <LW@KARO-electronics.de>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 11 +++++++
 lib/bitmap.c           | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 756d78b8c1c5..daf8c480c786 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -42,6 +42,9 @@
  * bitmap_empty(src, nbits)			Are all bits zero in *src?
  * bitmap_full(src, nbits)			Are all bits set in *src?
  * bitmap_weight(src, nbits)			Hamming Weight: number set bits
+ * bitmap_set(dst, pos, nbits)			Set specified bit area
+ * bitmap_clear(dst, pos, nbits)		Clear specified bit area
+ * bitmap_find_next_zero_area(buf, len, pos, n, mask)	Find bit free area
  * bitmap_shift_right(dst, src, n, nbits)	*dst = *src >> n
  * bitmap_shift_left(dst, src, n, nbits)	*dst = *src << n
  * bitmap_remap(dst, src, old, new, nbits)	*dst = map(old, new)(src)
@@ -108,6 +111,14 @@ extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, int bits);
 extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
+extern void bitmap_set(unsigned long *map, int i, int len);
+extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask);
+
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 702565821c99..11bf49750583 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -271,6 +271,87 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		nr -= bits_to_set;
+		bits_to_set = BITS_PER_LONG;
+		mask_to_set = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+EXPORT_SYMBOL(bitmap_set);
+
+void bitmap_clear(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		nr -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+EXPORT_SYMBOL(bitmap_clear);
+
+/*
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask)
+{
+	unsigned long index, end, i;
+again:
+	index = find_next_zero_bit(map, size, start);
+
+	/* Align allocation */
+	index = __ALIGN_MASK(index, align_mask);
+
+	end = index + nr;
+	if (end > size)
+		return end;
+	i = find_next_bit(map, end, index);
+	if (i < end) {
+		start = i + 1;
+		goto again;
+	}
+	return index;
+}
+EXPORT_SYMBOL(bitmap_find_next_zero_area);
+
 /*
  * Bitmap printing & parsing functions: first version by Bill Irwin,
  * second version by Paul Jackson, third by Joe Korty.
-- 
cgit v1.2.3


From a66022c457755b5eef61e30866114679c95e1f54 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 15 Dec 2009 16:48:28 -0800
Subject: iommu-helper: use bitmap library

Use bitmap library and kill some unused iommu helper functions.

1. s/iommu_area_free/bitmap_clear/

2. s/iommu_area_reserve/bitmap_set/

3. Use bitmap_find_next_zero_area instead of find_next_zero_area

  This cannot be simple substitution because find_next_zero_area
  doesn't check the last bit of the limit in bitmap

4. Remove iommu_area_free, iommu_area_reserve, and find_next_zero_area

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/iommu.c      |  4 +--
 arch/sparc/kernel/iommu.c        |  3 +-
 arch/x86/kernel/amd_iommu.c      |  4 +--
 arch/x86/kernel/pci-calgary_64.c |  6 ++--
 arch/x86/kernel/pci-gart_64.c    |  6 ++--
 include/linux/iommu-helper.h     |  3 --
 lib/iommu-helper.c               | 59 ++++++----------------------------------
 7 files changed, 21 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index fd51578e29dd..5547ae6e6b0b 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -30,7 +30,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/iommu-helper.h>
 #include <linux/crash_dump.h>
 #include <asm/io.h>
@@ -251,7 +251,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 	}
 
 	ppc_md.tce_free(tbl, entry, npages);
-	iommu_area_free(tbl->it_map, free_entry, npages);
+	bitmap_clear(tbl->it_map, free_entry, npages);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 7690cc219ecc..5fad94950e76 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -11,6 +11,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/iommu-helper.h>
+#include <linux/bitmap.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
@@ -169,7 +170,7 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np
 
 	entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
 
-	iommu_area_free(arena->map, entry, npages);
+	bitmap_clear(arena->map, entry, npages);
 }
 
 int iommu_table_init(struct iommu *iommu, int tsbsize,
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b990b5cc9541..23824fef789c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -19,7 +19,7 @@
 
 #include <linux/pci.h>
 #include <linux/gfp.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
@@ -1162,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 
 	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 
-	iommu_area_free(range->bitmap, address, pages);
+	bitmap_clear(range->bitmap, address, pages);
 
 }
 
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index c563e4c8ff39..2bbde6078143 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -31,7 +31,7 @@
 #include <linux/string.h>
 #include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
@@ -212,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	iommu_area_reserve(tbl->it_map, index, npages);
+	bitmap_set(tbl->it_map, index, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
@@ -303,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	iommu_area_free(tbl->it_map, entry, npages);
+	bitmap_clear(tbl->it_map, entry, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 56c0e730d3fe..34de53b46f87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -23,7 +23,7 @@
 #include <linux/module.h>
 #include <linux/topology.h>
 #include <linux/interrupt.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
@@ -126,7 +126,7 @@ static void free_iommu(unsigned long offset, int size)
 	unsigned long flags;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	iommu_area_free(iommu_gart_bitmap, offset, size);
+	bitmap_clear(iommu_gart_bitmap, offset, size);
 	if (offset >= next_bit)
 		next_bit = offset + size;
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -792,7 +792,7 @@ int __init gart_iommu_init(void)
 	 * Out of IOMMU space handling.
 	 * Reserve some invalid pages at the beginning of the GART.
 	 */
-	iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+	bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
 
 	pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
 	       iommu_size >> 20);
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index 3b068e5b5671..64d1b638745d 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -14,14 +14,11 @@ static inline unsigned long iommu_device_max_index(unsigned long size,
 extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
 				  unsigned long shift,
 				  unsigned long boundary_size);
-extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len);
 extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long start, unsigned int nr,
 				      unsigned long shift,
 				      unsigned long boundary_size,
 				      unsigned long align_mask);
-extern void iommu_area_free(unsigned long *map, unsigned long start,
-			    unsigned int nr);
 
 extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
 				     unsigned long io_page_size);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 75dbda03f4fb..c0251f4ad08b 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,41 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/bitops.h>
-
-static unsigned long find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask)
-{
-	unsigned long index, end, i;
-again:
-	index = find_next_zero_bit(map, size, start);
-
-	/* Align allocation */
-	index = (index + align_mask) & ~align_mask;
-
-	end = index + nr;
-	if (end >= size)
-		return -1;
-	for (i = index; i < end; i++) {
-		if (test_bit(i, map)) {
-			start = i+1;
-			goto again;
-		}
-	}
-	return index;
-}
-
-void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
-{
-	unsigned long end = i + len;
-	while (i < end) {
-		__set_bit(i, map);
-		i++;
-	}
-}
+#include <linux/bitmap.h>
 
 int iommu_is_span_boundary(unsigned int index, unsigned int nr,
 			   unsigned long shift,
@@ -55,31 +21,24 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 			       unsigned long align_mask)
 {
 	unsigned long index;
+
+	/* We don't want the last of the limit */
+	size -= 1;
 again:
-	index = find_next_zero_area(map, size, start, nr, align_mask);
-	if (index != -1) {
+	index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+	if (index < size) {
 		if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
 			/* we could do more effectively */
 			start = index + 1;
 			goto again;
 		}
-		iommu_area_reserve(map, index, nr);
+		bitmap_set(map, index, nr);
+		return index;
 	}
-	return index;
+	return -1;
 }
 EXPORT_SYMBOL(iommu_area_alloc);
 
-void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
-{
-	unsigned long end = start + nr;
-
-	while (start < end) {
-		__clear_bit(start, map);
-		start++;
-	}
-}
-EXPORT_SYMBOL(iommu_area_free);
-
 unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
 			      unsigned long io_page_size)
 {
-- 
cgit v1.2.3


From 3d1e463158febf6e047897597722f768b15350cd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 8 Aug 2009 23:56:29 +0400
Subject: get rid of init_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 30 ++----------------------------
 include/linux/file.h |  3 ---
 2 files changed, 2 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index f906ac8c9a9f..602a9ee3023a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -171,32 +171,6 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	if (!file)
 		return NULL;
 
-	init_file(file, mnt, dentry, mode, fop);
-	return file;
-}
-EXPORT_SYMBOL(alloc_file);
-
-/**
- * init_file - initialize a 'struct file'
- * @file: the already allocated 'struct file' to initialized
- * @mnt: the vfsmount on which the file resides
- * @dentry: the dentry representing this file
- * @mode: the mode the file is opened with
- * @fop: the 'struct file_operations' for this file
- *
- * Use this instead of setting the members directly.  Doing so
- * avoids making mistakes like forgetting the mntget() or
- * forgetting to take a write on the mnt.
- *
- * Note: This is a crappy interface.  It is here to make
- * merging with the existing users of get_empty_filp()
- * who have complex failure logic easier.  All users
- * of this should be moving to alloc_file().
- */
-int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
-	   fmode_t mode, const struct file_operations *fop)
-{
-	int error = 0;
 	file->f_path.dentry = dentry;
 	file->f_path.mnt = mntget(mnt);
 	file->f_mapping = dentry->d_inode->i_mapping;
@@ -210,13 +184,13 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 * that we can do debugging checks at __fput()
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+		int error = 0;
 		file_take_write(file);
 		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
-	return error;
+	return file;
 }
-EXPORT_SYMBOL(init_file);
 
 void fput(struct file *file)
 {
diff --git a/include/linux/file.h b/include/linux/file.h
index 335a0a5c316e..6a8d3612eb2a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,9 +18,6 @@ extern void drop_file_write_access(struct file *file);
 struct file_operations;
 struct vfsmount;
 struct dentry;
-extern int init_file(struct file *, struct vfsmount *mnt,
-		struct dentry *dentry, fmode_t mode,
-		const struct file_operations *fop);
 extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
 		fmode_t mode, const struct file_operations *fop);
 
-- 
cgit v1.2.3


From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Aug 2009 00:52:35 +0400
Subject: switch alloc_file() to passing struct path

... and have the caller grab both mnt and dentry; kill
leak in infiniband, while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/perfmon.c            | 15 ++++++++-------
 drivers/infiniband/core/uverbs_main.c |  9 +++++++--
 fs/anon_inodes.c                      | 18 +++++++++---------
 fs/file_table.c                       | 13 ++++++-------
 fs/hugetlbfs/inode.c                  | 15 ++++++++-------
 fs/notify/inotify/inotify_user.c      |  8 ++++++--
 fs/pipe.c                             | 17 +++++++++--------
 include/linux/file.h                  |  5 +++--
 ipc/shm.c                             | 10 +++++-----
 mm/shmem.c                            | 14 ++++++++------
 net/socket.c                          | 17 +++++++++--------
 11 files changed, 78 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 599b233bef75..5246285a95fb 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2200,7 +2200,7 @@ pfm_alloc_file(pfm_context_t *ctx)
 {
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry;
+	struct path path;
 	char name[32];
 	struct qstr this;
 
@@ -2225,18 +2225,19 @@ pfm_alloc_file(pfm_context_t *ctx)
 	/*
 	 * allocate a new dcache entry
 	 */
-	dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!dentry) {
+	path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+	if (!path.dentry) {
 		iput(inode);
 		return ERR_PTR(-ENOMEM);
 	}
+	path.mnt = mntget(pfmfs_mnt);
 
-	dentry->d_op = &pfmfs_dentry_operations;
-	d_add(dentry, inode);
+	path.dentry->d_op = &pfmfs_dentry_operations;
+	d_add(path.dentry, inode);
 
-	file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops);
+	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
 	if (!file) {
-		dput(dentry);
+		path_put(&path);
 		return ERR_PTR(-ENFILE);
 	}
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index aec0fbdfe7f0..5f284ffd430e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -492,6 +492,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 					int is_async, int *fd)
 {
 	struct ib_uverbs_event_file *ev_file;
+	struct path path;
 	struct file *filp;
 	int ret;
 
@@ -519,8 +520,10 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 	 * system call on a uverbs file, which will already have a
 	 * module reference.
 	 */
-	filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
-			  FMODE_READ, fops_get(&uverbs_event_fops));
+	path.mnt = uverbs_event_mnt;
+	path.dentry = uverbs_event_mnt->mnt_root;
+	path_get(&path);
+	filp = alloc_file(&path, FMODE_READ, fops_get(&uverbs_event_fops));
 	if (!filp) {
 		ret = -ENFILE;
 		goto err_fd;
@@ -531,6 +534,8 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 	return filp;
 
 err_fd:
+	fops_put(&uverbs_event_fops);
+	path_put(&path);
 	put_unused_fd(*fd);
 
 err:
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdbf..94f5110c4655 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -88,7 +88,7 @@ struct file *anon_inode_getfile(const char *name,
 				void *priv, int flags)
 {
 	struct qstr this;
-	struct dentry *dentry;
+	struct path path;
 	struct file *file;
 	int error;
 
@@ -106,10 +106,11 @@ struct file *anon_inode_getfile(const char *name,
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0;
-	dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
-	if (!dentry)
+	path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	if (!path.dentry)
 		goto err_module;
 
+	path.mnt = mntget(anon_inode_mnt);
 	/*
 	 * We know the anon_inode inode count is always greater than zero,
 	 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,14 +118,13 @@ struct file *anon_inode_getfile(const char *name,
 	 */
 	atomic_inc(&anon_inode_inode->i_count);
 
-	dentry->d_op = &anon_inodefs_dentry_operations;
+	path.dentry->d_op = &anon_inodefs_dentry_operations;
 	/* Do not publish this dentry inside the global dentry hash table */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, anon_inode_inode);
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
-	file = alloc_file(anon_inode_mnt, dentry,
-			  FMODE_READ | FMODE_WRITE, fops);
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
 	if (!file)
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
@@ -137,7 +137,7 @@ struct file *anon_inode_getfile(const char *name,
 	return file;
 
 err_dput:
-	dput(dentry);
+	path_put(&path);
 err_module:
 	module_put(fops->owner);
 	return ERR_PTR(error);
diff --git a/fs/file_table.c b/fs/file_table.c
index 602a9ee3023a..163cd28314e0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -162,8 +162,8 @@ fail:
  * If all the callers of init_file() are eliminated, its
  * code should be moved into this function.
  */
-struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
-		fmode_t mode, const struct file_operations *fop)
+struct file *alloc_file(struct path *path, fmode_t mode,
+		const struct file_operations *fop)
 {
 	struct file *file;
 
@@ -171,9 +171,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	if (!file)
 		return NULL;
 
-	file->f_path.dentry = dentry;
-	file->f_path.mnt = mntget(mnt);
-	file->f_mapping = dentry->d_inode->i_mapping;
+	file->f_path = *path;
+	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_mode = mode;
 	file->f_op = fop;
 
@@ -183,10 +182,10 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	 * visible.  We do this for consistency, and so
 	 * that we can do debugging checks at __fput()
 	 */
-	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+	if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
 		int error = 0;
 		file_take_write(file);
-		error = mnt_clone_write(mnt);
+		error = mnt_clone_write(path->mnt);
 		WARN_ON(error);
 	}
 	return file;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b8..6bd41525cd71 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -922,7 +922,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	int error = -ENOMEM;
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry, *root;
+	struct path path;
+	struct dentry *root;
 	struct qstr quick_string;
 
 	*user = NULL;
@@ -944,10 +945,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	quick_string.name = name;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
-	dentry = d_alloc(root, &quick_string);
-	if (!dentry)
+	path.dentry = d_alloc(root, &quick_string);
+	if (!path.dentry)
 		goto out_shm_unlock;
 
+	path.mnt = mntget(hugetlbfs_vfsmount);
 	error = -ENOSPC;
 	inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
 				current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,13 +962,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 			acctflag))
 		goto out_inode;
 
-	d_instantiate(dentry, inode);
+	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;
 
 	error = -ENFILE;
-	file = alloc_file(hugetlbfs_vfsmount, dentry,
-			FMODE_WRITE | FMODE_READ,
+	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
 			&hugetlbfs_file_operations);
 	if (!file)
 		goto out_dentry; /* inode is already attached */
@@ -977,7 +978,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 out_inode:
 	iput(inode);
 out_dentry:
-	dput(dentry);
+	path_put(&path);
 out_shm_unlock:
 	if (*user) {
 		user_shm_unlock(size, *user);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 9e4f90042eaf..8271cf05c957 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	struct fsnotify_group *group;
 	struct user_struct *user;
 	struct file *filp;
+	struct path path;
 	int fd, ret;
 
 	/* Check the IN_* constants for consistency.  */
@@ -675,8 +676,10 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 
 	atomic_inc(&user->inotify_devs);
 
-	filp = alloc_file(inotify_mnt, dget(inotify_mnt->mnt_root),
-			  FMODE_READ, &inotify_fops);
+	path.mnt = inotify_mnt;
+	path.dentry = inotify_mnt->mnt_root;
+	path_get(&path);
+	filp = alloc_file(&path, FMODE_READ, &inotify_fops);
 	if (!filp)
 		goto Enfile;
 
@@ -689,6 +692,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 
 Enfile:
 	ret = -ENFILE;
+	path_put(&path);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa3..81288bc2bcbb 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -974,7 +974,7 @@ struct file *create_write_pipe(int flags)
 	int err;
 	struct inode *inode;
 	struct file *f;
-	struct dentry *dentry;
+	struct path path;
 	struct qstr name = { .name = "" };
 
 	err = -ENFILE;
@@ -983,21 +983,22 @@ struct file *create_write_pipe(int flags)
 		goto err;
 
 	err = -ENOMEM;
-	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
-	if (!dentry)
+	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+	if (!path.dentry)
 		goto err_inode;
+	path.mnt = mntget(pipe_mnt);
 
-	dentry->d_op = &pipefs_dentry_operations;
+	path.dentry->d_op = &pipefs_dentry_operations;
 	/*
 	 * We dont want to publish this dentry into global dentry hash table.
 	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 	 * This permits a working /proc/$pid/fd/XXX on pipes
 	 */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, inode);
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
-	f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
+	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
 	if (!f)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
@@ -1009,7 +1010,7 @@ struct file *create_write_pipe(int flags)
 
  err_dentry:
 	free_pipe_info(inode);
-	dput(dentry);
+	path_put(&path);
 	return ERR_PTR(err);
 
  err_inode:
diff --git a/include/linux/file.h b/include/linux/file.h
index 6a8d3612eb2a..5555508fd517 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,8 +18,9 @@ extern void drop_file_write_access(struct file *file);
 struct file_operations;
 struct vfsmount;
 struct dentry;
-extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
-		fmode_t mode, const struct file_operations *fop);
+struct path;
+extern struct file *alloc_file(struct path *, fmode_t mode,
+	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/ipc/shm.c b/ipc/shm.c
index 11bec626c228..16e39230aa0d 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -878,8 +878,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	if (err)
 		goto out_unlock;
 
-	path.dentry = dget(shp->shm_file->f_path.dentry);
-	path.mnt    = shp->shm_file->f_path.mnt;
+	path = shp->shm_file->f_path;
+	path_get(&path);
 	shp->shm_nattch++;
 	size = i_size_read(path.dentry->d_inode);
 	shm_unlock(shp);
@@ -889,8 +889,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	if (!sfd)
 		goto out_put_dentry;
 
-	file = alloc_file(path.mnt, path.dentry, f_mode,
-			is_file_hugepages(shp->shm_file) ?
+	file = alloc_file(&path, f_mode,
+			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
 	if (!file)
@@ -950,7 +950,7 @@ out_unlock:
 out_free:
 	kfree(sfd);
 out_put_dentry:
-	dput(path.dentry);
+	path_put(&path);
 	goto out_nattch;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index ef8f47473c5a..d2ec7f029ff4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2626,7 +2626,8 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	int error;
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry, *root;
+	struct path path;
+	struct dentry *root;
 	struct qstr this;
 
 	if (IS_ERR(shm_mnt))
@@ -2643,16 +2644,17 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	this.len = strlen(name);
 	this.hash = 0; /* will go */
 	root = shm_mnt->mnt_root;
-	dentry = d_alloc(root, &this);
-	if (!dentry)
+	path.dentry = d_alloc(root, &this);
+	if (!path.dentry)
 		goto put_memory;
+	path.mnt = mntget(shm_mnt);
 
 	error = -ENOSPC;
 	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
 	if (!inode)
 		goto put_dentry;
 
-	d_instantiate(dentry, inode);
+	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
 #ifndef CONFIG_MMU
@@ -2662,7 +2664,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 #endif
 
 	error = -ENFILE;
-	file = alloc_file(shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
+	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
 		  &shmem_file_operations);
 	if (!file)
 		goto put_dentry;
@@ -2671,7 +2673,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	return file;
 
 put_dentry:
-	dput(dentry);
+	path_put(&path);
 put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
diff --git a/net/socket.c b/net/socket.c
index eaaba3510e81..dbfdfa96d29b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = {
 static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 {
 	struct qstr name = { .name = "" };
-	struct dentry *dentry;
+	struct path path;
 	struct file *file;
 	int fd;
 
@@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
-	if (unlikely(!dentry)) {
+	path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+	if (unlikely(!path.dentry)) {
 		put_unused_fd(fd);
 		return -ENOMEM;
 	}
+	path.mnt = mntget(sock_mnt);
 
-	dentry->d_op = &sockfs_dentry_operations;
+	path.dentry->d_op = &sockfs_dentry_operations;
 	/*
 	 * We dont want to push this dentry into global dentry hash table.
 	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 	 * This permits a working /proc/$pid/fd/XXX on sockets
 	 */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, SOCK_INODE(sock));
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-	file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	if (unlikely(!file)) {
 		/* drop dentry, keep inode */
 		atomic_inc(&path.dentry->d_inode->i_count);
-		dput(dentry);
+		path_put(&path);
 		put_unused_fd(fd);
 		return -ENFILE;
 	}
-- 
cgit v1.2.3


From e9496ff46a20a8592fdc7bdaaf41b45eb808d310 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Aug 2009 18:44:32 +0400
Subject: fix mismerge with Trond's stuff (create_mnt_ns() export is gone now)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c                | 3 +--
 fs/nfs/super.c                | 8 --------
 include/linux/mnt_namespace.h | 1 -
 3 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..faab1273281e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2080,7 +2080,6 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 	}
 	return new_ns;
 }
-EXPORT_SYMBOL(create_mnt_ns);
 
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc5508..d5b112bcf3de 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2648,21 +2648,13 @@ out_freepage:
 static int nfs_follow_remote_path(struct vfsmount *root_mnt,
 		const char *export_path, struct vfsmount *mnt_target)
 {
-	struct mnt_namespace *ns_private;
 	struct nameidata nd;
 	struct super_block *s;
 	int ret;
 
-	ns_private = create_mnt_ns(root_mnt);
-	ret = PTR_ERR(ns_private);
-	if (IS_ERR(ns_private))
-		goto out_mntput;
-
 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
 			export_path, LOOKUP_FOLLOW, &nd);
 
-	put_mnt_ns(ns_private);
-
 	if (ret != 0)
 		goto out_err;
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d74785c2393a..d9ebf1037dfa 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -23,7 +23,6 @@ struct proc_mounts {
 
 struct fs_struct;
 
-extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3


From e81e3f4dca6c54116a24aec217d2c15c6f58ada5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 4 Dec 2009 15:47:36 -0500
Subject: fs: move get_empty_filp() deffinition to internal.h

All users outside of fs/ of get_empty_filp() have been removed.  This patch
moves the definition from the include/ directory to internal.h so no new
users crop up and removes the EXPORT_SYMBOL.  I'd love to see open intents
stop using it too, but that's a problem for another day and a smarter
developer!

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 2 ++
 fs/internal.h      | 1 +
 fs/namei.c         | 2 ++
 fs/open.c          | 2 ++
 include/linux/fs.h | 1 -
 5 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 163cd28314e0..361d76be8295 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -24,6 +24,8 @@
 
 #include <asm/atomic.h>
 
+#include "internal.h"
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72e..f67cd141d9a8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,6 +79,7 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
+extern struct file *get_empty_filp(void);
 
 /*
  * super.c
diff --git a/fs/namei.c b/fs/namei.c
index 8c8b379b94a4..1fc038b117be 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,8 @@
 #include <linux/fs_struct.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
 /* [Feb-1997 T. Schoebel-Theuer]
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3a..d95651e8be9e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,6 +31,8 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 
+#include "internal.h"
+
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval = -ENODEV;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a057f48eb156..cdc23be4edde 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2189,7 +2189,6 @@ static inline void insert_inode_hash(struct inode *inode) {
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3


From 1429b3eca23818f87f9fa569a15d9816de81f698 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 Dec 2009 06:38:01 -0500
Subject: Untangling ima mess, part 3: kill dead code in ima

Kill the 'update' argument of ima_path_check(), kill
dead code in ima.

Current rules: ima counters are bumped at the same time
when the file switches from put_filp() fodder to fput()
one.  Which happens exactly in two places - alloc_file()
and __dentry_open().  Nothing else needs to do that at
all.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |  4 +--
 fs/nfsd/vfs.c                     |  3 +--
 include/linux/ima.h               | 12 ++-------
 security/integrity/ima/ima_main.c | 52 +++------------------------------------
 4 files changed, 9 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index c530e5d32f12..a765e7a741f4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1686,7 +1686,7 @@ do_last:
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
 			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC), 0);
+				       (MAY_READ | MAY_WRITE | MAY_EXEC));
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1747,7 +1747,7 @@ ok:
 	filp = nameidata_to_filp(&nd, open_flag);
 	if (!IS_ERR(filp)) {
 		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC), 0);
+			       (MAY_READ | MAY_WRITE | MAY_EXEC));
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c9942b39654e..936f08400db6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2122,8 +2122,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 	 */
 	path.mnt = exp->ex_path.mnt;
 	path.dentry = dentry;
-	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
-			     IMA_COUNT_LEAVE);
+	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
 nfsd_out:
 	return err? nfserrno(err) : 0;
 }
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e3f2a4c25f6..99dc6d5cf7e5 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,18 +13,14 @@
 #include <linux/fs.h>
 struct linux_binprm;
 
-#define IMA_COUNT_UPDATE 1
-#define IMA_COUNT_LEAVE 0
-
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask, int update_counts);
+extern int ima_path_check(struct path *path, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
-extern void ima_counts_put(struct path *path, int mask);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -42,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask, int update_counts)
+static inline int ima_path_check(struct path *path, int mask)
 {
 	return 0;
 }
@@ -62,9 +58,5 @@ static inline void ima_counts_get(struct file *file)
 	return;
 }
 
-static inline void ima_counts_put(struct path *path, int mask)
-{
-	return;
-}
 #endif /* CONFIG_IMA_H */
 #endif /* _LINUX_IMA_H */
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index e041233b4d2a..16dc57d247d0 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -49,20 +49,13 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
-/*
- * Update the counts given open flags instead of fmode
- */
-static void ima_inc_counts_flags(struct ima_iint_cache *iint, int flags)
-{
-	ima_inc_counts(iint, (__force fmode_t)((flags+1) & O_ACCMODE));
-}
-
 /*
  * Decrement ima counts
  */
 static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
-			   fmode_t mode)
+			   struct file *file)
 {
+	mode_t mode = file->f_mode;
 	BUG_ON(!mutex_is_locked(&iint->mutex));
 
 	iint->opencount--;
@@ -92,12 +85,6 @@ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
 	}
 }
 
-static void ima_dec_counts_flags(struct ima_iint_cache *iint,
-				 struct inode *inode, int flags)
-{
-	ima_dec_counts(iint, inode, (__force fmode_t)((flags+1) & O_ACCMODE));
-}
-
 /**
  * ima_file_free - called on __fput()
  * @file: pointer to file structure being freed
@@ -117,7 +104,7 @@ void ima_file_free(struct file *file)
 		return;
 
 	mutex_lock(&iint->mutex);
-	ima_dec_counts(iint, inode, file->f_mode);
+	ima_dec_counts(iint, inode, file);
 	mutex_unlock(&iint->mutex);
 	kref_put(&iint->refcount, iint_free);
 }
@@ -183,7 +170,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct path *path, int mask, int update_counts)
+int ima_path_check(struct path *path, int mask)
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -197,8 +184,6 @@ int ima_path_check(struct path *path, int mask, int update_counts)
 		return 0;
 
 	mutex_lock(&iint->mutex);
-	if (update_counts)
-		ima_inc_counts_flags(iint, mask);
 
 	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
 	if (rc < 0)
@@ -268,35 +253,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_put - decrement file counts
- *
- * File counts are incremented in ima_path_check. On file open
- * error, such as ETXTBSY, decrement the counts to prevent
- * unnecessary imbalance messages.
- */
-void ima_counts_put(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	/* The inode may already have been freed, freeing the iint
-	 * with it. Verify the inode is not NULL before dereferencing
-	 * it.
-	 */
-	if (!ima_initialized || !inode || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-
-	mutex_lock(&iint->mutex);
-	ima_dec_counts_flags(iint, inode, mask);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-
 /*
  * ima_counts_get - increment file counts
  *
-- 
cgit v1.2.3


From 431547b3c4533b8c7fd150ab36980b9a3147797b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2009 09:52:56 +0000
Subject: sanitize xattr handler prototypes

Add a flags argument to struct xattr_handler and pass it to all xattr
handler methods.  This allows using the same methods for multiple
handlers, e.g. for the ACL methods which perform exactly the same action
for the access and default ACLs, just using a different underlying
attribute.  With a little more groundwork it'll also allow sharing the
methods for the regular user/trusted/secure handlers in extN, ocfs2 and
jffs2 like it's already done for xfs in this patch.

Also change the inode argument to the handlers to a dentry to allow
using the handlers mechnism for filesystems that require it later,
e.g. cifs.

[with GFS2 bits updated by Steven Whitehouse <swhiteho@redhat.com>]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/acl.c               | 47 ++++++------------------
 fs/ext2/acl.c                | 79 +++++++++++++---------------------------
 fs/ext2/xattr.c              | 11 ++++--
 fs/ext2/xattr_security.c     | 16 ++++----
 fs/ext2/xattr_trusted.c      | 16 ++++----
 fs/ext2/xattr_user.c         | 25 +++++++------
 fs/ext3/acl.c                | 74 ++++++++++++-------------------------
 fs/ext3/xattr.c              | 31 +++++++++-------
 fs/ext3/xattr_security.c     | 20 +++++-----
 fs/ext3/xattr_trusted.c      | 18 ++++-----
 fs/ext3/xattr_user.c         | 25 +++++++------
 fs/ext4/acl.c                | 74 ++++++++++++-------------------------
 fs/ext4/xattr.c              | 31 +++++++++-------
 fs/ext4/xattr_security.c     | 20 +++++-----
 fs/ext4/xattr_trusted.c      | 20 +++++-----
 fs/ext4/xattr_user.c         | 25 +++++++------
 fs/gfs2/acl.c                | 16 +++++---
 fs/gfs2/inode.c              |  3 +-
 fs/gfs2/xattr.c              | 69 +++++++++++++----------------------
 fs/gfs2/xattr.h              |  7 ++--
 fs/jffs2/acl.c               | 65 +++++++++++----------------------
 fs/jffs2/security.c          | 18 +++++----
 fs/jffs2/xattr.c             |  6 ++-
 fs/jffs2/xattr_trusted.c     | 18 +++++----
 fs/jffs2/xattr_user.c        | 18 +++++----
 fs/ocfs2/acl.c               | 87 +++++++++++++-------------------------------
 fs/ocfs2/xattr.c             | 72 ++++++++++++++++++------------------
 fs/reiserfs/xattr.c          | 36 +++++++++---------
 fs/reiserfs/xattr_acl.c      | 69 ++++++++++-------------------------
 fs/reiserfs/xattr_security.c | 21 ++++++-----
 fs/reiserfs/xattr_trusted.c  | 21 ++++++-----
 fs/reiserfs/xattr_user.c     | 21 ++++++-----
 fs/xattr.c                   | 28 +++++++-------
 fs/xfs/linux-2.6/xfs_acl.c   | 57 ++++++++++-------------------
 fs/xfs/linux-2.6/xfs_xattr.c | 71 ++++++++----------------------------
 fs/xfs/xfs_acl.h             |  3 +-
 include/linux/xattr.h        | 13 ++++---
 mm/shmem.c                   | 19 +++++-----
 mm/shmem_acl.c               | 78 ++++++++++-----------------------------
 39 files changed, 533 insertions(+), 815 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..52cbe47022bf 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static int btrfs_xattr_get_acl(struct inode *inode, int type,
-			       void *value, size_t size)
+static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
 	int ret = 0;
 
-	acl = btrfs_get_acl(inode, type);
+	acl = btrfs_get_acl(dentry->d_inode, type);
 
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
@@ -151,8 +151,8 @@ out:
 	return ret;
 }
 
-static int btrfs_xattr_set_acl(struct inode *inode, int type,
-			       const void *value, size_t size)
+static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	int ret = 0;
 	struct posix_acl *acl = NULL;
@@ -167,38 +167,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
 		}
 	}
 
-	ret = btrfs_set_acl(inode, acl, type);
+	ret = btrfs_set_acl(dentry->d_inode, acl, type);
 
 	posix_acl_release(acl);
 
 	return ret;
 }
 
-
-static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
-				      void *value, size_t size)
-{
-	return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
-				      const void *value, size_t size, int flags)
-{
-	return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
-				       void *value, size_t size)
-{
-	return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
-			       const void *value, size_t size, int flags)
-{
-	return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 int btrfs_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;
@@ -303,14 +278,16 @@ int btrfs_acl_chmod(struct inode *inode)
 
 struct xattr_handler btrfs_xattr_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.get	= btrfs_xattr_acl_default_get,
-	.set	= btrfs_xattr_acl_default_set,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= btrfs_xattr_acl_get,
+	.set	= btrfs_xattr_acl_set,
 };
 
 struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.get	= btrfs_xattr_acl_access_get,
-	.set	= btrfs_xattr_acl_access_set,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= btrfs_xattr_acl_get,
+	.set	= btrfs_xattr_acl_set,
 };
 
 #else /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a70..a99e54318c3d 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
  * Extended attribut handlers
  */
 static size_t
-ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
-			   const char *name, size_t name_len)
+ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
 }
 
 static size_t
-ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
-			    const char *name, size_t name_len)
+ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext2_get_acl(inode, type);
+	acl = ext2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext2_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext2_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!is_owner_or_cap(dentry->d_inode))
 		return -EPERM;
 
 	if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
 	} else
 		acl = NULL;
 
-	error = ext2_set_acl(inode, type, acl);
+	error = ext2_set_acl(dentry->d_inode, type, acl);
 
 release_and_out:
 	posix_acl_release(acl);
 	return error;
 }
 
-static int
-ext2_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext2_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext2_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext2_xattr_list_acl_access,
-	.get	= ext2_xattr_get_acl_access,
-	.set	= ext2_xattr_set_acl_access,
+	.get	= ext2_xattr_get_acl,
+	.set	= ext2_xattr_set_acl,
 };
 
 struct xattr_handler ext2_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext2_xattr_list_acl_default,
-	.get	= ext2_xattr_get_acl_default,
-	.set	= ext2_xattr_set_acl_default,
+	.get	= ext2_xattr_get_acl,
+	.set	= ext2_xattr_set_acl,
 };
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d5..904f00642f84 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/security.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
  * used / required on success.
  */
 static int
-ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	struct ext2_xattr_entry *entry;
 	char *end;
@@ -300,9 +302,10 @@ bad_block:	ext2_error(inode->i_sb, "ext2_xattr_list",
 			ext2_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest) {
 					error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
 ssize_t
 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext2_xattr_list(dentry->d_inode, buffer, size);
+	return ext2_xattr_list(dentry, buffer, size);
 }
 
 /*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb7..c8155845ac05 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+			 const char *name, size_t name_len, int type)
 {
 	const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext2_xattr_security_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name,
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
 			      buffer, size);
 }
 
 static int
-ext2_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext2_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name,
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9f..2a26d71f4771 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      buffer, size);
 }
 
 static int
-ext2_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62f..3f6caf3684b4 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext2_xattr_user_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext2_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext2_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
 
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5f..82ba34158661 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
  * Extended attribute handlers
  */
 static size_t
-ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
-			   const char *name, size_t name_len)
+ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 }
 
 static size_t
-ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t name_len)
+ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 }
 
 static int
-ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext3_get_acl(inode, type);
+	acl = ext3_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext3_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext3_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	handle_t *handle;
 	struct posix_acl *acl;
 	int error, retries = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
 	return error;
 }
 
-static int
-ext3_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext3_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext3_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext3_xattr_list_acl_access,
-	.get	= ext3_xattr_get_acl_access,
-	.set	= ext3_xattr_set_acl_access,
+	.get	= ext3_xattr_get_acl,
+	.set	= ext3_xattr_set_acl,
 };
 
 struct xattr_handler ext3_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext3_xattr_list_acl_default,
-	.get	= ext3_xattr_get_acl_default,
-	.set	= ext3_xattr_set_acl_default,
+	.get	= ext3_xattr_get_acl,
+	.set	= ext3_xattr_set_acl,
 };
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b97..66895ccf76c7 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
 						 struct mb_cache_entry **);
 static void ext3_xattr_rehash(struct ext3_xattr_header *,
 			      struct ext3_xattr_entry *);
-static int ext3_xattr_list(struct inode *inode, char *buffer,
+static int ext3_xattr_list(struct dentry *dentry, char *buffer,
 			   size_t buffer_size);
 
 static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
 ssize_t
 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext3_xattr_list(dentry->d_inode, buffer, size);
+	return ext3_xattr_list(dentry, buffer, size);
 }
 
 static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 
 static int
-ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
 			char *buffer, size_t buffer_size)
 {
 	size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 			ext3_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest)
 					return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 }
 
 static int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	int error;
 
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		goto cleanup;
 	}
 	ext3_xattr_cache_insert(bh);
-	error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+	error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
 }
 
 static int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ext3_xattr_ibody_header *header;
 	struct ext3_inode *raw_inode;
 	struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
 	error = ext3_xattr_check_names(IFIRST(header), end);
 	if (error)
 		goto cleanup;
-	error = ext3_xattr_list_entries(inode, IFIRST(header),
+	error = ext3_xattr_list_entries(dentry, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
@@ -430,12 +433,12 @@ cleanup:
  * used / required on success.
  */
 static int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
 
-	down_read(&EXT3_I(inode)->xattr_sem);
-	i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+	down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+	i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
 	if (i_error < 0) {
 		b_error = 0;
 	} else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 			buffer += i_error;
 			buffer_size -= i_error;
 		}
-		b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+		b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
 		if (b_error < 0)
 			i_error = 0;
 	}
-	up_read(&EXT3_I(inode)->xattr_sem);
+	up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
 	return i_error + b_error;
 }
 
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf2..474348788dd9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+			 const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext3_xattr_security_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name,
-			      buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext3_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name,
-			      value, size, flags);
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+			      name, value, size, flags);
 }
 
 int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4b..e5562845ed96 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
-			      buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b3..3bcfe9ee0a68 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext3_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
  * Extended attribute handlers
  */
 static size_t
-ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
-			   const char *name, size_t name_len)
+ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 }
 
 static size_t
-ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t name_len)
+ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 }
 
 static int
-ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext4_get_acl(inode, type);
+	acl = ext4_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext4_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext4_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	handle_t *handle;
 	struct posix_acl *acl;
 	int error, retries = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
 	return error;
 }
 
-static int
-ext4_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext4_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext4_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext4_xattr_list_acl_access,
-	.get	= ext4_xattr_get_acl_access,
-	.set	= ext4_xattr_set_acl_access,
+	.get	= ext4_xattr_get_acl,
+	.set	= ext4_xattr_set_acl,
 };
 
 struct xattr_handler ext4_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext4_xattr_list_acl_default,
-	.get	= ext4_xattr_get_acl_default,
-	.set	= ext4_xattr_set_acl_default,
+	.get	= ext4_xattr_get_acl,
+	.set	= ext4_xattr_set_acl,
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb3..83218bebbc7c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
 						 struct mb_cache_entry **);
 static void ext4_xattr_rehash(struct ext4_xattr_header *,
 			      struct ext4_xattr_entry *);
-static int ext4_xattr_list(struct inode *inode, char *buffer,
+static int ext4_xattr_list(struct dentry *dentry, char *buffer,
 			   size_t buffer_size);
 
 static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
 ssize_t
 ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext4_xattr_list(dentry->d_inode, buffer, size);
+	return ext4_xattr_list(dentry, buffer, size);
 }
 
 static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 
 static int
-ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
+ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
 			char *buffer, size_t buffer_size)
 {
 	size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
 			ext4_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest)
 					return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
 }
 
 static int
-ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	int error;
 
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		goto cleanup;
 	}
 	ext4_xattr_cache_insert(bh);
-	error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
 }
 
 static int
-ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_inode *raw_inode;
 	struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
 	error = ext4_xattr_check_names(IFIRST(header), end);
 	if (error)
 		goto cleanup;
-	error = ext4_xattr_list_entries(inode, IFIRST(header),
+	error = ext4_xattr_list_entries(dentry, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
@@ -423,12 +426,12 @@ cleanup:
  * used / required on success.
  */
 static int
-ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
 
-	down_read(&EXT4_I(inode)->xattr_sem);
-	i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size);
+	down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+	i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
 	if (i_error < 0) {
 		b_error = 0;
 	} else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 			buffer += i_error;
 			buffer_size -= i_error;
 		}
-		b_error = ext4_xattr_block_list(inode, buffer, buffer_size);
+		b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
 		if (b_error < 0)
 			i_error = 0;
 	}
-	up_read(&EXT4_I(inode)->xattr_sem);
+	up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
 	return i_error + b_error;
 }
 
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..983c253999a7 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext4_xattr_security_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name,
-			      buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext4_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+			      name, value, size, flags);
 }
 
 int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name,
-			      buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		     const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext4_xattr_user_get(struct dentry *dentry, const char *name,
+		    void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext4_xattr_user_set(struct dentry *dentry, const char *name,
+		    const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea846173..87ee309d4c24 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
 	error = posix_acl_to_xattr(acl, data, len);
 	if (error < 0)
 		goto out;
-	error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0);
+	error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
 	if (!error)
 		set_cached_acl(inode, type, acl);
 out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
 	return -EINVAL;
 }
 
-static int gfs2_xattr_system_get(struct inode *inode, const char *name,
-				 void *buffer, size_t size)
+static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
+				 void *buffer, size_t size, int xtype)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl;
 	int type;
 	int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
 	return error;
 }
 
-static int gfs2_xattr_system_set(struct inode *inode, const char *name,
-				 const void *value, size_t size, int flags)
+static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags,
+				 int xtype)
 {
+	struct inode *inode = dentry->d_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct posix_acl *acl = NULL;
 	int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
 	}
 
 set_acl:
-	error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0);
+	error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
 	if (!error) {
 		if (acl)
 			set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
 
 struct xattr_handler gfs2_xattr_system_handler = {
 	.prefix = XATTR_SYSTEM_PREFIX,
+	.flags  = GFS2_EATYPE_SYS,
 	.get    = gfs2_xattr_system_get,
 	.set    = gfs2_xattr_system_set,
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2d..3ff32fa793da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 		return err;
 	}
 
-	err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
+	err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
+			       GFS2_EATYPE_SECURITY);
 	kfree(value);
 	kfree(name);
 
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc4740..8a04108e0c22 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
 /**
  * gfs2_xattr_get - Get a GFS2 extended attribute
  * @inode: The inode
- * @type: The type of extended attribute
  * @name: The name of the extended attribute
  * @buffer: The buffer to write the result into
  * @size: The size of the buffer
+ * @type: The type of extended attribute
  *
  * Returns: actual size of data on success, -errno on error
  */
-
-int gfs2_xattr_get(struct inode *inode, int type, const char *name,
-		   void *buffer, size_t size)
+static int gfs2_xattr_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
 	struct gfs2_ea_location el;
 	int error;
 
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
 /**
  * gfs2_xattr_remove - Remove a GFS2 extended attribute
- * @inode: The inode
+ * @ip: The inode
  * @type: The type of the extended attribute
  * @name: The name of the extended attribute
  *
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
  * Returns: 0, or errno on failure
  */
 
-static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
+static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_ea_location el;
 	int error;
 
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
 }
 
 /**
- * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
- * @inode: The inode
- * @type: The type of the extended attribute
+ * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
+ * @ip: The inode
  * @name: The name of the extended attribute
  * @value: The value of the extended attribute (NULL for remove)
  * @size: The size of the @value argument
  * @flags: Create or Replace
+ * @type: The type of the extended attribute
  *
  * See gfs2_xattr_remove() for details of the removal of xattrs.
  *
  * Returns: 0 or errno on failure
  */
 
-int gfs2_xattr_set(struct inode *inode, int type, const char *name,
-		   const void *value, size_t size, int flags)
+int __gfs2_xattr_set(struct inode *inode, const char *name,
+		   const void *value, size_t size, int flags, int type)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_ea_location el;
 	unsigned int namel = strlen(name);
 	int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
 		return -ERANGE;
 
 	if (value == NULL)
-		return gfs2_xattr_remove(inode, type, name);
+		return gfs2_xattr_remove(ip, type, name);
 
 	if (ea_check_size(sdp, namel, size))
 		return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
 	return error;
 }
 
+static int gfs2_xattr_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	return __gfs2_xattr_set(dentry->d_inode, name, value,
+				size, flags, type);
+}
+
 static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
 				  struct gfs2_ea_header *ea, char *data)
 {
@@ -1529,40 +1534,18 @@ out_alloc:
 	return error;
 }
 
-static int gfs2_xattr_user_get(struct inode *inode, const char *name,
-			       void *buffer, size_t size)
-{
-	return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
-}
-
-static int gfs2_xattr_user_set(struct inode *inode, const char *name,
-			       const void *value, size_t size, int flags)
-{
-	return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
-}
-
-static int gfs2_xattr_security_get(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
-{
-	return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
-}
-
-static int gfs2_xattr_security_set(struct inode *inode, const char *name,
-				   const void *value, size_t size, int flags)
-{
-	return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
-}
-
 static struct xattr_handler gfs2_xattr_user_handler = {
 	.prefix = XATTR_USER_PREFIX,
-	.get    = gfs2_xattr_user_get,
-	.set    = gfs2_xattr_user_set,
+	.flags  = GFS2_EATYPE_USR,
+	.get    = gfs2_xattr_get,
+	.set    = gfs2_xattr_set,
 };
 
 static struct xattr_handler gfs2_xattr_security_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
-	.get    = gfs2_xattr_security_get,
-	.set    = gfs2_xattr_security_set,
+	.flags  = GFS2_EATYPE_SECURITY,
+	.get    = gfs2_xattr_get,
+	.set    = gfs2_xattr_set,
 };
 
 struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4d..d392f8358f2f 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
 	struct gfs2_ea_header *el_prev;
 };
 
-extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
-			  void *buffer, size_t size);
-extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
-			  const void *value, size_t size, int flags);
+extern int __gfs2_xattr_set(struct inode *inode, const char *name,
+			    const void *value, size_t size,
+			    int flags, int type);
 extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
 extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
 
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e97419..7cdc3196476a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
 	return rc;
 }
 
-static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size,
-					 const char *name, size_t name_len)
+static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
 
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
 	return retlen;
 }
 
-static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size,
-					  const char *name, size_t name_len)
+static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
 	return retlen;
 }
 
-static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size)
+static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	struct posix_acl *acl;
 	int rc;
 
-	acl = jffs2_get_acl(inode, type);
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	acl = jffs2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
 	return rc;
 }
 
-static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
+static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	struct posix_acl *acl;
 	int rc;
 
-	if (!is_owner_or_cap(inode))
+	if (name[0] != '\0')
+		return -EINVAL;
+	if (!is_owner_or_cap(dentry->d_inode))
 		return -EPERM;
 
 	if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
 	} else {
 		acl = NULL;
 	}
-	rc = jffs2_set_acl(inode, type, acl);
+	rc = jffs2_set_acl(dentry->d_inode, type, acl);
  out:
 	posix_acl_release(acl);
 	return rc;
 }
 
-static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
-				     const void *buffer, size_t size, int flags)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
-				      const void *buffer, size_t size, int flags)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
 struct xattr_handler jffs2_acl_access_xattr_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= jffs2_acl_access_listxattr,
-	.get	= jffs2_acl_access_getxattr,
-	.set	= jffs2_acl_access_setxattr,
+	.get	= jffs2_acl_getxattr,
+	.set	= jffs2_acl_setxattr,
 };
 
 struct xattr_handler jffs2_acl_default_xattr_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= jffs2_acl_default_listxattr,
-	.get	= jffs2_acl_default_getxattr,
-	.set	= jffs2_acl_default_setxattr,
+	.get	= jffs2_acl_getxattr,
+	.set	= jffs2_acl_setxattr,
 };
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb3..eaccee058583 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
 }
 
 /* ---- XATTR Handler for "security.*" ----------------- */
-static int jffs2_security_getxattr(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
+static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
+				   void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
 
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+				 name, buffer, size);
 }
 
-static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer,
-				   size_t size, int flags)
+static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
 
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size,
-				       const char *name, size_t name_len)
+static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd5..9e75c62c85d6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		if (!xhandle)
 			continue;
 		if (buffer) {
-			rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len);
+			rc = xhandle->list(dentry, buffer+len, size-len,
+					   xd->xname, xd->name_len, xd->flags);
 		} else {
-			rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len);
+			rc = xhandle->list(dentry, NULL, 0, xd->xname,
+					   xd->name_len, xd->flags);
 		}
 		if (rc < 0)
 			goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef348..3e5a5e356e05 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
 #include <linux/mtd/mtd.h>
 #include "nodelist.h"
 
-static int jffs2_trusted_getxattr(struct inode *inode, const char *name,
-				  void *buffer, size_t size)
+static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+				 name, buffer, size);
 }
 
-static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer,
-				  size_t size, int flags)
+static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size,
-				      const char *name, size_t name_len)
+static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada1..8544af67dffe 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
 #include <linux/mtd/mtd.h>
 #include "nodelist.h"
 
-static int jffs2_user_getxattr(struct inode *inode, const char *name,
-			       void *buffer, size_t size)
+static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
+			       void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+				 name, buffer, size);
 }
 
-static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer,
-			       size_t size, int flags)
+static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size,
-				   const char *name, size_t name_len)
+static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec762103..e3e47415d851 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -331,13 +331,14 @@ cleanup:
 	return ret;
 }
 
-static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
 					  char *list,
 					  size_t list_len,
 					  const char *name,
-					  size_t name_len)
+					  size_t name_len,
+					  int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +349,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
 	return size;
 }
 
-static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
 					   char *list,
 					   size_t list_len,
 					   const char *name,
-					   size_t name_len)
+					   size_t name_len,
+					   int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +367,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
 	return size;
 }
 
-static int ocfs2_xattr_get_acl(struct inode *inode,
-			       int type,
-			       void *buffer,
-			       size_t size)
+static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	struct posix_acl *acl;
 	int ret;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ocfs2_get_acl(inode, type);
+	acl = ocfs2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -388,35 +390,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_xattr_get_acl_access(struct inode *inode,
-				      const char *name,
-				      void *buffer,
-				      size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int ocfs2_xattr_get_acl_default(struct inode *inode,
-				       const char *name,
-				       void *buffer,
-				       size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int ocfs2_xattr_set_acl(struct inode *inode,
-			       int type,
-			       const void *value,
-			       size_t size)
+static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct posix_acl *acl;
 	int ret = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return -EOPNOTSUPP;
 
@@ -442,38 +425,18 @@ cleanup:
 	return ret;
 }
 
-static int ocfs2_xattr_set_acl_access(struct inode *inode,
-				      const char *name,
-				      const void *value,
-				      size_t size,
-				      int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int ocfs2_xattr_set_acl_default(struct inode *inode,
-				       const char *name,
-				       const void *value,
-				       size_t size,
-				       int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ocfs2_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ocfs2_xattr_list_acl_access,
-	.get	= ocfs2_xattr_get_acl_access,
-	.set	= ocfs2_xattr_set_acl_access,
+	.get	= ocfs2_xattr_get_acl,
+	.set	= ocfs2_xattr_set_acl,
 };
 
 struct xattr_handler ocfs2_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ocfs2_xattr_list_acl_default,
-	.get	= ocfs2_xattr_get_acl_default,
-	.set	= ocfs2_xattr_set_acl_default,
+	.get	= ocfs2_xattr_get_acl,
+	.set	= ocfs2_xattr_set_acl,
 };
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df2..43c114831c0d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -205,8 +205,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 					   int offset,
 					   struct ocfs2_xattr_value_root **xv,
 					   struct buffer_head **bh);
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -6978,9 +6976,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
 
 	ret = ocfs2_init_security_get(inode, dir, &si);
 	if (!ret) {
-		ret = ocfs2_xattr_security_set(inode, si.name,
-					       si.value, si.value_len,
-					       XATTR_CREATE);
+		ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
+				      si.name, si.value, si.value_len,
+				      XATTR_CREATE);
 		if (ret) {
 			mlog_errno(ret);
 			goto leave;
@@ -7008,9 +7006,9 @@ leave:
 /*
  * 'security' attributes support
  */
-static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
 					size_t list_size, const char *name,
-					size_t name_len)
+					size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7021,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
-				    void *buffer, size_t size)
+static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
+				    void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
-			       buffer, size);
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+			       name, buffer, size);
 }
 
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags)
+static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+			       name, value, size, flags);
 }
 
 int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7074,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
 /*
  * 'trusted' attributes support
  */
-static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
 				       size_t list_size, const char *name,
-				       size_t name_len)
+				       size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7089,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
+static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
-			       buffer, size);
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+			       name, buffer, size);
 }
 
-static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
-				   const void *value, size_t size, int flags)
+static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+			       name, value, size, flags);
 }
 
 struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7118,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
 /*
  * 'user' attributes support
  */
-static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
 				    size_t list_size, const char *name,
-				    size_t name_len)
+				    size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return 0;
@@ -7139,31 +7137,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
-				void *buffer, size_t size)
+static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return -EOPNOTSUPP;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
 			       buffer, size);
 }
 
-static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
-				const void *value, size_t size, int flags)
+static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return -EOPNOTSUPP;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
+			       name, value, size, flags);
 }
 
 struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..8c7033a8b67e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
 #include <net/checksum.h>
 #include <linux/stat.h>
 #include <linux/quotaops.h>
+#include <linux/security.h>
 
 #define PRIVROOT_NAME ".reiserfs_priv"
 #define XAROOT_NAME   "xattrs"
@@ -726,15 +727,14 @@ ssize_t
 reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 		  size_t size)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
 
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->get(inode, name, buffer, size);
+	return handler->get(dentry, name, buffer, size, handler->flags);
 }
 
 /*
@@ -746,15 +746,14 @@ int
 reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
 
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->set(inode, name, value, size, flags);
+	return handler->set(dentry, name, value, size, flags, handler->flags);
 }
 
 /*
@@ -764,21 +763,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
  */
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+	return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
 }
 
 struct listxattr_buf {
 	size_t size;
 	size_t pos;
 	char *buf;
-	struct inode *inode;
+	struct dentry *dentry;
 };
 
 static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +787,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
 	if (name[0] != '.' ||
 	    (namelen != 1 && (name[1] != '.' || namelen != 2))) {
 		struct xattr_handler *handler;
-		handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr,
+		handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
 						    name);
 		if (!handler)	/* Unsupported xattr name */
 			return 0;
 		if (b->buf) {
-			size = handler->list(b->inode, b->buf + b->pos,
-					 b->size, name, namelen);
+			size = handler->list(b->dentry, b->buf + b->pos,
+					 b->size, name, namelen,
+					 handler->flags);
 			if (size > b->size)
 				return -ERANGE;
 		} else {
-			size = handler->list(b->inode, NULL, 0, name, namelen);
+			size = handler->list(b->dentry, NULL, 0, name,
+					     namelen, handler->flags);
 		}
 
 		b->pos += size;
@@ -820,7 +820,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 	int err = 0;
 	loff_t pos = 0;
 	struct listxattr_buf buf = {
-		.inode = dentry->d_inode,
+		.dentry = dentry,
 		.buf = buffer,
 		.size = buffer ? size : 0,
 	};
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a279..cc32e6ada67b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
 			    struct posix_acl *acl);
 
 static int
-xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
+posix_acl_set(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl;
 	int error, error2;
 	struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 }
 
 static int
-xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return -EOPNOTSUPP;
 
-	acl = reiserfs_get_acl(inode, type);
+	acl = reiserfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -482,30 +485,12 @@ int reiserfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-static int
-posix_acl_access_get(struct inode *inode, const char *name,
-		     void *buffer, size_t size)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-posix_acl_access_set(struct inode *inode, const char *name,
-		     const void *value, size_t size, int flags)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static size_t posix_acl_access_list(struct inode *inode, char *list,
+static size_t posix_acl_access_list(struct dentry *dentry, char *list,
 				    size_t list_size, const char *name,
-				    size_t name_len)
+				    size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +499,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
 
 struct xattr_handler reiserfs_posix_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.get = posix_acl_access_get,
-	.set = posix_acl_access_set,
+	.flags = ACL_TYPE_ACCESS,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
 	.list = posix_acl_access_list,
 };
 
-static int
-posix_acl_default_get(struct inode *inode, const char *name,
-		      void *buffer, size_t size)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-posix_acl_default_set(struct inode *inode, const char *name,
-		      const void *value, size_t size, int flags)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static size_t posix_acl_default_list(struct inode *inode, char *list,
+static size_t posix_acl_default_list(struct dentry *dentry, char *list,
 				     size_t list_size, const char *name,
-				     size_t name_len)
+				     size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +519,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
 
 struct xattr_handler reiserfs_posix_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.get = posix_acl_default_get,
-	.set = posix_acl_default_set,
+	.flags = ACL_TYPE_DEFAULT,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
 	.list = posix_acl_default_list,
 };
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f6..d8b5bfcbdd30 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
 #include <asm/uaccess.h>
 
 static int
-security_get(struct inode *inode, const char *name, void *buffer, size_t size)
+security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+		int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-security_set(struct inode *inode, const char *name, const void *buffer,
-	     size_t size, int flags)
+security_set(struct dentry *dentry, const char *name, const void *buffer,
+	     size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t security_list(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t namelen)
+static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t namelen, int handler_flags)
 {
 	const size_t len = namelen + 1;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return 0;
 
 	if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e2..5b08aaca3daf 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
 #include <asm/uaccess.h>
 
 static int
-trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
+trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	    int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-trusted_set(struct inode *inode, const char *name, const void *buffer,
-	    size_t size, int flags)
+trusted_set(struct dentry *dentry, const char *name, const void *buffer,
+	    size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t trusted_list(struct inode *inode, char *list, size_t list_size,
-			   const char *name, size_t name_len)
+static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
+			   const char *name, size_t name_len, int handler_flags)
 {
 	const size_t len = name_len + 1;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return 0;
 
 	if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3db..75d59c49b911 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
 #include <asm/uaccess.h>
 
 static int
-user_get(struct inode *inode, const char *name, void *buffer, size_t size)
+user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	 int handler_flags)
 {
 
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return -EOPNOTSUPP;
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-user_set(struct inode *inode, const char *name, const void *buffer,
-	 size_t size, int flags)
+user_set(struct dentry *dentry, const char *name, const void *buffer,
+	 size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return -EOPNOTSUPP;
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t user_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
+			const char *name, size_t name_len, int handler_flags)
 {
 	const size_t len = name_len + 1;
 
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return 0;
 	if (list && len <= list_size) {
 		memcpy(list, name, name_len);
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449fb..46f87e828b48 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
 generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->get(inode, name, buffer, size);
+	return handler->get(dentry, name, buffer, size, handler->flags);
 }
 
 /*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
 ssize_t
 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-	struct inode *inode = dentry->d_inode;
-	struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
+	struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
 	unsigned int size = 0;
 
 	if (!buffer) {
-		for_each_xattr_handler(handlers, handler)
-			size += handler->list(inode, NULL, 0, NULL, 0);
+		for_each_xattr_handler(handlers, handler) {
+			size += handler->list(dentry, NULL, 0, NULL, 0,
+					      handler->flags);
+		}
 	} else {
 		char *buf = buffer;
 
 		for_each_xattr_handler(handlers, handler) {
-			size = handler->list(inode, buf, buffer_size, NULL, 0);
+			size = handler->list(dentry, buf, buffer_size,
+					     NULL, 0, handler->flags);
 			if (size > buffer_size)
 				return -ERANGE;
 			buf += size;
@@ -659,14 +660,13 @@ int
 generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
 	if (size == 0)
 		value = "";  /* empty EA, do not remove */
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(inode, name, value, size, flags);
+	return handler->set(dentry, name, value, size, 0, handler->flags);
 }
 
 /*
@@ -677,12 +677,12 @@ int
 generic_removexattr(struct dentry *dentry, const char *name)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+	return handler->set(dentry, name, NULL, 0,
+			    XATTR_REPLACE, handler->flags);
 }
 
 EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 69e598b6986f..2512125dfa7c 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -354,37 +354,14 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-/*
- * System xattr handlers.
- *
- * Currently Posix ACLs are the only system namespace extended attribute
- * handlers supported by XFS, so we just implement the handlers here.
- * If we ever support other system extended attributes this will need
- * some refactoring.
- */
-
 static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *value, size_t size)
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
-	int type, error;
-
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
+	int error;
 
-	acl = xfs_get_acl(inode, type);
+	acl = xfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -397,15 +374,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
 }
 
 static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
-	int error = 0, type;
+	int error = 0;
 
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
 	if (flags & XATTR_CREATE)
 		return -EINVAL;
 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -462,8 +437,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
 	return error;
 }
 
-struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
+struct xattr_handler xfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
+
+struct xattr_handler xfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
 };
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..0b1878857fc3 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
 
 
 static int
-__xfs_xattr_get(struct inode *inode, const char *name,
+xfs_xattr_get(struct dentry *dentry, const char *name,
 		void *value, size_t size, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 	int error, asize = size;
 
 	if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
 }
 
 static int
-__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
 		size_t size, int flags, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
 	return -xfs_attr_set(ip, name, (void *)value, size, xflags);
 }
 
-static int
-xfs_xattr_user_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, 0);
-}
-
-static int
-xfs_xattr_user_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, 0);
-}
-
 static struct xattr_handler xfs_xattr_user_handler = {
 	.prefix	= XATTR_USER_PREFIX,
-	.get	= xfs_xattr_user_get,
-	.set	= xfs_xattr_user_set,
+	.flags	= 0, /* no flags implies user namespace */
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_trusted_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
-}
-
-static int
-xfs_xattr_trusted_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
-}
-
 static struct xattr_handler xfs_xattr_trusted_handler = {
 	.prefix	= XATTR_TRUSTED_PREFIX,
-	.get	= xfs_xattr_trusted_get,
-	.set	= xfs_xattr_trusted_set,
+	.flags	= ATTR_ROOT,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_secure_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
-}
-
-static int
-xfs_xattr_secure_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
-}
-
 static struct xattr_handler xfs_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
-	.get	= xfs_xattr_secure_get,
-	.set	= xfs_xattr_secure_set,
+	.flags	= ATTR_SECURE,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
 struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_user_handler,
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
 #ifdef CONFIG_XFS_POSIX_ACL
-	&xfs_xattr_system_handler,
+	&xfs_xattr_acl_access_handler,
+	&xfs_xattr_acl_default_handler,
 #endif
 	NULL
 };
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..00fd357c3e46 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);
 
-extern struct xattr_handler xfs_xattr_system_handler;
+extern struct xattr_handler xfs_xattr_acl_access_handler;
+extern struct xattr_handler xfs_xattr_acl_default_handler;
 #else
 # define xfs_check_acl					NULL
 # define xfs_get_acl(inode, type)			NULL
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 5c84af8c5f6f..fb9b7e6e1e2d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -38,12 +38,13 @@ struct dentry;
 
 struct xattr_handler {
 	char *prefix;
-	size_t (*list)(struct inode *inode, char *list, size_t list_size,
-		       const char *name, size_t name_len);
-	int (*get)(struct inode *inode, const char *name, void *buffer,
-		   size_t size);
-	int (*set)(struct inode *inode, const char *name, const void *buffer,
-		   size_t size, int flags);
+	int flags;	/* fs private flags passed back to the handlers */
+	size_t (*list)(struct dentry *dentry, char *list, size_t list_size,
+		       const char *name, size_t name_len, int handler_flags);
+	int (*get)(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int handler_flags);
+	int (*set)(struct dentry *dentry, const char *name, const void *buffer,
+		   size_t size, int flags, int handler_flags);
 };
 
 ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
diff --git a/mm/shmem.c b/mm/shmem.c
index adf8033afd52..3cd32c2ea0a0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2042,27 +2042,28 @@ static const struct inode_operations shmem_symlink_inode_operations = {
  * filesystem level, though.
  */
 
-static size_t shmem_xattr_security_list(struct inode *inode, char *list,
+static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
 					size_t list_len, const char *name,
-					size_t name_len)
+					size_t name_len, int handler_flags)
 {
-	return security_inode_listsecurity(inode, list, list_len);
+	return security_inode_listsecurity(dentry->d_inode, list, list_len);
 }
 
-static int shmem_xattr_security_get(struct inode *inode, const char *name,
-				    void *buffer, size_t size)
+static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int handler_flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return xattr_getsecurity(inode, name, buffer, size);
+	return xattr_getsecurity(dentry->d_inode, name, buffer, size);
 }
 
-static int shmem_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags)
+static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int handler_flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return security_inode_setsecurity(inode, name, value, size, flags);
+	return security_inode_setsecurity(dentry->d_inode, name, value,
+					  size, flags);
 }
 
 static struct xattr_handler shmem_xattr_security_handler = {
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index df2c87fdae50..f8d5330ec0d7 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -63,86 +63,48 @@ struct generic_acl_operations shmem_acl_ops = {
 	.setacl = shmem_set_acl,
 };
 
-/**
- * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
- * shmem_xattr_acl_access_handler  -  plumbing code to implement the
- * system.posix_acl_access xattr using the generic acl functions.
- */
-
 static size_t
-shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
-		      const char *name, size_t name_len)
+shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
-	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
-				list, list_size);
+	return generic_acl_list(dentry->d_inode, &shmem_acl_ops,
+				type, list, list_size);
 }
 
 static int
-shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
-		     size_t size)
+shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		     size_t size, int type)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
-			       size);
+	return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type,
+			       buffer, size);
 }
 
 static int
-shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
-		     size_t size, int flags)
+shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		     size_t size, int flags, int type)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
-			       size);
+	return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type,
+			       value, size);
 }
 
 struct xattr_handler shmem_xattr_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.list	= shmem_list_acl_access,
-	.get	= shmem_get_acl_access,
-	.set	= shmem_set_acl_access,
+	.flags	= ACL_TYPE_ACCESS,
+	.list	= shmem_xattr_list_acl,
+	.get	= shmem_xattr_get_acl,
+	.set	= shmem_xattr_set_acl,
 };
 
-/**
- * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
- * shmem_xattr_acl_default_handler  -  plumbing code to implement the
- * system.posix_acl_default xattr using the generic acl functions.
- */
-
-static size_t
-shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
-		       const char *name, size_t name_len)
-{
-	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
-				list, list_size);
-}
-
-static int
-shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
-		      size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
-			       size);
-}
-
-static int
-shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
-		      size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
-			       size);
-}
-
 struct xattr_handler shmem_xattr_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.list	= shmem_list_acl_default,
-	.get	= shmem_get_acl_default,
-	.set	= shmem_set_acl_default,
+	.flags	= ACL_TYPE_DEFAULT,
+	.list	= shmem_xattr_list_acl,
+	.get	= shmem_xattr_get_acl,
+	.set	= shmem_xattr_set_acl,
 };
 
 /**
-- 
cgit v1.2.3


From 1c7c474c31aea6d5cb2fb35f31d9e9e91ae466b1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Nov 2009 16:44:44 +0100
Subject: make generic_acl slightly more generic

Now that we cache the ACL pointers in the generic inode all the generic_acl
cruft can go away and generic_acl.c can directly implement xattr handlers
dealing with the full Posix ACL semantics for in-memory filesystems.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/generic_acl.c            | 158 +++++++++++++++++++++++++-------------------
 include/linux/generic_acl.h |  41 +++---------
 include/linux/shmem_fs.h    |  16 -----
 mm/Makefile                 |   1 -
 mm/shmem.c                  |  17 +++--
 mm/shmem_acl.c              | 133 -------------------------------------
 6 files changed, 109 insertions(+), 257 deletions(-)
 delete mode 100644 mm/shmem_acl.c

(limited to 'include/linux')

diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbec..55458031e501 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
 /*
- * fs/generic_acl.c
- *
  * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
  *
  * This file is released under the GPL.
+ *
+ * Generic ACL support for in-memory filesystems.
  */
 
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/generic_acl.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
 
-/**
- * generic_acl_list  -  Generic xattr_handler->list() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-size_t
-generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
-		 int type, char *list, size_t list_size)
+
+static size_t
+generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	struct posix_acl *acl;
-	const char *name;
+	const char *xname;
 	size_t size;
 
-	acl = ops->getacl(inode, type);
+	acl = get_cached_acl(dentry->d_inode, type);
 	if (!acl)
 		return 0;
 	posix_acl_release(acl);
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			name = POSIX_ACL_XATTR_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			name = POSIX_ACL_XATTR_DEFAULT;
-			break;
-
-		default:
-			return 0;
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		xname = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		xname = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		return 0;
 	}
-	size = strlen(name) + 1;
+	size = strlen(xname) + 1;
 	if (list && size <= list_size)
-		memcpy(list, name, size);
+		memcpy(list, xname, size);
 	return size;
 }
 
-/**
- * generic_acl_get  -  Generic xattr_handler->get() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
-		int type, void *buffer, size_t size)
+static int
+generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
+		     size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	acl = ops->getacl(inode, type);
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	acl = get_cached_acl(dentry->d_inode, type);
 	if (!acl)
 		return -ENODATA;
 	error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
 	return error;
 }
 
-/**
- * generic_acl_set  -  Generic xattr_handler->set() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
-		int type, const void *value, size_t size)
+static int
+generic_acl_set(struct dentry *dentry, const char *name, const void *value,
+		     size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
 	int error;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
 		error = posix_acl_valid(acl);
 		if (error)
 			goto failed;
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				mode = inode->i_mode;
-				error = posix_acl_equiv_mode(acl, &mode);
-				if (error < 0)
-					goto failed;
-				inode->i_mode = mode;
-				if (error == 0) {
-					posix_acl_release(acl);
-					acl = NULL;
-				}
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				if (!S_ISDIR(inode->i_mode)) {
-					error = -EINVAL;
-					goto failed;
-				}
-				break;
+		switch (type) {
+		case ACL_TYPE_ACCESS:
+			mode = inode->i_mode;
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0)
+				goto failed;
+			inode->i_mode = mode;
+			if (error == 0) {
+				posix_acl_release(acl);
+				acl = NULL;
+			}
+			break;
+		case ACL_TYPE_DEFAULT:
+			if (!S_ISDIR(inode->i_mode)) {
+				error = -EINVAL;
+				goto failed;
+			}
+			break;
 		}
 	}
-	ops->setacl(inode, type, acl);
+	set_cached_acl(inode, type, acl);
 	error = 0;
 failed:
 	posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
 
 /**
  * generic_acl_init  -  Take care of acl inheritance at @inode create time
- * @ops:	Filesystem specific getacl and setacl callbacks
  *
  * Files created inside a directory with a default ACL inherit the
  * directory's default ACL.
  */
 int
-generic_acl_init(struct inode *inode, struct inode *dir,
-		 struct generic_acl_operations *ops)
+generic_acl_init(struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
 	mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 
 	inode->i_mode = mode & ~current_umask();
 	if (!S_ISLNK(inode->i_mode))
-		acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
+		acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
 	if (acl) {
 		struct posix_acl *clone;
 
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 			error = -ENOMEM;
 			if (!clone)
 				goto cleanup;
-			ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
+			set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
 			posix_acl_release(clone);
 		}
 		clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 		if (error >= 0) {
 			inode->i_mode = mode;
 			if (error > 0)
-				ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+				set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 		}
 		posix_acl_release(clone);
 	}
@@ -169,20 +161,19 @@ cleanup:
 
 /**
  * generic_acl_chmod  -  change the access acl of @inode upon chmod()
- * @ops:	FIlesystem specific getacl and setacl callbacks
  *
  * A chmod also changes the permissions of the owner, group/mask, and
  * other ACL entries.
  */
 int
-generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
+generic_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
 	int error = 0;
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
-	acl = ops->getacl(inode, ACL_TYPE_ACCESS);
+	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
 	if (acl) {
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
 			return -ENOMEM;
 		error = posix_acl_chmod_masq(clone, inode->i_mode);
 		if (!error)
-			ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+			set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 		posix_acl_release(clone);
 	}
 	return error;
 }
+
+int
+generic_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+	return -EAGAIN;
+}
+
+struct xattr_handler generic_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.list	= generic_acl_list,
+	.get	= generic_acl_get,
+	.set	= generic_acl_set,
+};
+
+struct xattr_handler generic_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.list	= generic_acl_list,
+	.get	= generic_acl_get,
+	.set	= generic_acl_set,
+};
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 886f5faa08cb..ca666d18ed67 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -1,36 +1,15 @@
-/*
- * include/linux/generic_acl.h
- *
- * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
- *
- * This file is released under the GPL.
- */
+#ifndef LINUX_GENERIC_ACL_H
+#define LINUX_GENERIC_ACL_H
 
-#ifndef GENERIC_ACL_H
-#define GENERIC_ACL_H
+#include <linux/xattr.h>
 
-#include <linux/posix_acl.h>
-#include <linux/posix_acl_xattr.h>
+struct inode;
 
-/**
- * struct generic_acl_operations  -  filesystem operations
- *
- * Filesystems must make these operations available to the generic
- * operations.
- */
-struct generic_acl_operations {
-	struct posix_acl *(*getacl)(struct inode *, int);
-	void (*setacl)(struct inode *, int, struct posix_acl *);
-};
+extern struct xattr_handler generic_acl_access_handler;
+extern struct xattr_handler generic_acl_default_handler;
 
-size_t generic_acl_list(struct inode *, struct generic_acl_operations *, int,
-			char *, size_t);
-int generic_acl_get(struct inode *, struct generic_acl_operations *, int,
-		    void *, size_t);
-int generic_acl_set(struct inode *, struct generic_acl_operations *, int,
-		    const void *, size_t);
-int generic_acl_init(struct inode *, struct inode *,
-		     struct generic_acl_operations *);
-int generic_acl_chmod(struct inode *, struct generic_acl_operations *);
+int generic_acl_init(struct inode *, struct inode *);
+int generic_acl_chmod(struct inode *);
+int generic_check_acl(struct inode *inode, int mask);
 
-#endif
+#endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index deee7afd8d66..e164291fb3e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -41,20 +41,4 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 extern int init_tmpfs(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 
-#ifdef CONFIG_TMPFS_POSIX_ACL
-int shmem_check_acl(struct inode *, int);
-int shmem_acl_init(struct inode *, struct inode *);
-
-extern struct xattr_handler shmem_xattr_acl_access_handler;
-extern struct xattr_handler shmem_xattr_acl_default_handler;
-
-extern struct generic_acl_operations shmem_acl_ops;
-
-#else
-static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-#endif  /* CONFIG_TMPFS_POSIX_ACL */
-
 #endif
diff --git a/mm/Makefile b/mm/Makefile
index 82131d0f8d85..7a68d2ab5560 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
-obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_KSM) += ksm.o
diff --git a/mm/shmem.c b/mm/shmem.c
index 3cd32c2ea0a0..f8485062f3ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -41,6 +41,7 @@ static struct vfsmount *shm_mnt;
 
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
+#include <linux/posix_acl.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
 #include <linux/string.h>
@@ -809,7 +810,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 		error = inode_setattr(inode, attr);
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	if (!error && (attr->ia_valid & ATTR_MODE))
-		error = generic_acl_chmod(inode, &shmem_acl_ops);
+		error = generic_acl_chmod(inode);
 #endif
 	if (page)
 		page_cache_release(page);
@@ -1823,11 +1824,13 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 				return error;
 			}
 		}
-		error = shmem_acl_init(inode, dir);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+		error = generic_acl_init(inode, dir);
 		if (error) {
 			iput(inode);
 			return error;
 		}
+#endif
 		if (dir->i_mode & S_ISGID) {
 			inode->i_gid = dir->i_gid;
 			if (S_ISDIR(mode))
@@ -2074,8 +2077,8 @@ static struct xattr_handler shmem_xattr_security_handler = {
 };
 
 static struct xattr_handler *shmem_xattr_handlers[] = {
-	&shmem_xattr_acl_access_handler,
-	&shmem_xattr_acl_default_handler,
+	&generic_acl_access_handler,
+	&generic_acl_default_handler,
 	&shmem_xattr_security_handler,
 	NULL
 };
@@ -2454,7 +2457,7 @@ static const struct inode_operations shmem_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 
 };
@@ -2477,7 +2480,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 };
 
@@ -2488,7 +2491,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 };
 
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
deleted file mode 100644
index f8d5330ec0d7..000000000000
--- a/mm/shmem_acl.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * mm/shmem_acl.c
- *
- * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
- *
- * This file is released under the GPL.
- */
-
-#include <linux/fs.h>
-#include <linux/shmem_fs.h>
-#include <linux/xattr.h>
-#include <linux/generic_acl.h>
-
-/**
- * shmem_get_acl  -   generic_acl_operations->getacl() operation
- */
-static struct posix_acl *
-shmem_get_acl(struct inode *inode, int type)
-{
-	struct posix_acl *acl = NULL;
-
-	spin_lock(&inode->i_lock);
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = posix_acl_dup(inode->i_acl);
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = posix_acl_dup(inode->i_default_acl);
-			break;
-	}
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
-/**
- * shmem_set_acl  -   generic_acl_operations->setacl() operation
- */
-static void
-shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-	struct posix_acl *free = NULL;
-
-	spin_lock(&inode->i_lock);
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			free = inode->i_acl;
-			inode->i_acl = posix_acl_dup(acl);
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			free = inode->i_default_acl;
-			inode->i_default_acl = posix_acl_dup(acl);
-			break;
-	}
-	spin_unlock(&inode->i_lock);
-	posix_acl_release(free);
-}
-
-struct generic_acl_operations shmem_acl_ops = {
-	.getacl = shmem_get_acl,
-	.setacl = shmem_set_acl,
-};
-
-static size_t
-shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size,
-		const char *name, size_t name_len, int type)
-{
-	return generic_acl_list(dentry->d_inode, &shmem_acl_ops,
-				type, list, list_size);
-}
-
-static int
-shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
-		     size_t size, int type)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type,
-			       buffer, size);
-}
-
-static int
-shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
-		     size_t size, int flags, int type)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type,
-			       value, size);
-}
-
-struct xattr_handler shmem_xattr_acl_access_handler = {
-	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.flags	= ACL_TYPE_ACCESS,
-	.list	= shmem_xattr_list_acl,
-	.get	= shmem_xattr_get_acl,
-	.set	= shmem_xattr_set_acl,
-};
-
-struct xattr_handler shmem_xattr_acl_default_handler = {
-	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.flags	= ACL_TYPE_DEFAULT,
-	.list	= shmem_xattr_list_acl,
-	.get	= shmem_xattr_get_acl,
-	.set	= shmem_xattr_set_acl,
-};
-
-/**
- * shmem_acl_init  -  Inizialize the acl(s) of a new inode
- */
-int
-shmem_acl_init(struct inode *inode, struct inode *dir)
-{
-	return generic_acl_init(inode, dir, &shmem_acl_ops);
-}
-
-/**
- * shmem_check_acl  -  check_acl() callback for generic_permission()
- */
-int
-shmem_check_acl(struct inode *inode, int mask)
-{
-	struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
-
-	if (acl) {
-		int error = posix_acl_permission(inode, acl, mask);
-		posix_acl_release(acl);
-		return error;
-	}
-	return -EAGAIN;
-}
-- 
cgit v1.2.3


From 1e431f5ce78f3ae8254d725060288b78ff74f086 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Nov 2009 16:44:53 +0100
Subject: cleanup blockdev_direct_IO locking

Currently the locking in blockdev_direct_IO is a mess, we have three different
locking types and very confusing checks for some of them.  The most
complicated one is DIO_OWN_LOCKING for reads, which happens to not actually be
used.

This patch gets rid of the DIO_OWN_LOCKING - as mentioned above the read case
is unused anyway, and the write side is almost identical to DIO_NO_LOCKING.
The difference is that DIO_NO_LOCKING always sets the create argument for
the get_blocks callback to zero, but we can easily move that to the actual
get_blocks callbacks.  There are four users of the DIO_NO_LOCKING mode:
gfs already ignores the create argument and thus is fine with the new
version, ocfs2 only errors out if create were ever set, and we can remove
this dead code now, the block device code only ever uses create for an
error message if we are fully beyond the device which can never happen,
and last but not least XFS will need the new behavour for writes.

Now we can replace the lock_type variable with a flags one, where no flag
means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first
flag.  Separate out the check for not allowing to fill holes into a separate
flag, although for now both flags always get set at the same time.

Also revamp the documentation of the locking scheme to actually make sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c              | 129 ++++++++++++++++++--------------------------
 fs/ocfs2/aops.c             |  34 ++----------
 fs/xfs/linux-2.6/xfs_aops.c |  20 +++----
 include/linux/fs.h          |  22 +++-----
 4 files changed, 71 insertions(+), 134 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..7dde0df8e8b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
  */
 
 struct dio {
@@ -68,7 +61,7 @@ struct dio {
 	struct inode *inode;
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
-	int lock_type;			/* doesn't change */
+	int flags;			/* doesn't change */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -240,7 +233,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, transferred,
 			    dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
+
+	if (dio->flags & DIO_LOCKING)
 		/* lockdep: non-owner release */
 		up_read_non_owner(&dio->inode->i_alloc_sem);
 
@@ -515,21 +509,24 @@ static int get_more_blocks(struct dio *dio)
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
+		/*
+		 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
+		 * forbid block creations: only overwrites are permitted.
+		 * We will return early to the caller once we see an
+		 * unmapped buffer head returned, and the caller will fall
+		 * back to buffered I/O.
+		 *
+		 * Otherwise the decision is left to the get_blocks method,
+		 * which may decide to handle it or also return an unmapped
+		 * buffer head.
+		 */
 		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
+		if (dio->flags & DIO_SKIP_HOLES) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
 				create = 0;
-		} else if (dio->lock_type == DIO_NO_LOCKING) {
-			create = 0;
 		}
 
-		/*
-		 * For writes inside i_size we forbid block creations: only
-		 * overwrites are permitted.  We fall back to buffered writes
-		 * at a higher level for inside-i_size block-instantiating
-		 * writes.
-		 */
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
@@ -1039,7 +1036,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
-	if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
+	if (rw == READ && (dio->flags & DIO_LOCKING))
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
@@ -1086,30 +1083,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 /*
  * This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
  *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
  *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- * For reads, i_mutex is not held on entry, but it is taken and dropped before
- * returning.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- *	uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
  */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+	int flags)
 {
 	int seg;
 	size_t size;
@@ -1120,8 +1115,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	int release_i_mutex = 0;
-	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT_PLUG;
@@ -1156,43 +1149,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (!dio)
 		goto out;
 
-	/*
-	 * For block device access DIO_NO_LOCKING is used,
-	 *	neither readers nor writers do any locking at all
-	 * For regular files using DIO_LOCKING,
-	 *	readers need to grab i_mutex and i_alloc_sem
-	 *	writers need to grab i_alloc_sem only (i_mutex is already held)
-	 * For regular files using DIO_OWN_LOCKING,
-	 *	neither readers nor writers take any locks here
-	 */
-	dio->lock_type = dio_lock_type;
-	if (dio_lock_type != DIO_NO_LOCKING) {
+	dio->flags = flags;
+	if (dio->flags & DIO_LOCKING) {
 		/* watch out for a 0 len io from a tricksy fs */
 		if (rw == READ && end > offset) {
-			struct address_space *mapping;
+			struct address_space *mapping =
+					iocb->ki_filp->f_mapping;
 
-			mapping = iocb->ki_filp->f_mapping;
-			if (dio_lock_type != DIO_OWN_LOCKING) {
-				mutex_lock(&inode->i_mutex);
-				release_i_mutex = 1;
-			}
+			/* will be released by direct_io_worker */
+			mutex_lock(&inode->i_mutex);
 
 			retval = filemap_write_and_wait_range(mapping, offset,
 							      end - 1);
 			if (retval) {
+				mutex_unlock(&inode->i_mutex);
 				kfree(dio);
 				goto out;
 			}
-
-			if (dio_lock_type == DIO_OWN_LOCKING) {
-				mutex_unlock(&inode->i_mutex);
-				acquire_i_mutex = 1;
-			}
 		}
 
-		if (dio_lock_type == DIO_LOCKING)
-			/* lockdep: not the owner will release it */
-			down_read_non_owner(&inode->i_alloc_sem);
+		/*
+		 * Will be released at I/O completion, possibly in a
+		 * different thread.
+		 */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
@@ -1210,24 +1190,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 *
+	 * NOTE: filesystems with their own locking have to handle this
+	 * on their own.
 	 */
-	if (unlikely(retval < 0 && (rw & WRITE))) {
-		loff_t isize = i_size_read(inode);
-
-		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+	if (dio->flags & DIO_LOCKING) {
+		if (unlikely((rw & WRITE) && retval < 0)) {
+			loff_t isize = i_size_read(inode);
+			if (end > isize )
+				vmtruncate(inode, isize);
+		}
 	}
 
-	if (rw == READ && dio_lock_type == DIO_LOCKING)
-		release_i_mutex = 0;
-
 out:
-	if (release_i_mutex)
-		mutex_unlock(&inode->i_mutex);
-	else if (acquire_i_mutex)
-		mutex_lock(&inode->i_mutex);
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
  *
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  * 					fs_count, map_bh, dio->rw == WRITE);
+ *
+ * Note that we never bother to allocate blocks here, and thus ignore the
+ * create argument.
  */
 static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 				     struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
 	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
-	/*
-	 * Any write past EOF is not allowed because we'd be extending.
-	 */
-	if (create && (iblock + max_blocks) > inode_blocks) {
-		ret = -EIO;
-		goto bail;
-	}
-
 	/* This figures out the size of the next contiguous block, and
 	 * our logical offset */
 	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has a hole at block %llu\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			    (unsigned long long)iblock);
-		ret = -EROFS;
-		goto bail;
-	}
-
 	/* We should already CoW the refcounted extent. */
 	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	 */
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
 		map_bh(bh_result, inode->i_sb, p_blkno);
-	else {
-		/*
-		 * ocfs2_prepare_inode_for_write() should have caught
-		 * the case where we'd be filling a hole and triggered
-		 * a buffered write instead.
-		 */
-		if (create) {
-			ret = -EIO;
-			mlog_errno(ret);
-			goto bail;
-		}
-
+	else
 		clear_buffer_mapped(bh_result);
-	}
 
 	/* make sure we don't map more than max_blocks blocks here as
 	   that's all the kernel will handle at this point. */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d798c54296eb..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1474,19 +1474,13 @@ xfs_vm_direct_IO(
 
 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc23be4edde..7c8ff12d1995 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2263,9 +2263,11 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int lock_type);
 
 enum {
-	DIO_LOCKING = 1, /* need locking between buffered and direct access */
-	DIO_NO_LOCKING,  /* bdev; no locking at all between buffered/direct */
-	DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
+	/* need locking between buffered and direct access */
+	DIO_LOCKING	= 0x01,
+
+	/* filesystem does not support filling holes */
+	DIO_SKIP_HOLES	= 0x02,
 };
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
@@ -2274,7 +2276,8 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_LOCKING);
+				    nr_segs, get_block, end_io,
+				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 
 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
@@ -2283,16 +2286,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_NO_LOCKING);
-}
-
-static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
-	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs, get_block_t get_block,
-	dio_iodone_t end_io)
-{
-	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
+				nr_segs, get_block, end_io, 0);
 }
 #endif
 
-- 
cgit v1.2.3


From efbbd05a595343a413964ad85a2ad359b7b7efbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 16 Dec 2009 18:04:40 +0100
Subject: sched: Add pre and post wakeup hooks

As will be apparent in the next patch, we need a pre wakeup hook
for sched_fair task migration, hence rename the post wakeup hook
and one pre wakeup.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170518.114746117@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  3 ++-
 kernel/sched.c        | 12 ++++++++----
 kernel/sched_rt.c     |  4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c858f38e81a..2c9fa1ccebff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1091,7 +1091,8 @@ struct sched_class {
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
-	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const struct cpumask *newmask);
diff --git a/kernel/sched.c b/kernel/sched.c
index 297dc441ff96..6c571bdd5658 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2412,6 +2412,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
+
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(rq, p);
+
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2475,8 +2479,8 @@ out_running:
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 
 	if (unlikely(rq->idle_stamp)) {
 		u64 delta = rq->clock - rq->idle_stamp;
@@ -2666,8 +2670,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 #endif
 	task_rq_unlock(rq, &flags);
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d2ea2828164e..f48328ac216f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
  * If we are not running and we are not going to reschedule soon, we should
  * try to push tasks away now
  */
-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
 	.rq_offline             = rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
 	.post_schedule		= post_schedule_rt,
-	.task_wake_up		= task_wake_up_rt,
+	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,
 #endif
 
-- 
cgit v1.2.3


From 88ec22d3edb72b261f8628226cd543589a6d5e1b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 16 Dec 2009 18:04:41 +0100
Subject: sched: Remove the cfs_rq dependency from set_task_cpu()

In order to remove the cfs_rq dependency from set_task_cpu() we
need to ensure the task is cfs_rq invariant for all callsites.

The simple approach is to substract cfs_rq->min_vruntime from
se->vruntime on dequeue, and add cfs_rq->min_vruntime on
enqueue.

However, this has the downside of breaking FAIR_SLEEPERS since
we loose the old vruntime as we only maintain the relative
position.

To solve this, we observe that we only migrate runnable tasks,
we do this using deactivate_task(.sleep=0) and
activate_task(.wakeup=0), therefore we can restrain the
min_vruntime invariance to that state.

The only other case is wakeup balancing, since we want to
maintain the old vruntime we cannot make it relative on dequeue,
but since we don't migrate inactive tasks, we can do so right
before we activate it again.

This is where we need the new pre-wakeup hook, we need to call
this while still holding the old rq->lock. We could fold it into
->select_task_rq(), but since that has multiple callsites and
would obfuscate the locking requirements, that seems like a
fudge.

This leaves the fork() case, simply make sure that ->task_fork()
leaves the ->vruntime in a relative state.

This covers all cases where set_task_cpu() gets called, and
ensures it sees a relative vruntime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170518.191697025@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        |  6 +-----
 kernel/sched_fair.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9fa1ccebff..973b2b89f86d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,7 +1116,7 @@ struct sched_class {
 					 struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*moved_group) (struct task_struct *p);
+	void (*moved_group) (struct task_struct *p, int on_rq);
 #endif
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c571bdd5658..f92ce63edfff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-					 new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ec1d2715620c..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
 }
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
+#define ENQUEUE_WAKEUP	1
+#define ENQUEUE_MIGRATE 2
+
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	/*
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through callig update_curr().
+	 */
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+		se->vruntime += cfs_rq->min_vruntime;
+
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
 
-	if (wakeup) {
+	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
 		enqueue_sleeper(cfs_rq, se);
 	}
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+
+	/*
+	 * Normalize the entity after updating the min_vruntime because the
+	 * update can refer to the ->curr item and we need to reflect this
+	 * movement in our normalized position.
+	 */
+	if (!sleep)
+		se->vruntime -= cfs_rq->min_vruntime;
 }
 
 /*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int flags = 0;
+
+	if (wakeup)
+		flags |= ENQUEUE_WAKEUP;
+	if (p->state == TASK_WAKING)
+		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
-		wakeup = 1;
+		enqueue_entity(cfs_rq, se, flags);
+		flags = ENQUEUE_WAKEUP;
 	}
 
 	hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	se->vruntime -= cfs_rq->min_vruntime;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
+	se->vruntime -= cfs_rq->min_vruntime;
+
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
 	update_curr(cfs_rq);
-	place_entity(cfs_rq, &p->se, 1);
+	if (!on_rq)
+		place_entity(cfs_rq, &p->se, 1);
 }
 #endif
 
@@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
 	.move_one_task		= move_one_task_fair,
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
+
+	.task_waking		= task_waking_fair,
 #endif
 
 	.set_curr_task          = set_curr_task_fair,
-- 
cgit v1.2.3


From 70023de88c58a81a730ab4d13c51a30e537ec76e Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:04:28 +0800
Subject: ACPI: Add a generic API for _OSC -v2

v2->v1:
.improve debug info as suggedted by Bjorn,Kenji
.API is using uuid string as suggested by Alexey

Add an API to execute _OSC. A lot of devices can have this method, so add a
generic API.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/bus.c   | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h |   9 ++++
 2 files changed, 131 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 741191524353..12240be58f27 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -344,6 +344,128 @@ bool acpi_bus_can_wakeup(acpi_handle handle)
 
 EXPORT_SYMBOL(acpi_bus_can_wakeup);
 
+static void acpi_print_osc_error(acpi_handle handle,
+	struct acpi_osc_context *context, char *error)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER};
+	int i;
+
+	if (ACPI_FAILURE(acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer)))
+		printk(KERN_DEBUG "%s\n", error);
+	else {
+		printk(KERN_DEBUG "%s:%s\n", (char *)buffer.pointer, error);
+		kfree(buffer.pointer);
+	}
+	printk(KERN_DEBUG"_OSC request data:");
+	for (i = 0; i < context->cap.length; i += sizeof(u32))
+		printk("%x ", *((u32 *)(context->cap.pointer + i)));
+	printk("\n");
+}
+
+static u8 hex_val(unsigned char c)
+{
+	return isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;
+}
+
+static acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
+{
+	int i;
+	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
+		24, 26, 28, 30, 32, 34};
+
+	if (strlen(str) != 36)
+		return AE_BAD_PARAMETER;
+	for (i = 0; i < 36; i++) {
+		if (i == 8 || i == 13 || i == 18 || i == 23) {
+			if (str[i] != '-')
+				return AE_BAD_PARAMETER;
+		} else if (!isxdigit(str[i]))
+			return AE_BAD_PARAMETER;
+	}
+	for (i = 0; i < 16; i++) {
+		uuid[i] = hex_val(str[opc_map_to_uuid[i]]) << 4;
+		uuid[i] |= hex_val(str[opc_map_to_uuid[i] + 1]);
+	}
+	return AE_OK;
+}
+
+acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
+{
+	acpi_status status;
+	struct acpi_object_list input;
+	union acpi_object in_params[4];
+	union acpi_object *out_obj;
+	u8 uuid[16];
+	u32 errors;
+
+	if (!context)
+		return AE_ERROR;
+	if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+		return AE_ERROR;
+	context->ret.length = ACPI_ALLOCATE_BUFFER;
+	context->ret.pointer = NULL;
+
+	/* Setting up input parameters */
+	input.count = 4;
+	input.pointer = in_params;
+	in_params[0].type 		= ACPI_TYPE_BUFFER;
+	in_params[0].buffer.length 	= 16;
+	in_params[0].buffer.pointer	= uuid;
+	in_params[1].type 		= ACPI_TYPE_INTEGER;
+	in_params[1].integer.value 	= context->rev;
+	in_params[2].type 		= ACPI_TYPE_INTEGER;
+	in_params[2].integer.value	= context->cap.length/sizeof(u32);
+	in_params[3].type		= ACPI_TYPE_BUFFER;
+	in_params[3].buffer.length 	= context->cap.length;
+	in_params[3].buffer.pointer 	= context->cap.pointer;
+
+	status = acpi_evaluate_object(handle, "_OSC", &input, &context->ret);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	/* return buffer should have the same length as cap buffer */
+	if (context->ret.length != context->cap.length)
+		return AE_NULL_OBJECT;
+
+	out_obj = context->ret.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		acpi_print_osc_error(handle, context,
+			"_OSC evaluation returned wrong type");
+		status = AE_TYPE;
+		goto out_kfree;
+	}
+	/* Need to ignore the bit0 in result code */
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		if (errors & OSC_REQUEST_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC request failed");
+		if (errors & OSC_INVALID_UUID_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC invalid UUID");
+		if (errors & OSC_INVALID_REVISION_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC invalid revision");
+		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
+			if (((u32 *)context->cap.pointer)[OSC_QUERY_TYPE]
+			    & OSC_QUERY_ENABLE)
+				goto out_success;
+			status = AE_SUPPORT;
+			goto out_kfree;
+		}
+		status = AE_ERROR;
+		goto out_kfree;
+	}
+out_success:
+	return AE_OK;
+
+out_kfree:
+	kfree(context->ret.pointer);
+	context->ret.pointer = NULL;
+	return status;
+}
+EXPORT_SYMBOL(acpi_run_osc);
+
 /* --------------------------------------------------------------------------
                                 Event Management
    -------------------------------------------------------------------------- */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dfcd920c3e54..3247e09db20d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -253,6 +253,13 @@ void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
 #endif /* CONFIG_PM_SLEEP */
 
+struct acpi_osc_context {
+	char *uuid_str; /* uuid string */
+	int rev;
+	struct acpi_buffer cap; /* arg2/arg3 */
+	struct acpi_buffer ret; /* free by caller if success */
+};
+
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
@@ -265,6 +272,8 @@ void __init acpi_s4_no_nvs(void);
 #define OSC_INVALID_REVISION_ERROR	8
 #define OSC_CAPABILITIES_MASK_ERROR	16
 
+acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
+
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
 #define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
-- 
cgit v1.2.3


From 3a9622dc4659af44a8098a233f65c51e495ff0a5 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:04:50 +0800
Subject: ACPI: cleanup pci_root _OSC code.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_root.c | 76 +++++++++----------------------------------------
 include/linux/acpi.h    |  5 ++--
 2 files changed, 17 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 1af808171d46..101cce3681d1 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -202,72 +202,24 @@ static void acpi_pci_bridge_scan(struct acpi_device *device)
 		}
 }
 
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
-			  0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
+static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
 
 static acpi_status acpi_pci_run_osc(acpi_handle handle,
 				    const u32 *capbuf, u32 *retval)
 {
+	struct acpi_osc_context context = {
+		.uuid_str = pci_osc_uuid_str,
+		.rev = 1,
+		.cap.length = 12,
+		.cap.pointer = (void *)capbuf,
+	};
 	acpi_status status;
-	struct acpi_object_list input;
-	union acpi_object in_params[4];
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *out_obj;
-	u32 errors;
-
-	/* Setting up input parameters */
-	input.count = 4;
-	input.pointer = in_params;
-	in_params[0].type 		= ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length 	= 16;
-	in_params[0].buffer.pointer	= OSC_UUID;
-	in_params[1].type 		= ACPI_TYPE_INTEGER;
-	in_params[1].integer.value 	= 1;
-	in_params[2].type 		= ACPI_TYPE_INTEGER;
-	in_params[2].integer.value	= 3;
-	in_params[3].type		= ACPI_TYPE_BUFFER;
-	in_params[3].buffer.length 	= 12;
-	in_params[3].buffer.pointer 	= (u8 *)capbuf;
-
-	status = acpi_evaluate_object(handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return status;
 
-	if (!output.length)
-		return AE_NULL_OBJECT;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		printk(KERN_DEBUG "_OSC evaluation returned wrong type\n");
-		status = AE_TYPE;
-		goto out_kfree;
-	}
-	/* Need to ignore the bit0 in result code */
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		if (errors & OSC_REQUEST_ERROR)
-			printk(KERN_DEBUG "_OSC request failed\n");
-		if (errors & OSC_INVALID_UUID_ERROR)
-			printk(KERN_DEBUG "_OSC invalid UUID\n");
-		if (errors & OSC_INVALID_REVISION_ERROR)
-			printk(KERN_DEBUG "_OSC invalid revision\n");
-		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
-			if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE)
-				goto out_success;
-			printk(KERN_DEBUG
-			       "Firmware did not grant requested _OSC control\n");
-			status = AE_SUPPORT;
-			goto out_kfree;
-		}
-		status = AE_ERROR;
-		goto out_kfree;
+	status = acpi_run_osc(handle, &context);
+	if (ACPI_SUCCESS(status)) {
+		*retval = *((u32 *)(context.ret.pointer + 8));
+		kfree(context.ret.pointer);
 	}
-out_success:
-	*retval = *((u32 *)(out_obj->buffer.pointer + 8));
-	status = AE_OK;
-
-out_kfree:
-	kfree(output.pointer);
 	return status;
 }
 
@@ -277,10 +229,10 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags)
 	u32 support_set, result, capbuf[3];
 
 	/* do _OSC query for all possible controls */
-	support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS);
+	support_set = root->osc_support_set | (flags & OSC_PCI_SUPPORT_MASKS);
 	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
 	capbuf[OSC_SUPPORT_TYPE] = support_set;
-	capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+	capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS;
 
 	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
 	if (ACPI_SUCCESS(status)) {
@@ -427,7 +379,7 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
 	if (ACPI_FAILURE(status))
 		return status;
 
-	control_req = (flags & OSC_CONTROL_MASKS);
+	control_req = (flags & OSC_PCI_CONTROL_MASKS);
 	if (!control_req)
 		return AE_TYPE;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3247e09db20d..535beecc37cf 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -263,7 +263,6 @@ struct acpi_osc_context {
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
-#define OSC_SUPPORT_MASKS		0x1f
 
 /* _OSC DW0 Definition */
 #define OSC_QUERY_ENABLE		1
@@ -274,12 +273,14 @@ struct acpi_osc_context {
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
+/* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
 #define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
 #define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	4
 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT		8
 #define OSC_MSI_SUPPORT				16
+#define OSC_PCI_SUPPORT_MASKS			0x1f
 
 /* _OSC DW1 Definition (OS Control Fields) */
 #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	1
@@ -288,7 +289,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_PCI_EXPRESS_AER_CONTROL		8
 #define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	16
 
-#define OSC_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
+#define OSC_PCI_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
 				OSC_SHPC_NATIVE_HP_CONTROL | 		\
 				OSC_PCI_EXPRESS_PME_CONTROL |		\
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
-- 
cgit v1.2.3


From 3563ff964fdc36358cef0330936fdac28e65142a Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:05:05 +0800
Subject: ACPI: Add platform-wide _OSC support.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/bus.c   | 26 ++++++++++++++++++++++++++
 include/linux/acpi.h |  7 +++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 12240be58f27..65f7e335f122 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -466,6 +466,30 @@ out_kfree:
 }
 EXPORT_SYMBOL(acpi_run_osc);
 
+static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
+static void acpi_bus_osc_support(void)
+{
+	u32 capbuf[2];
+	struct acpi_osc_context context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+	acpi_handle handle;
+
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */
+#ifdef CONFIG_ACPI_PROCESSOR_AGGREGATOR
+	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PAD_SUPPORT;
+#endif
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
+		return;
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
+		kfree(context.ret.pointer);
+	/* do we need to check the returned cap? Sounds no */
+}
+
 /* --------------------------------------------------------------------------
                                 Event Management
    -------------------------------------------------------------------------- */
@@ -856,6 +880,8 @@ static int __init acpi_bus_init(void)
 	status = acpi_ec_ecdt_probe();
 	/* Ignore result. Not having an ECDT is not fatal. */
 
+	acpi_bus_osc_support();
+
 	status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION);
 	if (ACPI_FAILURE(status)) {
 		printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n");
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 535beecc37cf..e11090d462d2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -273,6 +273,13 @@ struct acpi_osc_context {
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
+/* platform-wide _OSC bits */
+#define OSC_SB_PAD_SUPPORT		1
+#define OSC_SB_PPC_OST_SUPPORT		2
+#define OSC_SB_PR3_SUPPORT		4
+#define OSC_SB_CPUHP_OST_SUPPORT	8
+#define OSC_SB_APEI_SUPPORT		16
+
 /* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
-- 
cgit v1.2.3


From 2ee1abad73a12df5521cd3f017f081f1f684a361 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dgc@sgi.com>
Date: Tue, 24 Nov 2009 18:03:15 +0000
Subject: xfs: improve metadata I/O merging in the elevator

Change all async metadata buffers to use [READ|WRITE]_META I/O types
so that the I/O doesn't get issued immediately. This allows merging of
adjacent metadata requests but still prioritises them over bulk data.
This shows a 10-15% improvement in sequential create speed of small
files.

Don't include the log buffers in this classification - leave them as
sync types so they are issued immediately.

Signed-off-by: Dave Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 6 +++++-
 fs/xfs/linux-2.6/xfs_buf.h | 1 +
 fs/xfs/xfs_log.c           | 2 ++
 include/linux/fs.h         | 1 +
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b4c7d4248aac..162359b664ca 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
 	if (bp->b_flags & XBF_ORDERED) {
 		ASSERT(!(bp->b_flags & XBF_READ));
 		rw = WRITE_BARRIER;
-	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+	} else if (bp->b_flags & XBF_LOG_BUFFER) {
 		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
 		bp->b_flags &= ~_XBF_RUN_QUEUES;
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+		bp->b_flags &= ~_XBF_RUN_QUEUES;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
 	} else {
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE :
 		     (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a509f4addc2a..a34c7b54822d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
 	XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
  	XBF_ORDERED = (1 << 11),    /* use ordered writes		   */
 	XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead		   */
+	XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log   */
 
 	/* flags used only as arguments to access routines */
 	XBF_LOCK = (1 << 14),       /* lock requested			   */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4cb1792040e3..600b5b06aaeb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
+	bp->b_flags |= XBF_LOG_BUFFER;
 	/*
 	 * Do an ordered write for the log block.
 	 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t		*log,
 		XFS_BUF_ZEROFLAGS(bp);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
+		bp->b_flags |= XBF_LOG_BUFFER;
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
 			XFS_BUF_ORDERED(bp);
 		dptr = XFS_BUF_PTR(bp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b23a7018eb90..cf7fc8a7fe6a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -152,6 +152,7 @@ struct inodes_stat_t {
 #define WRITE_SYNC_PLUG	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC	(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
 #define WRITE_ODIRECT_PLUG	(WRITE | (1 << BIO_RW_SYNCIO))
+#define WRITE_META	(WRITE | (1 << BIO_RW_META))
 #define SWRITE_SYNC_PLUG	\
 			(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC	(SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-- 
cgit v1.2.3


From 6e1415467614e854fee660ff6648bd10fa976e95 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 15 Dec 2009 19:27:45 +0000
Subject: NOMMU: Optimise away the {dac_,}mmap_min_addr tests

In NOMMU mode clamp dac_mmap_min_addr to zero to cause the tests on it to be
skipped by the compiler.  We do this as the minimum mmap address doesn't make
any sense in NOMMU mode.

mmap_min_addr and round_hint_to_min() can be discarded entirely in NOMMU mode.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 7 +++++++
 kernel/sysctl.c          | 2 ++
 mm/Kconfig               | 1 +
 security/Makefile        | 3 ++-
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 466cbadbd1ef..2c627d361c02 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,8 +95,13 @@ struct seq_file;
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
+#ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
+#else
+#define dac_mmap_min_addr	0UL
+#endif
+
 /*
  * Values used in the task_security_ops calls
  */
@@ -121,6 +126,7 @@ struct request_sock;
 #define LSM_UNSAFE_PTRACE	2
 #define LSM_UNSAFE_PTRACE_CAP	4
 
+#ifdef CONFIG_MMU
 /*
  * If a hint addr is less than mmap_min_addr change hint to be as
  * low as possible but still greater than mmap_min_addr
@@ -135,6 +141,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 }
 extern int mmap_min_addr_handler(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
 
 #ifdef CONFIG_SECURITY
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 45e4bef0012a..856a24eadf7e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 #endif
+#ifdef CONFIG_MMU
 	{
 		.procname	= "mmap_min_addr",
 		.data		= &dac_mmap_min_addr,
@@ -1221,6 +1222,7 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= mmap_min_addr_handler,
 	},
+#endif
 #ifdef CONFIG_NUMA
 	{
 		.procname	= "numa_zonelist_order",
diff --git a/mm/Kconfig b/mm/Kconfig
index 43ea8c3a2bbf..ee9f3e0f2b69 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -221,6 +221,7 @@ config KSM
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
+	depends on MMU
         default 4096
         help
 	  This is the portion of low virtual memory which should be protected
diff --git a/security/Makefile b/security/Makefile
index bb44e350c618..da20a193c8dd 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -8,7 +8,8 @@ subdir-$(CONFIG_SECURITY_SMACK)		+= smack
 subdir-$(CONFIG_SECURITY_TOMOYO)        += tomoyo
 
 # always enable default capabilities
-obj-y		+= commoncap.o min_addr.o
+obj-y					+= commoncap.o
+obj-$(CONFIG_MMU)			+= min_addr.o
 
 # Object file lists
 obj-$(CONFIG_SECURITY)			+= security.o capability.o
-- 
cgit v1.2.3


From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 16 Dec 2009 23:25:50 +0100
Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion

The reiserfs lock -> inode mutex dependency gets inverted when we
relax the lock while walking to the tree.

To fix this, use a specialized version of reiserfs_mutex_lock_safe
that takes care of mutex subclasses. Then we can grab the inode
mutex with I_MUTEX_XATTR subclass without any reiserfs lock
dependency.

This fixes the following report:

[ INFO: possible circular locking dependency detected ]
2.6.32-06793-gf405425-dirty #2
-------------------------------------------------------
mv/18566 is trying to acquire lock:
 (&REISERFS_SB(s)->lock){+.+.+.}, at: [<c1110708>] reiserfs_write_lock+0x28=
/0x40

but task is already holding lock:
 (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}:
       [<c104f723>] validate_chain+0xa23/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c11102b4>] reiserfs_for_each_xattr+0x84/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c109cc0b>] do_unlinkat+0xdb/0x160
       [<c109cca0>] sys_unlink+0x10/0x20
       [<c1002c50>] sysenter_do_call+0x12/0x36

-> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
       [<c104fc68>] validate_chain+0xf68/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c1110708>] reiserfs_write_lock+0x28/0x40
       [<c1103d6b>] search_by_key+0x1f7b/0x21b0
       [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
       [<c10e77f7>] reiserfs_find_entry+0x77/0x400
       [<c10e81e5>] reiserfs_lookup+0x85/0x130
       [<c109a144>] __lookup_hash+0xb4/0x110
       [<c109b763>] lookup_one_len+0xb3/0x100
       [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c10a1c4f>] dentry_iput+0x6f/0xf0
       [<c10a1d74>] d_kill+0x24/0x50
       [<c10a396b>] dput+0x5b/0x120
       [<c109ca89>] sys_renameat+0x1b9/0x230
       [<c109cb28>] sys_rename+0x28/0x30
       [<c1002c50>] sysenter_do_call+0x12/0x36

other info that might help us debug this:

2 locks held by mv/18566:
 #0:  (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [<c109b6ac>]
lock_rename+0xcc/0xd0
 #1:  (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

stack backtrace:
Pid: 18566, comm: mv Tainted: G         C 2.6.32-06793-gf405425-dirty #2
Call Trace:
 [<c134b252>] ? printk+0x18/0x1e
 [<c104e790>] print_circular_bug+0xc0/0xd0
 [<c104fc68>] validate_chain+0xf68/0xf70
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1050155>] __lock_acquire+0x4e5/0xa70
 [<c105075a>] lock_acquire+0x7a/0xa0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134b60a>] ? schedule+0x27a/0x440
 [<c1110708>] reiserfs_write_lock+0x28/0x40
 [<c1103d6b>] search_by_key+0x1f7b/0x21b0
 [<c1050176>] ? __lock_acquire+0x506/0xa70
 [<c1051267>] ? lock_release_non_nested+0x1e7/0x340
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c104e3ab>] ? trace_hardirqs_on+0xb/0x10
 [<c1042a55>] ? T.316+0x15/0x1a0
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
 [<c134bf2a>] ? __mutex_unlock_slowpath+0x9a/0x120
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c10e77f7>] reiserfs_find_entry+0x77/0x400
 [<c10e81e5>] reiserfs_lookup+0x85/0x130
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c109a144>] __lookup_hash+0xb4/0x110
 [<c109b763>] lookup_one_len+0xb3/0x100
 [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
 [<c110ffe0>] ? delete_one_xattr+0x0/0x1c0
 [<c1003342>] ? math_error+0x22/0x150
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
 [<c10a561f>] ? generic_delete_inode+0x5f/0x150
 [<c10ef4f0>] ? reiserfs_delete_inode+0x0/0x140
 [<c10a565c>] generic_delete_inode+0x9c/0x150
 [<c10a574d>] generic_drop_inode+0x3d/0x60
 [<c10a4667>] iput+0x47/0x50
 [<c10a1c4f>] dentry_iput+0x6f/0xf0
 [<c10a1d74>] d_kill+0x24/0x50
 [<c10a396b>] dput+0x5b/0x120
 [<c109ca89>] sys_renameat+0x1b9/0x230
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1042dde>] ? cpu_clock+0x4e/0x60
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c1041816>] ? up_read+0x16/0x30
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c109cb28>] sys_rename+0x28/0x30
 [<c1002c50>] sysenter_do_call+0x12/0x36

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 fs/reiserfs/xattr.c         | 3 ++-
 include/linux/reiserfs_fs.h | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..8891cd88a3f4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -243,7 +243,8 @@ static int reiserfs_for_each_xattr(struct inode *inode,
 		goto out_dir;
 	}
 
-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+	reiserfs_mutex_lock_nested_safe(&dir->d_inode->i_mutex, I_MUTEX_XATTR,
+					inode->i_sb);
 	buf.xadir = dir;
 	err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
 	while ((err == 0 || err == -ENOSPC) && buf.count) {
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a05b4a20768d..4351b49e2b1e 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+			       struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	mutex_lock_nested(m, subclass);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From 329962503692b42d8088f31584e42d52db179d52 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 15 Dec 2009 17:59:02 -0800
Subject: x86: Fix checking of SRAT when node 0 ram is not from 0

Found one system that boot from socket1 instead of socket0, SRAT get rejected...

[    0.000000] SRAT: Node 1 PXM 0 0-a0000
[    0.000000] SRAT: Node 1 PXM 0 100000-80000000
[    0.000000] SRAT: Node 1 PXM 0 100000000-2080000000
[    0.000000] SRAT: Node 0 PXM 1 2080000000-4080000000
[    0.000000] SRAT: Node 2 PXM 2 4080000000-6080000000
[    0.000000] SRAT: Node 3 PXM 3 6080000000-8080000000
[    0.000000] SRAT: Node 4 PXM 4 8080000000-a080000000
[    0.000000] SRAT: Node 5 PXM 5 a080000000-c080000000
[    0.000000] SRAT: Node 6 PXM 6 c080000000-e080000000
[    0.000000] SRAT: Node 7 PXM 7 e080000000-10080000000
...
[    0.000000] NUMA: Allocated memnodemap from 500000 - 701040
[    0.000000] NUMA: Using 20 for the hash shift.
[    0.000000] Adding active range (0, 0x2080000, 0x4080000) 0 entries of 3200 used
[    0.000000] Adding active range (1, 0x0, 0x96) 1 entries of 3200 used
[    0.000000] Adding active range (1, 0x100, 0x7f750) 2 entries of 3200 used
[    0.000000] Adding active range (1, 0x100000, 0x2080000) 3 entries of 3200 used
[    0.000000] Adding active range (2, 0x4080000, 0x6080000) 4 entries of 3200 used
[    0.000000] Adding active range (3, 0x6080000, 0x8080000) 5 entries of 3200 used
[    0.000000] Adding active range (4, 0x8080000, 0xa080000) 6 entries of 3200 used
[    0.000000] Adding active range (5, 0xa080000, 0xc080000) 7 entries of 3200 used
[    0.000000] Adding active range (6, 0xc080000, 0xe080000) 8 entries of 3200 used
[    0.000000] Adding active range (7, 0xe080000, 0x10080000) 9 entries of 3200 used
[    0.000000] SRAT: PXMs only cover 917504MB of your 1048566MB e820 RAM. Not used.
[    0.000000] SRAT: SRAT not used.

the early_node_map is not sorted because node0 with non zero start come first.

so try to sort it right away after all regions are registered.

also fixs refression by 8716273c (x86: Export srat physical topology)

-v2: make it more solid to handle cross node case like node0 [0,4g), [8,12g) and node1 [4g, 8g), [12g, 16g)
-v3: update comments.

Reported-and-tested-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B2579D2.3010201@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/srat_32.c | 2 ++
 arch/x86/mm/srat_64.c | 4 +++-
 include/linux/mm.h    | 3 +++
 mm/page_alloc.c       | 4 ++--
 4 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
 		e820_register_active_regions(chunk->nid, chunk->start_pfn,
 					     min(chunk->end_pfn, max_pfn));
 	}
+	/* for out of order entries in SRAT */
+	sort_node_map();
 
 	for_each_online_node(nid) {
 		unsigned long start = node_start_pfn[nid];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index d89075489664..a27124185fc1 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		unsigned long s = nodes[i].start >> PAGE_SHIFT;
 		unsigned long e = nodes[i].end >> PAGE_SHIFT;
 		pxmram += e - s;
-		pxmram -= absent_pages_in_range(s, e);
+		pxmram -= __absent_pages_in_range(i, s, e);
 		if ((long)pxmram < 0)
 			pxmram = 0;
 	}
@@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	for_each_node_mask(i, nodes_parsed)
 		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
 						nodes[i].end >> PAGE_SHIFT);
+	/* for out of order entries in SRAT */
+	sort_node_map();
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
 		return -1;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c395694f4d..20a2036f3a94 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1022,6 +1022,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
+void sort_node_map(void);
+unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
+						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
 extern void get_pfn_range_for_nid(unsigned int nid,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..873c86308b4e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3573,7 +3573,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -4102,7 +4102,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
-- 
cgit v1.2.3


From a4636818f8e0991f32d9528f39cf4f3d6a7d30a3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 17 Dec 2009 11:43:29 -0600
Subject: cpumask: rename tsk_cpumask to tsk_cpus_allowed

Noone uses this wrapper yet, and Ingo asked that it be kept consistent
with current task_struct usage.

(One user crept in via linux-next: fixed)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +-
 include/linux/sched.h                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a9df9441a9a2..f125e5c551c0 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1136,7 +1136,7 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
 		return -ENOMEM;
 
-	cpumask_copy(oldmask, tsk_cpumask(current));
+	cpumask_copy(oldmask, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 244c287a5ac1..4d7adb282bdd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1555,7 +1555,7 @@ struct task_struct {
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
-#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed)
+#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
-- 
cgit v1.2.3


From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 9 Dec 2009 17:52:13 +1100
Subject: PCI/cardbus: Add a fixup hook and fix powerpc

The cardbus code creates PCI devices without ever going through the
necessary fixup bits and pieces that normal PCI devices go through.

There's in fact a commented out call to pcibios_fixup_bus() in there,
it's commented because ... it doesn't work.

I could make pcibios_fixup_bus() do the right thing on powerpc easily
but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus
for which a weak empty implementation is provided by the PCI core.

This fixes cardbus on powerbooks and probably all other PowerPC
platforms which was broken completely for ever on some platforms and
since 2.6.31 on others such as PowerBooks when we made the DMA ops
mandatory (since those are setup by the fixups).

Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci-common.c | 13 +++++++++++++
 drivers/pci/pci.c                |  5 +++++
 drivers/pcmcia/cardbus.c         |  2 +-
 include/linux/pci.h              |  3 +++
 4 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e8dfdbd9327a..cadbed679fbb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct dev_archdata *sd = &dev->dev.archdata;
 
+		/* Cardbus can call us to add new devices to a bus, so ignore
+		 * those who are already fully discovered
+		 */
+		if (dev->is_added)
+			continue;
+
 		/* Setup OF node pointer in archdata */
 		sd->of_node = pci_device_to_OF_node(dev);
 
@@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
 
+void __devinit pci_fixup_cardbus(struct pci_bus *bus)
+{
+	/* Now fixup devices on that bus */
+	pcibios_setup_bus_devices(bus);
+}
+
+
 static int skip_isa_ioresource_align(struct pci_dev *dev)
 {
 	if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) &&
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d50522bf16b1..864e703cf737 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev)
 	return 1;
 }
 
+void __weak pci_fixup_cardbus(struct pci_bus *bus)
+{
+}
+EXPORT_SYMBOL(pci_fixup_cardbus);
+
 static int __init pci_setup(char *str)
 {
 	while (str) {
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index cdf50f3bc2df..d99f846451a3 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s)
 	unsigned int max, pass;
 
 	s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0));
-/*	pcibios_fixup_bus(bus); */
+	pci_fixup_cardbus(bus); 
 
 	max = bus->secondary;
 	for (pass = 0; pass < 2; pass++)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bf1e67080849..5da0690d9cee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t,
 				resource_size_t);
 void pcibios_update_irq(struct pci_dev *, int irq);
 
+/* Weak but can be overriden by arch */
+void pci_fixup_cardbus(struct pci_bus *);
+
 /* Generic PCI functions used internally */
 
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
-- 
cgit v1.2.3


From 4f924ba5b5aaf1477daafeae779495d39549186d Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Thu, 17 Dec 2009 00:08:33 +0900
Subject: sony-laptop: add AVMode key mapping

Some models are equipped with an "AVMode" function key that sends
  sony-laptop: Unknown event: 0x100 0xa1
  sony-laptop: Unknown event: 0x100 0x21
for press and release respectively.

Cc: "Matthew W. S. Bell" <matthew@bells23.org.uk>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/platform/x86/sony-laptop.c | 4 ++++
 include/linux/sonypi.h             | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a2a742c8ff7e..9710f70040ba 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -232,6 +232,7 @@ static int sony_laptop_input_index[] = {
 	56,	/* 69 SONYPI_EVENT_VOLUME_INC_PRESSED */
 	57,	/* 70 SONYPI_EVENT_VOLUME_DEC_PRESSED */
 	-1,	/* 71 SONYPI_EVENT_BRIGHTNESS_PRESSED */
+	58,	/* 72 SONYPI_EVENT_MEDIA_PRESSED */
 };
 
 static int sony_laptop_input_keycode_map[] = {
@@ -293,6 +294,7 @@ static int sony_laptop_input_keycode_map[] = {
 	KEY_F15,	/* 55 SONYPI_EVENT_SETTINGKEY_PRESSED */
 	KEY_VOLUMEUP,	/* 56 SONYPI_EVENT_VOLUME_INC_PRESSED */
 	KEY_VOLUMEDOWN,	/* 57 SONYPI_EVENT_VOLUME_DEC_PRESSED */
+	KEY_MEDIA,	/* 58 SONYPI_EVENT_MEDIA_PRESSED */
 };
 
 /* release buttons after a short delay if pressed */
@@ -890,6 +892,8 @@ static struct sony_nc_event sony_100_events[] = {
 	{ 0x0C, SONYPI_EVENT_FNKEY_RELEASED },
 	{ 0x9f, SONYPI_EVENT_CD_EJECT_PRESSED },
 	{ 0x1f, SONYPI_EVENT_ANYBUTTON_RELEASED },
+	{ 0xa1, SONYPI_EVENT_MEDIA_PRESSED },
+	{ 0x21, SONYPI_EVENT_ANYBUTTON_RELEASED },
 	{ 0, 0 },
 };
 
diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h
index 34c4475ac4a2..4f95c1aac2fd 100644
--- a/include/linux/sonypi.h
+++ b/include/linux/sonypi.h
@@ -111,6 +111,7 @@
 #define SONYPI_EVENT_VOLUME_INC_PRESSED		69
 #define SONYPI_EVENT_VOLUME_DEC_PRESSED		70
 #define SONYPI_EVENT_BRIGHTNESS_PRESSED		71
+#define SONYPI_EVENT_MEDIA_PRESSED		72
 
 /* get/set brightness */
 #define SONYPI_IOCGBRT		_IOR('v', 0, __u8)
-- 
cgit v1.2.3


From 234da7bcdc7aaa935846534c3b726dbc79a9cdd5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 16 Dec 2009 20:21:05 +0100
Subject: sched: Teach might_sleep() about preemptible RCU

In practice, it is harmless to voluntarily sleep in a
rcu_read_lock() section if we are running under preempt rcu, but
it is illegal if we build a kernel running non-preemptable rcu.

Currently, might_sleep() doesn't notice sleepable operations
under rcu_read_lock() sections if we are running under
preemptable rcu because preempt_count() is left untouched after
rcu_read_lock() in this case. But we want developers who test
their changes under such config to notice the "sleeping while
atomic" issues.

So we add rcu_read_lock_nesting to prempt_count() in
might_sleep() checks.

[ v2: Handle rcu-tiny ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1260991265-8451-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  5 +++++
 include/linux/rcutree.h | 11 +++++++++++
 kernel/sched.c          |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index c4ba9a78721e..96cc307ed9f4 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -101,4 +101,9 @@ static inline void exit_rcu(void)
 {
 }
 
+static inline int rcu_preempt_depth(void)
+{
+	return 0;
+}
+
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c93eee5911b0..8044b1b94333 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,6 +45,12 @@ extern void __rcu_read_unlock(void);
 extern void synchronize_rcu(void);
 extern void exit_rcu(void);
 
+/*
+ * Defined as macro as it is a very low level header
+ * included from areas that don't even know about current
+ */
+#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 static inline void __rcu_read_lock(void)
@@ -63,6 +69,11 @@ static inline void exit_rcu(void)
 {
 }
 
+static inline int rcu_preempt_depth(void)
+{
+	return 0;
+}
+
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 static inline void __rcu_read_lock_bh(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index af7dfa74e6bb..7be88a7be047 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9682,7 +9682,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-	int nested = preempt_count() & ~PREEMPT_ACTIVE;
+	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
 	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 }
-- 
cgit v1.2.3


From 5b74ed4729ad2b2017453add68104a83206caefb Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 16 Dec 2009 10:09:45 -0500
Subject: perf events: Remove unused perf_counter.h header file

Since nothing includes the <linux/perf_counter.h> file and it's
also not exported to user space, remove it.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <alpine.LFD.2.00.0912161007430.8198@localhost>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 444 -------------------------------------------
 1 file changed, 444 deletions(-)
 delete mode 100644 include/linux/perf_counter.h

(limited to 'include/linux')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
deleted file mode 100644
index e3fb25606706..000000000000
--- a/include/linux/perf_counter.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- *  NOTE: this file will be removed in a future kernel release, it is
- *  provided as a courtesy copy of user-space code that relies on the
- *  old (pre-rename) symbols and constants.
- *
- *  Performance events:
- *
- *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
- *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
- *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
- *
- *  Data type definitions, declarations, prototypes.
- *
- *    Started by: Thomas Gleixner and Ingo Molnar
- *
- *  For licencing details see kernel-base/COPYING
- */
-#ifndef _LINUX_PERF_COUNTER_H
-#define _LINUX_PERF_COUNTER_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <asm/byteorder.h>
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * attr.type
- */
-enum perf_type_id {
-	PERF_TYPE_HARDWARE			= 0,
-	PERF_TYPE_SOFTWARE			= 1,
-	PERF_TYPE_TRACEPOINT			= 2,
-	PERF_TYPE_HW_CACHE			= 3,
-	PERF_TYPE_RAW				= 4,
-
-	PERF_TYPE_MAX,				/* non-ABI */
-};
-
-/*
- * Generalized performance counter event types, used by the
- * attr.event_id parameter of the sys_perf_counter_open()
- * syscall:
- */
-enum perf_hw_id {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_HW_CPU_CYCLES		= 0,
-	PERF_COUNT_HW_INSTRUCTIONS		= 1,
-	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
-	PERF_COUNT_HW_CACHE_MISSES		= 3,
-	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_HW_BRANCH_MISSES		= 5,
-	PERF_COUNT_HW_BUS_CYCLES		= 6,
-
-	PERF_COUNT_HW_MAX,			/* non-ABI */
-};
-
-/*
- * Generalized hardware cache counters:
- *
- *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
- *       { read, write, prefetch } x
- *       { accesses, misses }
- */
-enum perf_hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D			= 0,
-	PERF_COUNT_HW_CACHE_L1I			= 1,
-	PERF_COUNT_HW_CACHE_LL			= 2,
-	PERF_COUNT_HW_CACHE_DTLB		= 3,
-	PERF_COUNT_HW_CACHE_ITLB		= 4,
-	PERF_COUNT_HW_CACHE_BPU			= 5,
-
-	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ		= 0,
-	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
-
-	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_result_id {
-	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
-	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
-
-	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
-};
-
-/*
- * Special "software" counters provided by the kernel, even if the hardware
- * does not support performance counters. These counters measure various
- * physical and sw events of the kernel (and allow the profiling of them as
- * well):
- */
-enum perf_sw_ids {
-	PERF_COUNT_SW_CPU_CLOCK			= 0,
-	PERF_COUNT_SW_TASK_CLOCK		= 1,
-	PERF_COUNT_SW_PAGE_FAULTS		= 2,
-	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
-	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
-	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
-	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
-	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
-	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
-
-	PERF_COUNT_SW_MAX,			/* non-ABI */
-};
-
-/*
- * Bits that can be set in attr.sample_type to request information
- * in the overflow packets.
- */
-enum perf_counter_sample_format {
-	PERF_SAMPLE_IP				= 1U << 0,
-	PERF_SAMPLE_TID				= 1U << 1,
-	PERF_SAMPLE_TIME			= 1U << 2,
-	PERF_SAMPLE_ADDR			= 1U << 3,
-	PERF_SAMPLE_READ			= 1U << 4,
-	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
-	PERF_SAMPLE_ID				= 1U << 6,
-	PERF_SAMPLE_CPU				= 1U << 7,
-	PERF_SAMPLE_PERIOD			= 1U << 8,
-	PERF_SAMPLE_STREAM_ID			= 1U << 9,
-	PERF_SAMPLE_RAW				= 1U << 10,
-
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
-};
-
-/*
- * The format of the data returned by read() on a perf counter fd,
- * as specified by attr.read_format:
- *
- * struct read_format {
- *	{ u64		value;
- *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- *	  { u64		time_running; } && PERF_FORMAT_RUNNING
- *	  { u64		id;           } && PERF_FORMAT_ID
- *	} && !PERF_FORMAT_GROUP
- *
- *	{ u64		nr;
- *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- *	  { u64		time_running; } && PERF_FORMAT_RUNNING
- *	  { u64		value;
- *	    { u64	id;           } && PERF_FORMAT_ID
- *	  }		cntr[nr];
- *	} && PERF_FORMAT_GROUP
- * };
- */
-enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
-	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
-	PERF_FORMAT_ID				= 1U << 2,
-	PERF_FORMAT_GROUP			= 1U << 3,
-
-	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
-};
-
-#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_attr {
-
-	/*
-	 * Major type: hardware/software/tracepoint/etc.
-	 */
-	__u32			type;
-
-	/*
-	 * Size of the attr structure, for fwd/bwd compat.
-	 */
-	__u32			size;
-
-	/*
-	 * Type specific configuration information.
-	 */
-	__u64			config;
-
-	union {
-		__u64		sample_period;
-		__u64		sample_freq;
-	};
-
-	__u64			sample_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-				mmap           :  1, /* include mmap data     */
-				comm	       :  1, /* include comm data     */
-				freq           :  1, /* use freq, not period  */
-				inherit_stat   :  1, /* per task counts       */
-				enable_on_exec :  1, /* next exec enables     */
-				task           :  1, /* trace fork/exit       */
-				watermark      :  1, /* wakeup_watermark      */
-
-				__reserved_1   : 49;
-
-	union {
-		__u32		wakeup_events;	  /* wakeup every n events */
-		__u32		wakeup_watermark; /* bytes before wakeup   */
-	};
-	__u32			__reserved_2;
-
-	__u64			__reserved_3;
-};
-
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
-#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
-#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
-#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
-#define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
-#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
-
-enum perf_counter_ioc_flags {
-	PERF_IOC_FLAG_GROUP		= 1U << 0,
-};
-
-/*
- * Structure of the page that can be mapped via mmap
- */
-struct perf_counter_mmap_page {
-	__u32	version;		/* version number of this structure */
-	__u32	compat_version;		/* lowest version this is compat with */
-
-	/*
-	 * Bits needed to read the hw counters in user-space.
-	 *
-	 *   u32 seq;
-	 *   s64 count;
-	 *
-	 *   do {
-	 *     seq = pc->lock;
-	 *
-	 *     barrier()
-	 *     if (pc->index) {
-	 *       count = pmc_read(pc->index - 1);
-	 *       count += pc->offset;
-	 *     } else
-	 *       goto regular_read;
-	 *
-	 *     barrier();
-	 *   } while (pc->lock != seq);
-	 *
-	 * NOTE: for obvious reason this only works on self-monitoring
-	 *       processes.
-	 */
-	__u32	lock;			/* seqlock for synchronization */
-	__u32	index;			/* hardware counter identifier */
-	__s64	offset;			/* add to hardware counter value */
-	__u64	time_enabled;		/* time counter active */
-	__u64	time_running;		/* time counter on cpu */
-
-		/*
-		 * Hole for extension of the self monitor capabilities
-		 */
-
-	__u64	__reserved[123];	/* align to 1k */
-
-	/*
-	 * Control data for the mmap() data buffer.
-	 *
-	 * User-space reading the @data_head value should issue an rmb(), on
-	 * SMP capable platforms, after reading this value -- see
-	 * perf_counter_wakeup().
-	 *
-	 * When the mapping is PROT_WRITE the @data_tail value should be
-	 * written by userspace to reflect the last read data. In this case
-	 * the kernel will not over-write unread data.
-	 */
-	__u64   data_head;		/* head in the data section */
-	__u64	data_tail;		/* user-space written tail */
-};
-
-#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
-#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
-#define PERF_EVENT_MISC_KERNEL			(1 << 0)
-#define PERF_EVENT_MISC_USER			(2 << 0)
-#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
-
-struct perf_event_header {
-	__u32	type;
-	__u16	misc;
-	__u16	size;
-};
-
-enum perf_event_type {
-
-	/*
-	 * The MMAP events record the PROT_EXEC mappings so that we can
-	 * correlate userspace IPs to code. They have the following structure:
-	 *
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	u64				addr;
-	 *	u64				len;
-	 *	u64				pgoff;
-	 *	char				filename[];
-	 * };
-	 */
-	PERF_EVENT_MMAP			= 1,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u64				id;
-	 *	u64				lost;
-	 * };
-	 */
-	PERF_EVENT_LOST			= 2,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	char				comm[];
-	 * };
-	 */
-	PERF_EVENT_COMM			= 3,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	u64				time;
-	 * };
-	 */
-	PERF_EVENT_EXIT			= 4,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u64				time;
-	 *	u64				id;
-	 *	u64				stream_id;
-	 * };
-	 */
-	PERF_EVENT_THROTTLE		= 5,
-	PERF_EVENT_UNTHROTTLE		= 6,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	u64				time;
-	 * };
-	 */
-	PERF_EVENT_FORK			= 7,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, tid;
-	 *
-	 *	struct read_format		values;
-	 * };
-	 */
-	PERF_EVENT_READ			= 8,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
-	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
-	 *	{ u64			time;     } && PERF_SAMPLE_TIME
-	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
-	 *	{ u64			id;	  } && PERF_SAMPLE_ID
-	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
-	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
-	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
-	 *
-	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
-	 *
-	 *	{ u64			nr,
-	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
-	 *
-	 *	#
-	 *	# The RAW record below is opaque data wrt the ABI
-	 *	#
-	 *	# That is, the ABI doesn't make any promises wrt to
-	 *	# the stability of its content, it may vary depending
-	 *	# on event, hardware, kernel version and phase of
-	 *	# the moon.
-	 *	#
-	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
-	 *	#
-	 *
-	 *	{ u32			size;
-	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
-	 * };
-	 */
-	PERF_EVENT_SAMPLE		= 9,
-
-	PERF_EVENT_MAX,			/* non-ABI */
-};
-
-enum perf_callchain_context {
-	PERF_CONTEXT_HV			= (__u64)-32,
-	PERF_CONTEXT_KERNEL		= (__u64)-128,
-	PERF_CONTEXT_USER		= (__u64)-512,
-
-	PERF_CONTEXT_GUEST		= (__u64)-2048,
-	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
-	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
-
-	PERF_CONTEXT_MAX		= (__u64)-4095,
-};
-
-#define PERF_FLAG_FD_NO_GROUP		(1U << 0)
-#define PERF_FLAG_FD_OUTPUT		(1U << 1)
-
-/*
- * In case some app still references the old symbols:
- */
-
-#define __NR_perf_counter_open		__NR_perf_event_open
-
-#define PR_TASK_PERF_COUNTERS_DISABLE	PR_TASK_PERF_EVENTS_DISABLE
-#define PR_TASK_PERF_COUNTERS_ENABLE	PR_TASK_PERF_EVENTS_ENABLE
-
-#endif /* _LINUX_PERF_COUNTER_H */
-- 
cgit v1.2.3


From 27f37e4bfed803be338dcc78845d4a67eefb40a0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 25 Sep 2009 09:39:26 +0200
Subject: regulator: add driver for MAX8660/8661

Tested with a MX25-based custom board.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig         |   7 +
 drivers/regulator/Makefile        |   1 +
 drivers/regulator/max8660.c       | 510 ++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/max8660.h |  57 +++++
 4 files changed, 575 insertions(+)
 create mode 100644 drivers/regulator/max8660.c
 create mode 100644 include/linux/regulator/max8660.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 7cfdd65bebb4..9e0aa14dc6af 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -69,6 +69,13 @@ config REGULATOR_MAX1586
 	  regulator via I2C bus. The provided regulator is suitable
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
+config REGULATOR_MAX8660
+	tristate "Maxim 8660/8661 voltage regulator"
+	depends on I2C
+	help
+	  This driver controls a Maxim 8660/8661 voltage output
+	  regulator via I2C bus.
+
 config REGULATOR_TWL4030
 	bool "TI TWL4030/TWL5030/TWL6030/TPS695x0 PMIC"
 	depends on TWL4030_CORE
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 9ae3cc44e668..12285e41beec 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o
+obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o
diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c
new file mode 100644
index 000000000000..acc2fb7b6087
--- /dev/null
+++ b/drivers/regulator/max8660.c
@@ -0,0 +1,510 @@
+/*
+ * max8660.c  --  Voltage regulation for the Maxim 8660/8661
+ *
+ * based on max1586.c and wm8400-regulator.c
+ *
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Some info:
+ *
+ * Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8660-MAX8661.pdf
+ *
+ * This chip is a bit nasty because it is a write-only device. Thus, the driver
+ * uses shadow registers to keep track of its values. The main problem appears
+ * to be the initialization: When Linux boots up, we cannot know if the chip is
+ * in the default state or not, so we would have to pass such information in
+ * platform_data. As this adds a bit of complexity to the driver, this is left
+ * out for now until it is really needed.
+ *
+ * [A|S|M]DTV1 registers are currently not used, but [A|S|M]DTV2.
+ *
+ * If the driver is feature complete, it might be worth to check if one set of
+ * functions for V3-V7 is sufficient. For maximum flexibility during
+ * development, they are separated for now.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/max8660.h>
+
+#define MAX8660_DCDC_MIN_UV	 725000
+#define MAX8660_DCDC_MAX_UV	1800000
+#define MAX8660_DCDC_STEP	  25000
+#define MAX8660_DCDC_MAX_SEL	0x2b
+
+#define MAX8660_LDO5_MIN_UV	1700000
+#define MAX8660_LDO5_MAX_UV	2000000
+#define MAX8660_LDO5_STEP	  25000
+#define MAX8660_LDO5_MAX_SEL	0x0c
+
+#define MAX8660_LDO67_MIN_UV	1800000
+#define MAX8660_LDO67_MAX_UV	3300000
+#define MAX8660_LDO67_STEP	 100000
+#define MAX8660_LDO67_MAX_SEL	0x0f
+
+enum {
+	MAX8660_OVER1,
+	MAX8660_OVER2,
+	MAX8660_VCC1,
+	MAX8660_ADTV1,
+	MAX8660_ADTV2,
+	MAX8660_SDTV1,
+	MAX8660_SDTV2,
+	MAX8660_MDTV1,
+	MAX8660_MDTV2,
+	MAX8660_L12VCR,
+	MAX8660_FPWM,
+	MAX8660_N_REGS,	/* not a real register */
+};
+
+struct max8660 {
+	struct i2c_client *client;
+	u8 shadow_regs[MAX8660_N_REGS];		/* as chip is write only */
+	struct regulator_dev *rdev[];
+};
+
+static int max8660_write(struct max8660 *max8660, u8 reg, u8 mask, u8 val)
+{
+	static const u8 max8660_addresses[MAX8660_N_REGS] =
+	  { 0x10, 0x12, 0x20, 0x23, 0x24, 0x29, 0x2a, 0x32, 0x33, 0x39, 0x80 };
+
+	int ret;
+	u8 reg_val = (max8660->shadow_regs[reg] & mask) | val;
+	dev_vdbg(&max8660->client->dev, "Writing reg %02x with %02x\n",
+			max8660_addresses[reg], reg_val);
+
+	ret = i2c_smbus_write_byte_data(max8660->client,
+			max8660_addresses[reg], reg_val);
+	if (ret == 0)
+		max8660->shadow_regs[reg] = reg_val;
+
+	return ret;
+}
+
+
+/*
+ * DCDC functions
+ */
+
+static int max8660_dcdc_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 val = max8660->shadow_regs[MAX8660_OVER1];
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4;
+	return !!(val & mask);
+}
+
+static int max8660_dcdc_enable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 bit = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4;
+	return max8660_write(max8660, MAX8660_OVER1, 0xff, bit);
+}
+
+static int max8660_dcdc_disable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? ~1 : ~4;
+	return max8660_write(max8660, MAX8660_OVER1, mask, 0);
+}
+
+static int max8660_dcdc_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_DCDC_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP;
+}
+
+static int max8660_dcdc_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2;
+	u8 selector = max8660->shadow_regs[reg];
+	return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP;
+}
+
+static int max8660_dcdc_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 reg, selector, bits;
+	int ret;
+
+	if (min_uV < MAX8660_DCDC_MIN_UV || min_uV > MAX8660_DCDC_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_DCDC_MIN_UV || max_uV > MAX8660_DCDC_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_DCDC_MIN_UV - MAX8660_DCDC_STEP + 1))
+			/ MAX8660_DCDC_STEP;
+
+	ret = max8660_dcdc_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2;
+	ret = max8660_write(max8660, reg, 0, selector);
+	if (ret)
+		return ret;
+
+	/* Select target voltage register and activate regulation */
+	bits = (rdev_get_id(rdev) == MAX8660_V3) ? 0x03 : 0x30;
+	return max8660_write(max8660, MAX8660_VCC1, 0xff, bits);
+}
+
+static struct regulator_ops max8660_dcdc_ops = {
+	.is_enabled = max8660_dcdc_is_enabled,
+	.list_voltage = max8660_dcdc_list,
+	.set_voltage = max8660_dcdc_set,
+	.get_voltage = max8660_dcdc_get,
+};
+
+
+/*
+ * LDO5 functions
+ */
+
+static int max8660_ldo5_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_LDO5_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP;
+}
+
+static int max8660_ldo5_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector = max8660->shadow_regs[MAX8660_MDTV2];
+
+	return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP;
+}
+
+static int max8660_ldo5_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector;
+	int ret;
+
+	if (min_uV < MAX8660_LDO5_MIN_UV || min_uV > MAX8660_LDO5_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_LDO5_MIN_UV || max_uV > MAX8660_LDO5_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_LDO5_MIN_UV - MAX8660_LDO5_STEP + 1))
+			/ MAX8660_LDO5_STEP;
+	ret = max8660_ldo5_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	ret = max8660_write(max8660, MAX8660_MDTV2, 0, selector);
+	if (ret)
+		return ret;
+
+	/* Select target voltage register and activate regulation */
+	return max8660_write(max8660, MAX8660_VCC1, 0xff, 0xc0);
+}
+
+static struct regulator_ops max8660_ldo5_ops = {
+	.list_voltage = max8660_ldo5_list,
+	.set_voltage = max8660_ldo5_set,
+	.get_voltage = max8660_ldo5_get,
+};
+
+
+/*
+ * LDO67 functions
+ */
+
+static int max8660_ldo67_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 val = max8660->shadow_regs[MAX8660_OVER2];
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? 2 : 4;
+	return !!(val & mask);
+}
+
+static int max8660_ldo67_enable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 bit = (rdev_get_id(rdev) == MAX8660_V6) ? 2 : 4;
+	return max8660_write(max8660, MAX8660_OVER2, 0xff, bit);
+}
+
+static int max8660_ldo67_disable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? ~2 : ~4;
+	return max8660_write(max8660, MAX8660_OVER2, mask, 0);
+}
+
+static int max8660_ldo67_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_LDO67_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP;
+}
+
+static int max8660_ldo67_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 shift = (rdev_get_id(rdev) == MAX8660_V6) ? 0 : 4;
+	u8 selector = (max8660->shadow_regs[MAX8660_L12VCR] >> shift) & 0xf;
+
+	return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP;
+}
+
+static int max8660_ldo67_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector;
+	int ret;
+
+	if (min_uV < MAX8660_LDO67_MIN_UV || min_uV > MAX8660_LDO67_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_LDO67_MIN_UV || max_uV > MAX8660_LDO67_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_LDO67_MIN_UV - MAX8660_LDO67_STEP + 1))
+			/ MAX8660_LDO67_STEP;
+
+	ret = max8660_ldo67_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	if (rdev_get_id(rdev) == MAX8660_V6)
+		return max8660_write(max8660, MAX8660_L12VCR, 0xf0, selector);
+	else
+		return max8660_write(max8660, MAX8660_L12VCR, 0x0f, selector << 4);
+}
+
+static struct regulator_ops max8660_ldo67_ops = {
+	.is_enabled = max8660_ldo67_is_enabled,
+	.enable = max8660_ldo67_enable,
+	.disable = max8660_ldo67_disable,
+	.list_voltage = max8660_ldo67_list,
+	.get_voltage = max8660_ldo67_get,
+	.set_voltage = max8660_ldo67_set,
+};
+
+static struct regulator_desc max8660_reg[] = {
+	{
+		.name = "V3(DCDC)",
+		.id = MAX8660_V3,
+		.ops = &max8660_dcdc_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_DCDC_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V4(DCDC)",
+		.id = MAX8660_V4,
+		.ops = &max8660_dcdc_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_DCDC_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V5(LDO)",
+		.id = MAX8660_V5,
+		.ops = &max8660_ldo5_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO5_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V6(LDO)",
+		.id = MAX8660_V6,
+		.ops = &max8660_ldo67_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO67_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V7(LDO)",
+		.id = MAX8660_V7,
+		.ops = &max8660_ldo67_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO67_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int max8660_probe(struct i2c_client *client,
+			      const struct i2c_device_id *i2c_id)
+{
+	struct regulator_dev **rdev;
+	struct max8660_platform_data *pdata = client->dev.platform_data;
+	struct max8660 *max8660;
+	int boot_on, i, id, ret = -EINVAL;
+
+	if (pdata->num_subdevs > MAX8660_V_END) {
+		dev_err(&client->dev, "Too much regulators found!\n");
+		goto out;
+	}
+
+	max8660 = kzalloc(sizeof(struct max8660) +
+			sizeof(struct regulator_dev *) * MAX8660_V_END,
+			GFP_KERNEL);
+	if (!max8660) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	max8660->client = client;
+	rdev = max8660->rdev;
+
+	if (pdata->en34_is_high) {
+		/* Simulate always on */
+		max8660->shadow_regs[MAX8660_OVER1] = 5;
+	} else {
+		/* Otherwise devices can be toggled via software */
+		max8660_dcdc_ops.enable = max8660_dcdc_enable;
+		max8660_dcdc_ops.disable = max8660_dcdc_disable;
+	}
+
+	/*
+	 * First, set up shadow registers to prevent glitches. As some
+	 * registers are shared between regulators, everything must be properly
+	 * set up for all regulators in advance.
+	 */
+	max8660->shadow_regs[MAX8660_ADTV1] =
+		max8660->shadow_regs[MAX8660_ADTV2] =
+		max8660->shadow_regs[MAX8660_SDTV1] =
+		max8660->shadow_regs[MAX8660_SDTV2] = 0x1b;
+	max8660->shadow_regs[MAX8660_MDTV1] =
+		max8660->shadow_regs[MAX8660_MDTV2] = 0x04;
+
+	for (i = 0; i < pdata->num_subdevs; i++) {
+
+		if (!pdata->subdevs[i].platform_data)
+			goto err_free;
+
+		boot_on = pdata->subdevs[i].platform_data->constraints.boot_on;
+
+		switch (pdata->subdevs[i].id) {
+		case MAX8660_V3:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER1] |= 1;
+			break;
+
+		case MAX8660_V4:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER1] |= 4;
+			break;
+
+		case MAX8660_V5:
+			break;
+
+		case MAX8660_V6:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER2] |= 2;
+			break;
+
+		case MAX8660_V7:
+			if (!strcmp(i2c_id->name, "max8661")) {
+				dev_err(&client->dev, "Regulator not on this chip!\n");
+				goto err_free;
+			}
+
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER2] |= 4;
+			break;
+
+		default:
+			dev_err(&client->dev, "invalid regulator %s\n",
+				 pdata->subdevs[i].name);
+			goto err_free;
+		}
+	}
+
+	/* Finally register devices */
+	for (i = 0; i < pdata->num_subdevs; i++) {
+
+		id = pdata->subdevs[i].id;
+
+		rdev[i] = regulator_register(&max8660_reg[id], &client->dev,
+					     pdata->subdevs[i].platform_data,
+					     max8660);
+		if (IS_ERR(rdev[i])) {
+			ret = PTR_ERR(rdev[i]);
+			dev_err(&client->dev, "failed to register %s\n",
+				max8660_reg[id].name);
+			goto err_unregister;
+		}
+	}
+
+	i2c_set_clientdata(client, rdev);
+	dev_info(&client->dev, "Maxim 8660/8661 regulator driver loaded\n");
+	return 0;
+
+err_unregister:
+	while (--i >= 0)
+		regulator_unregister(rdev[i]);
+err_free:
+	kfree(max8660);
+out:
+	return ret;
+}
+
+static int max8660_remove(struct i2c_client *client)
+{
+	struct regulator_dev **rdev = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < MAX8660_V_END; i++)
+		if (rdev[i])
+			regulator_unregister(rdev[i]);
+	kfree(rdev);
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id max8660_id[] = {
+	{ "max8660", 0 },
+	{ "max8661", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max8660_id);
+
+static struct i2c_driver max8660_driver = {
+	.probe = max8660_probe,
+	.remove = max8660_remove,
+	.driver		= {
+		.name	= "max8660",
+	},
+	.id_table	= max8660_id,
+};
+
+static int __init max8660_init(void)
+{
+	return i2c_add_driver(&max8660_driver);
+}
+subsys_initcall(max8660_init);
+
+static void __exit max8660_exit(void)
+{
+	i2c_del_driver(&max8660_driver);
+}
+module_exit(max8660_exit);
+
+/* Module information */
+MODULE_DESCRIPTION("MAXIM 8660/8661 voltage regulator driver");
+MODULE_AUTHOR("Wolfram Sang");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/max8660.h b/include/linux/regulator/max8660.h
new file mode 100644
index 000000000000..9936763621c7
--- /dev/null
+++ b/include/linux/regulator/max8660.h
@@ -0,0 +1,57 @@
+/*
+ * max8660.h  --  Voltage regulation for the Maxim 8660/8661
+ *
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8660_H
+#define __LINUX_REGULATOR_MAX8660_H
+
+#include <linux/regulator/machine.h>
+
+enum {
+	MAX8660_V3,
+	MAX8660_V4,
+	MAX8660_V5,
+	MAX8660_V6,
+	MAX8660_V7,
+	MAX8660_V_END,
+};
+
+/**
+ * max8660_subdev_data - regulator subdev data
+ * @id: regulator id
+ * @name: regulator name
+ * @platform_data: regulator init data
+ */
+struct max8660_subdev_data {
+	int				id;
+	char				*name;
+	struct regulator_init_data	*platform_data;
+};
+
+/**
+ * max8660_platform_data - platform data for max8660
+ * @num_subdevs: number of regulators used
+ * @subdevs: pointer to regulators used
+ * @en34_is_high: if EN34 is driven high, regulators cannot be en-/disabled.
+ */
+struct max8660_platform_data {
+	int num_subdevs;
+	struct max8660_subdev_data *subdevs;
+	unsigned en34_is_high:1;
+};
+#endif
-- 
cgit v1.2.3


From e24a04c44cf312e88b50006a91ad7ffc1c0d97a5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 22 Sep 2009 08:47:11 -0700
Subject: regulator: Implement WM831x BuckWise DC-DC convertor DVS support

The BuckWise DC-DC convertors in WM831x devices support switching to
a second output voltage using the logic level on one of the device
pins. This is intended to allow rapid voltage switching for uses like
cpufreq, replacing the I2C or SPI write used to configure the voltage
of the regulator with a much faster GPIO status change.

This is implemented by keeping the DVS voltage configured as the
maximum voltage permitted for the regulator. If a request is made
for the maximum voltage then the GPIO is used to switch to the DVS
voltage, otherwise the normal ON voltage is updated and used. This
follows the idiom used by most cpufreq drivers, which drop the
minimum voltage as the core frequency is dropped but use a constant
maximum - raising the voltage should normally be fast, but lowering
it may be slower.

Configuration of the DVS MFP on the device should be done externally,
for example via OTP.

Support is present in the hardware for monitoring the status of the
transition using a second GPIO. This is not currently implemented
but platform data is provided for it - the driver currently assumes
that the device will be configured to transition immediately - but
platform data is provided to reduce merge issues once it is.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/wm831x-dcdc.c  | 207 +++++++++++++++++++++++++++++++++++----
 include/linux/mfd/wm831x/pdata.h |  17 ++++
 2 files changed, 206 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 2eefc1a0cf08..0a6577577e8d 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -19,6 +19,8 @@
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/gpio.h>
 
 #include <linux/mfd/wm831x/core.h>
 #include <linux/mfd/wm831x/regulator.h>
@@ -39,6 +41,7 @@
 #define WM831X_DCDC_CONTROL_2     1
 #define WM831X_DCDC_ON_CONFIG     2
 #define WM831X_DCDC_SLEEP_CONTROL 3
+#define WM831X_DCDC_DVS_CONTROL   4
 
 /*
  * Shared
@@ -50,6 +53,10 @@ struct wm831x_dcdc {
 	int base;
 	struct wm831x *wm831x;
 	struct regulator_dev *regulator;
+	int dvs_gpio;
+	int dvs_gpio_state;
+	int on_vsel;
+	int dvs_vsel;
 };
 
 static int wm831x_dcdc_is_enabled(struct regulator_dev *rdev)
@@ -240,11 +247,9 @@ static int wm831x_buckv_list_voltage(struct regulator_dev *rdev,
 	return -EINVAL;
 }
 
-static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg,
-					 int min_uV, int max_uV)
+static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev,
+					   int min_uV, int max_uV)
 {
-	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	struct wm831x *wm831x = dcdc->wm831x;
 	u16 vsel;
 
 	if (min_uV < 600000)
@@ -257,39 +262,126 @@ static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg,
 	if (wm831x_buckv_list_voltage(rdev, vsel) > max_uV)
 		return -EINVAL;
 
-	return wm831x_set_bits(wm831x, reg, WM831X_DC1_ON_VSEL_MASK, vsel);
+	return vsel;
+}
+
+static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev,
+					   int min_uV, int max_uV)
+{
+	u16 vsel;
+
+	if (max_uV < 600000 || max_uV > 1800000)
+		return -EINVAL;
+
+	vsel = ((max_uV - 600000) / 12500) + 8;
+
+	if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV ||
+	    wm831x_buckv_list_voltage(rdev, vsel) < max_uV)
+		return -EINVAL;
+
+	return vsel;
+}
+
+static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state)
+{
+	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
+
+	if (state == dcdc->dvs_gpio_state)
+		return 0;
+
+	dcdc->dvs_gpio_state = state;
+	gpio_set_value(dcdc->dvs_gpio, state);
+
+	/* Should wait for DVS state change to be asserted if we have
+	 * a GPIO for it, for now assume the device is configured
+	 * for the fastest possible transition.
+	 */
+
+	return 0;
 }
 
 static int wm831x_buckv_set_voltage(struct regulator_dev *rdev,
-				     int min_uV, int max_uV)
+				    int min_uV, int max_uV)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
+	struct wm831x *wm831x = dcdc->wm831x;
+	int on_reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
+	int dvs_reg = dcdc->base + WM831X_DCDC_DVS_CONTROL;
+	int vsel, ret;
+
+	vsel = wm831x_buckv_select_min_voltage(rdev, min_uV, max_uV);
+	if (vsel < 0)
+		return vsel;
+
+	/* If this value is already set then do a GPIO update if we can */
+	if (dcdc->dvs_gpio && dcdc->on_vsel == vsel)
+		return wm831x_buckv_set_dvs(rdev, 0);
+
+	if (dcdc->dvs_gpio && dcdc->dvs_vsel == vsel)
+		return wm831x_buckv_set_dvs(rdev, 1);
+
+	/* Always set the ON status to the minimum voltage */
+	ret = wm831x_set_bits(wm831x, on_reg, WM831X_DC1_ON_VSEL_MASK, vsel);
+	if (ret < 0)
+		return ret;
+	dcdc->on_vsel = vsel;
+
+	if (!dcdc->dvs_gpio)
+		return ret;
+
+	/* Kick the voltage transition now */
+	ret = wm831x_buckv_set_dvs(rdev, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Set the high voltage as the DVS voltage.  This is optimised
+	 * for CPUfreq usage, most processors will keep the maximum
+	 * voltage constant and lower the minimum with the frequency. */
+	vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV);
+	if (vsel < 0) {
+		/* This should never happen - at worst the same vsel
+		 * should be chosen */
+		WARN_ON(vsel < 0);
+		return 0;
+	}
+
+	/* Don't bother if it's the same VSEL we're already using */
+	if (vsel == dcdc->on_vsel)
+		return 0;
 
-	return wm831x_buckv_set_voltage_int(rdev, reg, min_uV, max_uV);
+	ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel);
+	if (ret == 0)
+		dcdc->dvs_vsel = vsel;
+	else
+		dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n",
+			 ret);
+
+	return 0;
 }
 
 static int wm831x_buckv_set_suspend_voltage(struct regulator_dev *rdev,
-					     int uV)
+					    int uV)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
+	struct wm831x *wm831x = dcdc->wm831x;
 	u16 reg = dcdc->base + WM831X_DCDC_SLEEP_CONTROL;
+	int vsel;
+
+	vsel = wm831x_buckv_select_min_voltage(rdev, uV, uV);
+	if (vsel < 0)
+		return vsel;
 
-	return wm831x_buckv_set_voltage_int(rdev, reg, uV, uV);
+	return wm831x_set_bits(wm831x, reg, WM831X_DC1_SLP_VSEL_MASK, vsel);
 }
 
 static int wm831x_buckv_get_voltage(struct regulator_dev *rdev)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	struct wm831x *wm831x = dcdc->wm831x;
-	u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
-	int val;
 
-	val = wm831x_reg_read(wm831x, reg);
-	if (val < 0)
-		return val;
-
-	return wm831x_buckv_list_voltage(rdev, val & WM831X_DC1_ON_VSEL_MASK);
+	if (dcdc->dvs_gpio && dcdc->dvs_gpio_state)
+		return wm831x_buckv_list_voltage(rdev, dcdc->dvs_vsel);
+	else
+		return wm831x_buckv_list_voltage(rdev, dcdc->on_vsel);
 }
 
 /* Current limit options */
@@ -346,6 +438,64 @@ static struct regulator_ops wm831x_buckv_ops = {
 	.set_suspend_mode = wm831x_dcdc_set_suspend_mode,
 };
 
+/*
+ * Set up DVS control.  We just log errors since we can still run
+ * (with reduced performance) if we fail.
+ */
+static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
+					    struct wm831x_buckv_pdata *pdata)
+{
+	struct wm831x *wm831x = dcdc->wm831x;
+	int ret;
+	u16 ctrl;
+
+	if (!pdata || !pdata->dvs_gpio)
+		return;
+
+	switch (pdata->dvs_control_src) {
+	case 1:
+		ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	case 2:
+		ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	default:
+		dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
+			pdata->dvs_control_src, dcdc->name);
+		return;
+	}
+
+	ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
+			      WM831X_DC1_DVS_SRC_MASK, ctrl);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
+			dcdc->name, ret);
+		return;
+	}
+
+	ret = gpio_request(pdata->dvs_gpio, "DCDC DVS");
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n",
+			dcdc->name, ret);
+		return;
+	}
+
+	/* gpiolib won't let us read the GPIO status so pick the higher
+	 * of the two existing voltages so we take it as platform data.
+	 */
+	dcdc->dvs_gpio_state = pdata->dvs_init_state;
+
+	ret = gpio_direction_output(pdata->dvs_gpio, dcdc->dvs_gpio_state);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to enable %s DVS GPIO: %d\n",
+			dcdc->name, ret);
+		gpio_free(pdata->dvs_gpio);
+		return;
+	}
+
+	dcdc->dvs_gpio = pdata->dvs_gpio;
+}
+
 static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
@@ -384,6 +534,23 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 	dcdc->desc.ops = &wm831x_buckv_ops;
 	dcdc->desc.owner = THIS_MODULE;
 
+	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to read ON VSEL: %d\n", ret);
+		goto err;
+	}
+	dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK;
+
+	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret);
+		goto err;
+	}
+	dcdc->dvs_vsel = ret & WM831X_DC1_DVS_VSEL_MASK;
+
+	if (pdata->dcdc[id])
+		wm831x_buckv_dvs_init(dcdc, pdata->dcdc[id]->driver_data);
+
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
 					     pdata->dcdc[id], dcdc);
 	if (IS_ERR(dcdc->regulator)) {
@@ -422,6 +589,8 @@ err_uv:
 err_regulator:
 	regulator_unregister(dcdc->regulator);
 err:
+	if (dcdc->dvs_gpio)
+		gpio_free(dcdc->dvs_gpio);
 	kfree(dcdc);
 	return ret;
 }
@@ -434,6 +603,8 @@ static __devexit int wm831x_buckv_remove(struct platform_device *pdev)
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "HC"), dcdc);
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
+	if (dcdc->dvs_gpio)
+		gpio_free(dcdc->dvs_gpio);
 	kfree(dcdc);
 
 	return 0;
diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h
index 415c228743d5..fd322aca33ba 100644
--- a/include/linux/mfd/wm831x/pdata.h
+++ b/include/linux/mfd/wm831x/pdata.h
@@ -41,6 +41,23 @@ struct wm831x_battery_pdata {
 	int timeout;        /** Charge cycle timeout, in minutes */
 };
 
+/**
+ * Configuration for the WM831x DC-DC BuckWise convertors.  This
+ * should be passed as driver_data in the regulator_init_data.
+ *
+ * Currently all the configuration is for the fast DVS switching
+ * support of the devices.  This allows MFPs on the device to be
+ * configured as an input to switch between two output voltages,
+ * allowing voltage transitions without the expense of an access over
+ * I2C or SPI buses.
+ */
+struct wm831x_buckv_pdata {
+	int dvs_gpio;        /** CPU GPIO to use for DVS switching */
+	int dvs_control_src; /** Hardware DVS source to use (1 or 2) */
+	int dvs_init_state;  /** DVS state to expect on startup */
+	int dvs_state_gpio;  /** CPU GPIO to use for monitoring status */
+};
+
 /* Sources for status LED configuration.  Values are register values
  * plus 1 to allow for a zero default for preserve.
  */
-- 
cgit v1.2.3


From 638f85c54f4fed0f8f1fbc23745a8f334112e892 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 22 Oct 2009 16:31:33 +0100
Subject: regulator: Handle regulators without suspend mode configuration

Since some regulators in the system may not support suspend mode
configuration we need to allow some regulators to have a missing
suspend mode configuration. Do this by requiring that disabled
regulators are explicitly flagged and then skip over regulators
that have no state specified.

Try to avoid surprises by warning the if we could set the state
but no configuration is provided.  This also ensures that an all
zeros configuration generates a warning rather than silently
disabling the regulator.

Reported-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/core.c          | 25 ++++++++++++++++++++++---
 include/linux/regulator/machine.h |  6 +++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7d0c0d7d90ca..2dab0d9e11f5 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -581,10 +581,29 @@ static int suspend_set_state(struct regulator_dev *rdev,
 	struct regulator_state *rstate)
 {
 	int ret = 0;
+	bool can_set_state;
 
-	/* enable & disable are mandatory for suspend control */
-	if (!rdev->desc->ops->set_suspend_enable ||
-		!rdev->desc->ops->set_suspend_disable) {
+	can_set_state = rdev->desc->ops->set_suspend_enable &&
+		rdev->desc->ops->set_suspend_disable;
+
+	/* If we have no suspend mode configration don't set anything;
+	 * only warn if the driver actually makes the suspend mode
+	 * configurable.
+	 */
+	if (!rstate->enabled && !rstate->disabled) {
+		if (can_set_state)
+			printk(KERN_WARNING "%s: No configuration for %s\n",
+			       __func__, rdev_get_name(rdev));
+		return 0;
+	}
+
+	if (rstate->enabled && rstate->disabled) {
+		printk(KERN_ERR "%s: invalid configuration for %s\n",
+		       __func__, rdev_get_name(rdev));
+		return -EINVAL;
+	}
+
+	if (!can_set_state) {
 		printk(KERN_ERR "%s: no way to set suspend state\n",
 			__func__);
 		return -EINVAL;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 87f5f176d4ef..234a8476cba8 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -43,16 +43,20 @@ struct regulator;
 /**
  * struct regulator_state - regulator state during low power system states
  *
- * This describes a regulators state during a system wide low power state.
+ * This describes a regulators state during a system wide low power
+ * state.  One of enabled or disabled must be set for the
+ * configuration to be applied.
  *
  * @uV: Operating voltage during suspend.
  * @mode: Operating mode during suspend.
  * @enabled: Enabled during suspend.
+ * @disabled: Disabled during suspend.
  */
 struct regulator_state {
 	int uV;	/* suspend voltage */
 	unsigned int mode; /* suspend regulator operating mode */
 	int enabled; /* is regulator enabled in this suspend state */
+	int disabled; /* is the regulator disbled in this suspend state */
 };
 
 /**
-- 
cgit v1.2.3


From b56daf13eb77ee24f48f0bb34c2492f46a432ec4 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lrg@slimlogic.co.uk>
Date: Wed, 11 Nov 2009 14:16:10 +0000
Subject: regulator: consumer.h - fix build when consumer.h is #included first.

consumer.h requires device.h for stand alone build.

Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/consumer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 490c5b37b6d7..030d92255c7a 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -35,6 +35,8 @@
 #ifndef __LINUX_REGULATOR_CONSUMER_H_
 #define __LINUX_REGULATOR_CONSUMER_H_
 
+#include <linux/device.h>
+
 /*
  * Regulator operating modes.
  *
-- 
cgit v1.2.3


From d4cc6a2eee98faebf2c7d3ebc4b35541c1d47d21 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Mon, 7 Dec 2009 15:08:13 +0100
Subject: leds: Add LED class driver for regulator driven LEDs.

This driver provides an interface for controlling LEDs (or vibrators)
connected to PMICs for which there is a regulator framework driver.

This driver can be used, for instance, to control vibrator on all Motorola EZX
phones using the pcap-regulator driver services.

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/leds/Kconfig           |   6 +
 drivers/leds/Makefile          |   1 +
 drivers/leds/leds-regulator.c  | 242 +++++++++++++++++++++++++++++++++++++++++
 include/linux/leds-regulator.h |  46 ++++++++
 4 files changed, 295 insertions(+)
 create mode 100644 drivers/leds/leds-regulator.c
 create mode 100644 include/linux/leds-regulator.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index f12a99676628..8a0e1ec95e4a 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -229,6 +229,12 @@ config LEDS_PWM
 	help
 	  This option enables support for pwm driven LEDs
 
+config LEDS_REGULATOR
+	tristate "REGULATOR driven LED support"
+	depends on LEDS_CLASS && REGULATOR
+	help
+	  This option enables support for regulator driven LEDs.
+
 config LEDS_BD2802
 	tristate "LED driver for BD2802 RGB LED"
 	depends on LEDS_CLASS && I2C
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 176f0c674751..9e63869d7c0d 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
 obj-$(CONFIG_LEDS_WM8350)		+= leds-wm8350.o
 obj-$(CONFIG_LEDS_PWM)			+= leds-pwm.o
+obj-$(CONFIG_LEDS_REGULATOR)		+= leds-regulator.o
 obj-$(CONFIG_LEDS_INTEL_SS4200)		+= leds-ss4200.o
 obj-$(CONFIG_LEDS_LT3593)		+= leds-lt3593.o
 obj-$(CONFIG_LEDS_ADP5520)		+= leds-adp5520.o
diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c
new file mode 100644
index 000000000000..7f00de3ef922
--- /dev/null
+++ b/drivers/leds/leds-regulator.c
@@ -0,0 +1,242 @@
+/*
+ * leds-regulator.c - LED class driver for regulator driven LEDs.
+ *
+ * Copyright (C) 2009 Antonio Ospite <ospite@studenti.unina.it>
+ *
+ * Inspired by leds-wm8350 driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/workqueue.h>
+#include <linux/leds.h>
+#include <linux/leds-regulator.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#define to_regulator_led(led_cdev) \
+	container_of(led_cdev, struct regulator_led, cdev)
+
+struct regulator_led {
+	struct led_classdev cdev;
+	enum led_brightness value;
+	int enabled;
+	struct mutex mutex;
+	struct work_struct work;
+
+	struct regulator *vcc;
+};
+
+static inline int led_regulator_get_max_brightness(struct regulator *supply)
+{
+	int ret;
+	int voltage = regulator_list_voltage(supply, 0);
+
+	if (voltage <= 0)
+		return 1;
+
+	/* even if regulator can't change voltages,
+	 * we still assume it can change status
+	 * and the LED can be turned on and off.
+	 */
+	ret = regulator_set_voltage(supply, voltage, voltage);
+	if (ret < 0)
+		return 1;
+
+	return regulator_count_voltages(supply);
+}
+
+static int led_regulator_get_voltage(struct regulator *supply,
+		enum led_brightness brightness)
+{
+	if (brightness == 0)
+		return -EINVAL;
+
+	return regulator_list_voltage(supply, brightness - 1);
+}
+
+
+static void regulator_led_enable(struct regulator_led *led)
+{
+	int ret;
+
+	if (led->enabled)
+		return;
+
+	ret = regulator_enable(led->vcc);
+	if (ret != 0) {
+		dev_err(led->cdev.dev, "Failed to enable vcc: %d\n", ret);
+		return;
+	}
+
+	led->enabled = 1;
+}
+
+static void regulator_led_disable(struct regulator_led *led)
+{
+	int ret;
+
+	if (!led->enabled)
+		return;
+
+	ret = regulator_disable(led->vcc);
+	if (ret != 0) {
+		dev_err(led->cdev.dev, "Failed to disable vcc: %d\n", ret);
+		return;
+	}
+
+	led->enabled = 0;
+}
+
+static void regulator_led_set_value(struct regulator_led *led)
+{
+	int voltage;
+	int ret;
+
+	mutex_lock(&led->mutex);
+
+	if (led->value == LED_OFF) {
+		regulator_led_disable(led);
+		goto out;
+	}
+
+	if (led->cdev.max_brightness > 1) {
+		voltage = led_regulator_get_voltage(led->vcc, led->value);
+		dev_dbg(led->cdev.dev, "brightness: %d voltage: %d\n",
+				led->value, voltage);
+
+		ret = regulator_set_voltage(led->vcc, voltage, voltage);
+		if (ret != 0)
+			dev_err(led->cdev.dev, "Failed to set voltage %d: %d\n",
+				voltage, ret);
+	}
+
+	regulator_led_enable(led);
+
+out:
+	mutex_unlock(&led->mutex);
+}
+
+static void led_work(struct work_struct *work)
+{
+	struct regulator_led *led;
+
+	led = container_of(work, struct regulator_led, work);
+	regulator_led_set_value(led);
+}
+
+static void regulator_led_brightness_set(struct led_classdev *led_cdev,
+			   enum led_brightness value)
+{
+	struct regulator_led *led = to_regulator_led(led_cdev);
+
+	led->value = value;
+	schedule_work(&led->work);
+}
+
+static int __devinit regulator_led_probe(struct platform_device *pdev)
+{
+	struct led_regulator_platform_data *pdata = pdev->dev.platform_data;
+	struct regulator_led *led;
+	struct regulator *vcc;
+	int ret = 0;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "no platform data\n");
+		return -ENODEV;
+	}
+
+	vcc = regulator_get_exclusive(&pdev->dev, "vled");
+	if (IS_ERR(vcc)) {
+		dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name);
+		return PTR_ERR(vcc);
+	}
+
+	led = kzalloc(sizeof(*led), GFP_KERNEL);
+	if (led == NULL) {
+		ret = -ENOMEM;
+		goto err_vcc;
+	}
+
+	led->cdev.max_brightness = led_regulator_get_max_brightness(vcc);
+	if (pdata->brightness > led->cdev.max_brightness) {
+		dev_err(&pdev->dev, "Invalid default brightness %d\n",
+				pdata->brightness);
+		ret = -EINVAL;
+		goto err_led;
+	}
+	led->value = pdata->brightness;
+
+	led->cdev.brightness_set = regulator_led_brightness_set;
+	led->cdev.name = pdata->name;
+	led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+	led->vcc = vcc;
+
+	mutex_init(&led->mutex);
+	INIT_WORK(&led->work, led_work);
+
+	platform_set_drvdata(pdev, led);
+
+	ret = led_classdev_register(&pdev->dev, &led->cdev);
+	if (ret < 0) {
+		cancel_work_sync(&led->work);
+		goto err_led;
+	}
+
+	/* to expose the default value to userspace */
+	led->cdev.brightness = led->value;
+
+	/* Set the default led status */
+	regulator_led_set_value(led);
+
+	return 0;
+
+err_led:
+	kfree(led);
+err_vcc:
+	regulator_put(vcc);
+	return ret;
+}
+
+static int __devexit regulator_led_remove(struct platform_device *pdev)
+{
+	struct regulator_led *led = platform_get_drvdata(pdev);
+
+	led_classdev_unregister(&led->cdev);
+	cancel_work_sync(&led->work);
+	regulator_led_disable(led);
+	regulator_put(led->vcc);
+	kfree(led);
+	return 0;
+}
+
+static struct platform_driver regulator_led_driver = {
+	.driver = {
+		   .name  = "leds-regulator",
+		   .owner = THIS_MODULE,
+		   },
+	.probe  = regulator_led_probe,
+	.remove = __devexit_p(regulator_led_remove),
+};
+
+static int __init regulator_led_init(void)
+{
+	return platform_driver_register(&regulator_led_driver);
+}
+module_init(regulator_led_init);
+
+static void __exit regulator_led_exit(void)
+{
+	platform_driver_unregister(&regulator_led_driver);
+}
+module_exit(regulator_led_exit);
+
+MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>");
+MODULE_DESCRIPTION("Regulator driven LED driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:leds-regulator");
diff --git a/include/linux/leds-regulator.h b/include/linux/leds-regulator.h
new file mode 100644
index 000000000000..5a8eb389aab8
--- /dev/null
+++ b/include/linux/leds-regulator.h
@@ -0,0 +1,46 @@
+/*
+ * leds-regulator.h - platform data structure for regulator driven LEDs.
+ *
+ * Copyright (C) 2009 Antonio Ospite <ospite@studenti.unina.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __LINUX_LEDS_REGULATOR_H
+#define __LINUX_LEDS_REGULATOR_H
+
+/*
+ * Use "vled" as supply id when declaring the regulator consumer:
+ *
+ * static struct regulator_consumer_supply pcap_regulator_VVIB_consumers [] = {
+ * 	{ .dev_name = "leds-regulator.0", supply = "vled" },
+ * };
+ *
+ * If you have several regulator driven LEDs, you can append a numerical id to
+ * .dev_name as done above, and use the same id when declaring the platform
+ * device:
+ *
+ * static struct led_regulator_platform_data a780_vibrator_data = {
+ * 	.name   = "a780::vibrator",
+ * };
+ *
+ * static struct platform_device a780_vibrator = {
+ * 	.name = "leds-regulator",
+ * 	.id   = 0,
+ * 	.dev  = {
+ * 		.platform_data = &a780_vibrator_data,
+ * 	},
+ * };
+ */
+
+#include <linux/leds.h>
+
+struct led_regulator_platform_data {
+	char *name;                     /* LED name as expected by LED class */
+	enum led_brightness brightness; /* initial brightness value */
+};
+
+#endif /* __LINUX_LEDS_REGULATOR_H */
-- 
cgit v1.2.3


From 9695fff8f84d7ab849139750036e443b85804edd Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sat, 28 Nov 2009 12:55:51 +0100
Subject: leds: leds-pca9532.h- indent with tabs, not spaces

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds-pca9532.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index 96eea90f01a8..f158eb1149aa 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -32,7 +32,7 @@ struct pca9532_led {
 	struct i2c_client *client;
 	char *name;
 	struct led_classdev ldev;
-       struct work_struct work;
+	struct work_struct work;
 	enum pca9532_type type;
 	enum pca9532_state state;
 };
-- 
cgit v1.2.3


From 1998111582f5d726ca4dbf9d68935d9e7c994374 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sun, 29 Nov 2009 13:12:21 +0100
Subject: leds: leds-lp3944.h - remove unneeded includes

These were needed in the first version of the driver because we used to expose
workqueue and led class details in the header file, now we don't.

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds-lp3944.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds-lp3944.h b/include/linux/leds-lp3944.h
index afc9f9fd70f5..2618aa9063bc 100644
--- a/include/linux/leds-lp3944.h
+++ b/include/linux/leds-lp3944.h
@@ -12,9 +12,6 @@
 #ifndef __LINUX_LEDS_LP3944_H
 #define __LINUX_LEDS_LP3944_H
 
-#include <linux/leds.h>
-#include <linux/workqueue.h>
-
 #define LP3944_LED0 0
 #define LP3944_LED1 1
 #define LP3944_LED2 2
-- 
cgit v1.2.3


From cfc3899fcd0b3b990b29d3d33f75f4edf715e7d1 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 10 Nov 2009 17:20:40 +0000
Subject: backlight: Pass device through notify callback in the pwm driver

Add the device to the notify callback's arguments in the PWM backlight
driver. This brings the notify callback into line with the other
callbacks defined by this driver.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/pwm_bl.c | 9 ++++++---
 include/linux/pwm_backlight.h    | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index df9e0b32cf39..9d2ec2a1cce8 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -22,8 +22,10 @@
 
 struct pwm_bl_data {
 	struct pwm_device	*pwm;
+	struct device		*dev;
 	unsigned int		period;
-	int			(*notify)(int brightness);
+	int			(*notify)(struct device *,
+					  int brightness);
 };
 
 static int pwm_backlight_update_status(struct backlight_device *bl)
@@ -39,7 +41,7 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
 		brightness = 0;
 
 	if (pb->notify)
-		brightness = pb->notify(brightness);
+		brightness = pb->notify(pb->dev, brightness);
 
 	if (brightness == 0) {
 		pwm_config(pb->pwm, 0, pb->period);
@@ -88,6 +90,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
 	pb->period = data->pwm_period_ns;
 	pb->notify = data->notify;
+	pb->dev = &pdev->dev;
 
 	pb->pwm = pwm_request(data->pwm_id, "backlight");
 	if (IS_ERR(pb->pwm)) {
@@ -146,7 +149,7 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
 	struct pwm_bl_data *pb = dev_get_drvdata(&bl->dev);
 
 	if (pb->notify)
-		pb->notify(0);
+		pb->notify(pb->dev, 0);
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
 	return 0;
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 7a9754c96775..01b3d759f1fc 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -10,7 +10,7 @@ struct platform_pwm_backlight_data {
 	unsigned int dft_brightness;
 	unsigned int pwm_period_ns;
 	int (*init)(struct device *dev);
-	int (*notify)(int brightness);
+	int (*notify)(struct device *dev, int brightness);
 	void (*exit)(struct device *dev);
 };
 
-- 
cgit v1.2.3


From 733421516b42c44b9e21f1793c430cc801ef8324 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:27 +0100
Subject: sched: Move TASK_STATE_TO_CHAR_STR near the TASK_state bits

So that we don't keep forgetting about it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121829.815779372@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 973b2b89f86d..c28ed1b1d7c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,6 +193,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
@@ -2595,8 +2597,6 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
-
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From 44d90df6b757c59651ddd55f1a84f28132b50d29 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:28 +0100
Subject: sched: Add missing state chars to TASK_STATE_TO_CHAR_STR

We grew 3 new task states since the last time someone touched
it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121829.892737686@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c28ed1b1d7c2..94858df38a87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,7 +193,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
-- 
cgit v1.2.3


From e1781538cf5c870ab696e9b8f0a5c498d3900f2f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:30 +0100
Subject: sched: Assert task state bits at build time

Since everybody is lazy and prone to forgetting things, make the
compiler help us a bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121830.060186433@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       | 18 ++++++++++--------
 include/linux/sched.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 96361e8fa3a8..f560325c444f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,14 +134,14 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char *task_state_array[] = {
-	"R (running)",		/*  0 */
-	"S (sleeping)",		/*  1 */
-	"D (disk sleep)",	/*  2 */
-	"T (stopped)",		/*  4 */
-	"t (tracing stop)",	/*  8 */
-	"Z (zombie)",		/* 16 */
-	"X (dead)",		/* 32 */
-	"x (dead)",		/* 64 */
+	"R (running)",		/*   0 */
+	"S (sleeping)",		/*   1 */
+	"D (disk sleep)",	/*   2 */
+	"T (stopped)",		/*   4 */
+	"t (tracing stop)",	/*   8 */
+	"Z (zombie)",		/*  16 */
+	"X (dead)",		/*  32 */
+	"x (dead)",		/*  64 */
 	"K (wakekill)",		/* 128 */
 	"W (waking)",		/* 256 */
 };
@@ -151,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
 	const char **p = &task_state_array[0];
 
+	BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
 	while (state) {
 		p++;
 		state >>= 1;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94858df38a87..37543876ddf5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -192,9 +192,13 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
+#define TASK_STATE_MAX		512
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
+extern char ___assert_task_state[1 - 2*!!(
+		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
-- 
cgit v1.2.3


From e24c745272072fd2abe55209f1949b7b7ee602a7 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Mon, 14 Dec 2009 14:20:22 -0800
Subject: spi: controller driver for Designware SPI core

Driver for the Designware SPI core, it supports multipul interfaces like
PCI/APB etc.  User can use "dw_apb_ssi_db.pdf" from Synopsys as HW
datasheet.

[randy.dunlap@oracle.com: fix build]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig        |  10 +
 drivers/spi/Makefile       |   2 +
 drivers/spi/dw_spi.c       | 944 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/spi/dw_spi_pci.c   | 169 ++++++++
 include/linux/spi/dw_spi.h | 212 ++++++++++
 5 files changed, 1337 insertions(+)
 create mode 100644 drivers/spi/dw_spi.c
 create mode 100644 drivers/spi/dw_spi_pci.c
 create mode 100644 include/linux/spi/dw_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 07e5453f7b18..d7c1741c4c5b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -292,6 +292,16 @@ config SPI_NUC900
 # Add new SPI master controllers in alphabetical order above this line
 #
 
+config SPI_DESIGNWARE
+	bool "DesignWare SPI controller core support"
+	depends on SPI_MASTER
+	help
+	  general driver for SPI controller core from DesignWare
+
+config SPI_DW_PCI
+	tristate "PCI interface driver for DW SPI core"
+	depends on SPI_DESIGNWARE && PCI
+
 #
 # There are lots of SPI device types, with sensors and memory
 # being probably the most widely used ones.
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ed8c1675b52f..a909e39f7e7c 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,8 @@ obj-$(CONFIG_SPI_BFIN)			+= spi_bfin5xx.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
+obj-$(CONFIG_SPI_DESIGNWARE)		+= dw_spi.o
+obj-$(CONFIG_SPI_DW_PCI)		+= dw_spi_pci.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
new file mode 100644
index 000000000000..31620fae77be
--- /dev/null
+++ b/drivers/spi/dw_spi.c
@@ -0,0 +1,944 @@
+/*
+ * dw_spi.c - Designware SPI core controller driver (refer pxa2xx_spi.c)
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/delay.h>
+
+#include <linux/spi/dw_spi.h>
+#include <linux/spi/spi.h>
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
+
+#define START_STATE	((void *)0)
+#define RUNNING_STATE	((void *)1)
+#define DONE_STATE	((void *)2)
+#define ERROR_STATE	((void *)-1)
+
+#define QUEUE_RUNNING	0
+#define QUEUE_STOPPED	1
+
+#define MRST_SPI_DEASSERT	0
+#define MRST_SPI_ASSERT		1
+
+/* Slave spi_dev related */
+struct chip_data {
+	u16 cr0;
+	u8 cs;			/* chip select pin */
+	u8 n_bytes;		/* current is a 1/2/4 byte op */
+	u8 tmode;		/* TR/TO/RO/EEPROM */
+	u8 type;		/* SPI/SSP/MicroWire */
+
+	u8 poll_mode;		/* 1 means use poll mode */
+
+	u32 dma_width;
+	u32 rx_threshold;
+	u32 tx_threshold;
+	u8 enable_dma;
+	u8 bits_per_word;
+	u16 clk_div;		/* baud rate divider */
+	u32 speed_hz;		/* baud rate */
+	int (*write)(struct dw_spi *dws);
+	int (*read)(struct dw_spi *dws);
+	void (*cs_control)(u32 command);
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int spi_show_regs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+#define SPI_REGS_BUFSIZE	1024
+static ssize_t  spi_show_regs(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct dw_spi *dws;
+	char *buf;
+	u32 len = 0;
+	ssize_t ret;
+
+	dws = file->private_data;
+
+	buf = kzalloc(SPI_REGS_BUFSIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"MRST SPI0 registers:\n");
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"=================================\n");
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"CTRL0: \t\t0x%08x\n", dw_readl(dws, ctrl0));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"CTRL1: \t\t0x%08x\n", dw_readl(dws, ctrl1));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SSIENR: \t0x%08x\n", dw_readl(dws, ssienr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SER: \t\t0x%08x\n", dw_readl(dws, ser));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"BAUDR: \t\t0x%08x\n", dw_readl(dws, baudr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"TXFTLR: \t0x%08x\n", dw_readl(dws, txfltr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"RXFTLR: \t0x%08x\n", dw_readl(dws, rxfltr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"TXFLR: \t\t0x%08x\n", dw_readl(dws, txflr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"RXFLR: \t\t0x%08x\n", dw_readl(dws, rxflr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SR: \t\t0x%08x\n", dw_readl(dws, sr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"IMR: \t\t0x%08x\n", dw_readl(dws, imr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"ISR: \t\t0x%08x\n", dw_readl(dws, isr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMACR: \t\t0x%08x\n", dw_readl(dws, dmacr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMATDLR: \t0x%08x\n", dw_readl(dws, dmatdlr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMARDLR: \t0x%08x\n", dw_readl(dws, dmardlr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"=================================\n");
+
+	ret =  simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations mrst_spi_regs_ops = {
+	.owner		= THIS_MODULE,
+	.open		= spi_show_regs_open,
+	.read		= spi_show_regs,
+};
+
+static int mrst_spi_debugfs_init(struct dw_spi *dws)
+{
+	dws->debugfs = debugfs_create_dir("mrst_spi", NULL);
+	if (!dws->debugfs)
+		return -ENOMEM;
+
+	debugfs_create_file("registers", S_IFREG | S_IRUGO,
+		dws->debugfs, (void *)dws, &mrst_spi_regs_ops);
+	return 0;
+}
+
+static void mrst_spi_debugfs_remove(struct dw_spi *dws)
+{
+	if (dws->debugfs)
+		debugfs_remove_recursive(dws->debugfs);
+}
+
+#else
+static inline int mrst_spi_debugfs_init(struct dw_spi *dws)
+{
+}
+
+static inline void mrst_spi_debugfs_remove(struct dw_spi *dws)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static void wait_till_not_busy(struct dw_spi *dws)
+{
+	unsigned long end = jiffies + usecs_to_jiffies(1000);
+
+	while (time_before(jiffies, end)) {
+		if (!(dw_readw(dws, sr) & SR_BUSY))
+			return;
+	}
+	dev_err(&dws->master->dev,
+		"DW SPI: Stutus keeps busy for 1000us after a read/write!\n");
+}
+
+static void flush(struct dw_spi *dws)
+{
+	while (dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		dw_readw(dws, dr);
+
+	wait_till_not_busy(dws);
+}
+
+static void null_cs_control(u32 command)
+{
+}
+
+static int null_writer(struct dw_spi *dws)
+{
+	u8 n_bytes = dws->n_bytes;
+
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+	dw_writew(dws, dr, 0);
+	dws->tx += n_bytes;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int null_reader(struct dw_spi *dws)
+{
+	u8 n_bytes = dws->n_bytes;
+
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		dw_readw(dws, dr);
+		dws->rx += n_bytes;
+	}
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static int u8_writer(struct dw_spi *dws)
+{
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+
+	dw_writew(dws, dr, *(u8 *)(dws->tx));
+	++dws->tx;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int u8_reader(struct dw_spi *dws)
+{
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		*(u8 *)(dws->rx) = dw_readw(dws, dr);
+		++dws->rx;
+	}
+
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static int u16_writer(struct dw_spi *dws)
+{
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+
+	dw_writew(dws, dr, *(u16 *)(dws->tx));
+	dws->tx += 2;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int u16_reader(struct dw_spi *dws)
+{
+	u16 temp;
+
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		temp = dw_readw(dws, dr);
+		*(u16 *)(dws->rx) = temp;
+		dws->rx += 2;
+	}
+
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static void *next_transfer(struct dw_spi *dws)
+{
+	struct spi_message *msg = dws->cur_msg;
+	struct spi_transfer *trans = dws->cur_transfer;
+
+	/* Move to next transfer */
+	if (trans->transfer_list.next != &msg->transfers) {
+		dws->cur_transfer =
+			list_entry(trans->transfer_list.next,
+					struct spi_transfer,
+					transfer_list);
+		return RUNNING_STATE;
+	} else
+		return DONE_STATE;
+}
+
+/*
+ * Note: first step is the protocol driver prepares
+ * a dma-capable memory, and this func just need translate
+ * the virt addr to physical
+ */
+static int map_dma_buffers(struct dw_spi *dws)
+{
+	if (!dws->cur_msg->is_dma_mapped || !dws->dma_inited
+		|| !dws->cur_chip->enable_dma)
+		return 0;
+
+	if (dws->cur_transfer->tx_dma)
+		dws->tx_dma = dws->cur_transfer->tx_dma;
+
+	if (dws->cur_transfer->rx_dma)
+		dws->rx_dma = dws->cur_transfer->rx_dma;
+
+	return 1;
+}
+
+/* Caller already set message->status; dma and pio irqs are blocked */
+static void giveback(struct dw_spi *dws)
+{
+	struct spi_transfer *last_transfer;
+	unsigned long flags;
+	struct spi_message *msg;
+
+	spin_lock_irqsave(&dws->lock, flags);
+	msg = dws->cur_msg;
+	dws->cur_msg = NULL;
+	dws->cur_transfer = NULL;
+	dws->prev_chip = dws->cur_chip;
+	dws->cur_chip = NULL;
+	dws->dma_mapped = 0;
+	queue_work(dws->workqueue, &dws->pump_messages);
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	last_transfer = list_entry(msg->transfers.prev,
+					struct spi_transfer,
+					transfer_list);
+
+	if (!last_transfer->cs_change)
+		dws->cs_control(MRST_SPI_DEASSERT);
+
+	msg->state = NULL;
+	if (msg->complete)
+		msg->complete(msg->context);
+}
+
+static void int_error_stop(struct dw_spi *dws, const char *msg)
+{
+	/* Stop and reset hw */
+	flush(dws);
+	spi_enable_chip(dws, 0);
+
+	dev_err(&dws->master->dev, "%s\n", msg);
+	dws->cur_msg->state = ERROR_STATE;
+	tasklet_schedule(&dws->pump_transfers);
+}
+
+static void transfer_complete(struct dw_spi *dws)
+{
+	/* Update total byte transfered return count actual bytes read */
+	dws->cur_msg->actual_length += dws->len;
+
+	/* Move to next transfer */
+	dws->cur_msg->state = next_transfer(dws);
+
+	/* Handle end of message */
+	if (dws->cur_msg->state == DONE_STATE) {
+		dws->cur_msg->status = 0;
+		giveback(dws);
+	} else
+		tasklet_schedule(&dws->pump_transfers);
+}
+
+static irqreturn_t interrupt_transfer(struct dw_spi *dws)
+{
+	u16 irq_status, irq_mask = 0x3f;
+
+	irq_status = dw_readw(dws, isr) & irq_mask;
+	/* Error handling */
+	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
+		dw_readw(dws, txoicr);
+		dw_readw(dws, rxoicr);
+		dw_readw(dws, rxuicr);
+		int_error_stop(dws, "interrupt_transfer: fifo overrun");
+		return IRQ_HANDLED;
+	}
+
+	/* INT comes from tx */
+	if (dws->tx && (irq_status & SPI_INT_TXEI)) {
+		while (dws->tx < dws->tx_end)
+			dws->write(dws);
+
+		if (dws->tx == dws->tx_end) {
+			spi_mask_intr(dws, SPI_INT_TXEI);
+			transfer_complete(dws);
+		}
+	}
+
+	/* INT comes from rx */
+	if (dws->rx && (irq_status & SPI_INT_RXFI)) {
+		if (dws->read(dws))
+			transfer_complete(dws);
+	}
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dw_spi_irq(int irq, void *dev_id)
+{
+	struct dw_spi *dws = dev_id;
+
+	if (!dws->cur_msg) {
+		spi_mask_intr(dws, SPI_INT_TXEI);
+		/* Never fail */
+		return IRQ_HANDLED;
+	}
+
+	return dws->transfer_handler(dws);
+}
+
+/* Must be called inside pump_transfers() */
+static void poll_transfer(struct dw_spi *dws)
+{
+	if (dws->tx) {
+		while (dws->write(dws))
+			dws->read(dws);
+	}
+
+	dws->read(dws);
+	transfer_complete(dws);
+}
+
+static void dma_transfer(struct dw_spi *dws, int cs_change)
+{
+}
+
+static void pump_transfers(unsigned long data)
+{
+	struct dw_spi *dws = (struct dw_spi *)data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+	struct spi_device *spi = NULL;
+	struct chip_data *chip = NULL;
+	u8 bits = 0;
+	u8 imask = 0;
+	u8 cs_change = 0;
+	u16 clk_div = 0;
+	u32 speed = 0;
+	u32 cr0 = 0;
+
+	/* Get current state information */
+	message = dws->cur_msg;
+	transfer = dws->cur_transfer;
+	chip = dws->cur_chip;
+	spi = message->spi;
+
+	if (message->state == ERROR_STATE) {
+		message->status = -EIO;
+		goto early_exit;
+	}
+
+	/* Handle end of message */
+	if (message->state == DONE_STATE) {
+		message->status = 0;
+		goto early_exit;
+	}
+
+	/* Delay if requested at end of transfer*/
+	if (message->state == RUNNING_STATE) {
+		previous = list_entry(transfer->transfer_list.prev,
+					struct spi_transfer,
+					transfer_list);
+		if (previous->delay_usecs)
+			udelay(previous->delay_usecs);
+	}
+
+	dws->n_bytes = chip->n_bytes;
+	dws->dma_width = chip->dma_width;
+	dws->cs_control = chip->cs_control;
+
+	dws->rx_dma = transfer->rx_dma;
+	dws->tx_dma = transfer->tx_dma;
+	dws->tx = (void *)transfer->tx_buf;
+	dws->tx_end = dws->tx + transfer->len;
+	dws->rx = transfer->rx_buf;
+	dws->rx_end = dws->rx + transfer->len;
+	dws->write = dws->tx ? chip->write : null_writer;
+	dws->read = dws->rx ? chip->read : null_reader;
+	dws->cs_change = transfer->cs_change;
+	dws->len = dws->cur_transfer->len;
+	if (chip != dws->prev_chip)
+		cs_change = 1;
+
+	cr0 = chip->cr0;
+
+	/* Handle per transfer options for bpw and speed */
+	if (transfer->speed_hz) {
+		speed = chip->speed_hz;
+
+		if (transfer->speed_hz != speed) {
+			speed = transfer->speed_hz;
+			if (speed > dws->max_freq) {
+				printk(KERN_ERR "MRST SPI0: unsupported"
+					"freq: %dHz\n", speed);
+				message->status = -EIO;
+				goto early_exit;
+			}
+
+			/* clk_div doesn't support odd number */
+			clk_div = dws->max_freq / speed;
+			clk_div = (clk_div >> 1) << 1;
+
+			chip->speed_hz = speed;
+			chip->clk_div = clk_div;
+		}
+	}
+	if (transfer->bits_per_word) {
+		bits = transfer->bits_per_word;
+
+		switch (bits) {
+		case 8:
+			dws->n_bytes = 1;
+			dws->dma_width = 1;
+			dws->read = (dws->read != null_reader) ?
+					u8_reader : null_reader;
+			dws->write = (dws->write != null_writer) ?
+					u8_writer : null_writer;
+			break;
+		case 16:
+			dws->n_bytes = 2;
+			dws->dma_width = 2;
+			dws->read = (dws->read != null_reader) ?
+					u16_reader : null_reader;
+			dws->write = (dws->write != null_writer) ?
+					u16_writer : null_writer;
+			break;
+		default:
+			printk(KERN_ERR "MRST SPI0: unsupported bits:"
+				"%db\n", bits);
+			message->status = -EIO;
+			goto early_exit;
+		}
+
+		cr0 = (bits - 1)
+			| (chip->type << SPI_FRF_OFFSET)
+			| (spi->mode << SPI_MODE_OFFSET)
+			| (chip->tmode << SPI_TMOD_OFFSET);
+	}
+	message->state = RUNNING_STATE;
+
+	/* Check if current transfer is a DMA transaction */
+	dws->dma_mapped = map_dma_buffers(dws);
+
+	if (!dws->dma_mapped && !chip->poll_mode) {
+		if (dws->rx)
+			imask |= SPI_INT_RXFI;
+		if (dws->tx)
+			imask |= SPI_INT_TXEI;
+		dws->transfer_handler = interrupt_transfer;
+	}
+
+	/*
+	 * Reprogram registers only if
+	 *	1. chip select changes
+	 *	2. clk_div is changed
+	 *	3. control value changes
+	 */
+	if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div) {
+		spi_enable_chip(dws, 0);
+
+		if (dw_readw(dws, ctrl0) != cr0)
+			dw_writew(dws, ctrl0, cr0);
+
+		/* Set the interrupt mask, for poll mode just diable all int */
+		spi_mask_intr(dws, 0xff);
+		if (!chip->poll_mode)
+			spi_umask_intr(dws, imask);
+
+		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
+		spi_chip_sel(dws, spi->chip_select);
+		spi_enable_chip(dws, 1);
+
+		if (cs_change)
+			dws->prev_chip = chip;
+	}
+
+	if (dws->dma_mapped)
+		dma_transfer(dws, cs_change);
+
+	if (chip->poll_mode)
+		poll_transfer(dws);
+
+	return;
+
+early_exit:
+	giveback(dws);
+	return;
+}
+
+static void pump_messages(struct work_struct *work)
+{
+	struct dw_spi *dws =
+		container_of(work, struct dw_spi, pump_messages);
+	unsigned long flags;
+
+	/* Lock queue and check for queue work */
+	spin_lock_irqsave(&dws->lock, flags);
+	if (list_empty(&dws->queue) || dws->run == QUEUE_STOPPED) {
+		dws->busy = 0;
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return;
+	}
+
+	/* Make sure we are not already running a message */
+	if (dws->cur_msg) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return;
+	}
+
+	/* Extract head of queue */
+	dws->cur_msg = list_entry(dws->queue.next, struct spi_message, queue);
+	list_del_init(&dws->cur_msg->queue);
+
+	/* Initial message state*/
+	dws->cur_msg->state = START_STATE;
+	dws->cur_transfer = list_entry(dws->cur_msg->transfers.next,
+						struct spi_transfer,
+						transfer_list);
+	dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi);
+
+	/* Mark as busy and launch transfers */
+	tasklet_schedule(&dws->pump_transfers);
+
+	dws->busy = 1;
+	spin_unlock_irqrestore(&dws->lock, flags);
+}
+
+/* spi_device use this to queue in their spi_msg */
+static int dw_spi_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dws->lock, flags);
+
+	if (dws->run == QUEUE_STOPPED) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return -ESHUTDOWN;
+	}
+
+	msg->actual_length = 0;
+	msg->status = -EINPROGRESS;
+	msg->state = START_STATE;
+
+	list_add_tail(&msg->queue, &dws->queue);
+
+	if (dws->run == QUEUE_RUNNING && !dws->busy) {
+
+		if (dws->cur_transfer || dws->cur_msg)
+			queue_work(dws->workqueue,
+					&dws->pump_messages);
+		else {
+			/* If no other data transaction in air, just go */
+			spin_unlock_irqrestore(&dws->lock, flags);
+			pump_messages(&dws->pump_messages);
+			return 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&dws->lock, flags);
+	return 0;
+}
+
+/* This may be called twice for each spi dev */
+static int dw_spi_setup(struct spi_device *spi)
+{
+	struct dw_spi_chip *chip_info = NULL;
+	struct chip_data *chip;
+
+	if (spi->bits_per_word != 8 && spi->bits_per_word != 16)
+		return -EINVAL;
+
+	/* Only alloc on first setup */
+	chip = spi_get_ctldata(spi);
+	if (!chip) {
+		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+
+		chip->cs_control = null_cs_control;
+		chip->enable_dma = 0;
+	}
+
+	/*
+	 * Protocol drivers may change the chip settings, so...
+	 * if chip_info exists, use it
+	 */
+	chip_info = spi->controller_data;
+
+	/* chip_info doesn't always exist */
+	if (chip_info) {
+		if (chip_info->cs_control)
+			chip->cs_control = chip_info->cs_control;
+
+		chip->poll_mode = chip_info->poll_mode;
+		chip->type = chip_info->type;
+
+		chip->rx_threshold = 0;
+		chip->tx_threshold = 0;
+
+		chip->enable_dma = chip_info->enable_dma;
+	}
+
+	if (spi->bits_per_word <= 8) {
+		chip->n_bytes = 1;
+		chip->dma_width = 1;
+		chip->read = u8_reader;
+		chip->write = u8_writer;
+	} else if (spi->bits_per_word <= 16) {
+		chip->n_bytes = 2;
+		chip->dma_width = 2;
+		chip->read = u16_reader;
+		chip->write = u16_writer;
+	} else {
+		/* Never take >16b case for MRST SPIC */
+		dev_err(&spi->dev, "invalid wordsize\n");
+		return -EINVAL;
+	}
+	chip->bits_per_word = spi->bits_per_word;
+
+	chip->speed_hz = spi->max_speed_hz;
+	if (chip->speed_hz)
+		chip->clk_div = 25000000 / chip->speed_hz;
+	else
+		chip->clk_div = 8;	/* default value */
+
+	chip->tmode = 0; /* Tx & Rx */
+	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
+	chip->cr0 = (chip->bits_per_word - 1)
+			| (chip->type << SPI_FRF_OFFSET)
+			| (spi->mode  << SPI_MODE_OFFSET)
+			| (chip->tmode << SPI_TMOD_OFFSET);
+
+	spi_set_ctldata(spi, chip);
+	return 0;
+}
+
+static void dw_spi_cleanup(struct spi_device *spi)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+	kfree(chip);
+}
+
+static int __init init_queue(struct dw_spi *dws)
+{
+	INIT_LIST_HEAD(&dws->queue);
+	spin_lock_init(&dws->lock);
+
+	dws->run = QUEUE_STOPPED;
+	dws->busy = 0;
+
+	tasklet_init(&dws->pump_transfers,
+			pump_transfers,	(unsigned long)dws);
+
+	INIT_WORK(&dws->pump_messages, pump_messages);
+	dws->workqueue = create_singlethread_workqueue(
+					dev_name(dws->master->dev.parent));
+	if (dws->workqueue == NULL)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int start_queue(struct dw_spi *dws)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dws->lock, flags);
+
+	if (dws->run == QUEUE_RUNNING || dws->busy) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return -EBUSY;
+	}
+
+	dws->run = QUEUE_RUNNING;
+	dws->cur_msg = NULL;
+	dws->cur_transfer = NULL;
+	dws->cur_chip = NULL;
+	dws->prev_chip = NULL;
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	queue_work(dws->workqueue, &dws->pump_messages);
+
+	return 0;
+}
+
+static int stop_queue(struct dw_spi *dws)
+{
+	unsigned long flags;
+	unsigned limit = 50;
+	int status = 0;
+
+	spin_lock_irqsave(&dws->lock, flags);
+	dws->run = QUEUE_STOPPED;
+	while (!list_empty(&dws->queue) && dws->busy && limit--) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		msleep(10);
+		spin_lock_irqsave(&dws->lock, flags);
+	}
+
+	if (!list_empty(&dws->queue) || dws->busy)
+		status = -EBUSY;
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	return status;
+}
+
+static int destroy_queue(struct dw_spi *dws)
+{
+	int status;
+
+	status = stop_queue(dws);
+	if (status != 0)
+		return status;
+	destroy_workqueue(dws->workqueue);
+	return 0;
+}
+
+/* Restart the controller, disable all interrupts, clean rx fifo */
+static void spi_hw_init(struct dw_spi *dws)
+{
+	spi_enable_chip(dws, 0);
+	spi_mask_intr(dws, 0xff);
+	spi_enable_chip(dws, 1);
+	flush(dws);
+}
+
+int __devinit dw_spi_add_host(struct dw_spi *dws)
+{
+	struct spi_master *master;
+	int ret;
+
+	BUG_ON(dws == NULL);
+
+	master = spi_alloc_master(dws->parent_dev, 0);
+	if (!master) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	dws->master = master;
+	dws->type = SSI_MOTO_SPI;
+	dws->prev_chip = NULL;
+	dws->dma_inited = 0;
+	dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60);
+
+	ret = request_irq(dws->irq, dw_spi_irq, 0,
+			"dw_spi", dws);
+	if (ret < 0) {
+		dev_err(&master->dev, "can not get IRQ\n");
+		goto err_free_master;
+	}
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+	master->bus_num = dws->bus_num;
+	master->num_chipselect = dws->num_cs;
+	master->cleanup = dw_spi_cleanup;
+	master->setup = dw_spi_setup;
+	master->transfer = dw_spi_transfer;
+
+	dws->dma_inited = 0;
+
+	/* Basic HW init */
+	spi_hw_init(dws);
+
+	/* Initial and start queue */
+	ret = init_queue(dws);
+	if (ret) {
+		dev_err(&master->dev, "problem initializing queue\n");
+		goto err_diable_hw;
+	}
+	ret = start_queue(dws);
+	if (ret) {
+		dev_err(&master->dev, "problem starting queue\n");
+		goto err_diable_hw;
+	}
+
+	spi_master_set_devdata(master, dws);
+	ret = spi_register_master(master);
+	if (ret) {
+		dev_err(&master->dev, "problem registering spi master\n");
+		goto err_queue_alloc;
+	}
+
+	mrst_spi_debugfs_init(dws);
+	return 0;
+
+err_queue_alloc:
+	destroy_queue(dws);
+err_diable_hw:
+	spi_enable_chip(dws, 0);
+	free_irq(dws->irq, dws);
+err_free_master:
+	spi_master_put(master);
+exit:
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_add_host);
+
+void __devexit dw_spi_remove_host(struct dw_spi *dws)
+{
+	int status = 0;
+
+	if (!dws)
+		return;
+	mrst_spi_debugfs_remove(dws);
+
+	/* Remove the queue */
+	status = destroy_queue(dws);
+	if (status != 0)
+		dev_err(&dws->master->dev, "dw_spi_remove: workqueue will not "
+			"complete, message memory not freed\n");
+
+	spi_enable_chip(dws, 0);
+	/* Disable clk */
+	spi_set_clk(dws, 0);
+	free_irq(dws->irq, dws);
+
+	/* Disconnect from the SPI framework */
+	spi_unregister_master(dws->master);
+}
+
+int dw_spi_suspend_host(struct dw_spi *dws)
+{
+	int ret = 0;
+
+	ret = stop_queue(dws);
+	if (ret)
+		return ret;
+	spi_enable_chip(dws, 0);
+	spi_set_clk(dws, 0);
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_suspend_host);
+
+int dw_spi_resume_host(struct dw_spi *dws)
+{
+	int ret;
+
+	spi_hw_init(dws);
+	ret = start_queue(dws);
+	if (ret)
+		dev_err(&dws->master->dev, "fail to start queue (%d)\n", ret);
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_resume_host);
+
+MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
+MODULE_DESCRIPTION("Driver for DesignWare SPI controller core");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c
new file mode 100644
index 000000000000..34ba69161734
--- /dev/null
+++ b/drivers/spi/dw_spi_pci.c
@@ -0,0 +1,169 @@
+/*
+ * mrst_spi_pci.c - PCI interface driver for DW SPI Core
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/spi/dw_spi.h>
+#include <linux/spi/spi.h>
+
+#define DRIVER_NAME "dw_spi_pci"
+
+struct dw_spi_pci {
+	struct pci_dev		*pdev;
+	struct dw_spi		dws;
+};
+
+static int __devinit spi_pci_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct dw_spi_pci *dwpci;
+	struct dw_spi *dws;
+	int pci_bar = 0;
+	int ret;
+
+	printk(KERN_INFO "DW: found PCI SPI controller(ID: %04x:%04x)\n",
+		pdev->vendor, pdev->device);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	dwpci = kzalloc(sizeof(struct dw_spi_pci), GFP_KERNEL);
+	if (!dwpci) {
+		ret = -ENOMEM;
+		goto err_disable;
+	}
+
+	dwpci->pdev = pdev;
+	dws = &dwpci->dws;
+
+	/* Get basic io resource and map it */
+	dws->paddr = pci_resource_start(pdev, pci_bar);
+	dws->iolen = pci_resource_len(pdev, pci_bar);
+
+	ret = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
+	if (ret)
+		goto err_kfree;
+
+	dws->regs = ioremap_nocache((unsigned long)dws->paddr,
+				pci_resource_len(pdev, pci_bar));
+	if (!dws->regs) {
+		ret = -ENOMEM;
+		goto err_release_reg;
+	}
+
+	dws->parent_dev = &pdev->dev;
+	dws->bus_num = 0;
+	dws->num_cs = 4;
+	dws->max_freq = 25000000;	/* for Moorestwon */
+	dws->irq = pdev->irq;
+
+	ret = dw_spi_add_host(dws);
+	if (ret)
+		goto err_unmap;
+
+	/* PCI hook and SPI hook use the same drv data */
+	pci_set_drvdata(pdev, dwpci);
+	return 0;
+
+err_unmap:
+	iounmap(dws->regs);
+err_release_reg:
+	pci_release_region(pdev, pci_bar);
+err_kfree:
+	kfree(dwpci);
+err_disable:
+	pci_disable_device(pdev);
+	return ret;
+}
+
+static void __devexit spi_pci_remove(struct pci_dev *pdev)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+	iounmap(dwpci->dws.regs);
+	pci_release_region(pdev, 0);
+	kfree(dwpci);
+	pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM
+static int spi_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = dw_spi_suspend_host(&dwpci->dws);
+	if (ret)
+		return ret;
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	return ret;
+}
+
+static int spi_resume(struct pci_dev *pdev)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+	int ret;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+	return dw_spi_resume_host(&dwpci->dws);
+}
+#else
+#define spi_suspend	NULL
+#define spi_resume	NULL
+#endif
+
+static const struct pci_device_id pci_ids[] __devinitdata = {
+	/* Intel Moorestown platform SPI controller 0 */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0800) },
+	{},
+};
+
+static struct pci_driver dw_spi_driver = {
+	.name =		DRIVER_NAME,
+	.id_table =	pci_ids,
+	.probe =	spi_pci_probe,
+	.remove =	__devexit_p(spi_pci_remove),
+	.suspend =	spi_suspend,
+	.resume	=	spi_resume,
+};
+
+static int __init mrst_spi_init(void)
+{
+	return pci_register_driver(&dw_spi_driver);
+}
+
+static void __exit mrst_spi_exit(void)
+{
+	pci_unregister_driver(&dw_spi_driver);
+}
+
+module_init(mrst_spi_init);
+module_exit(mrst_spi_exit);
+
+MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
+MODULE_DESCRIPTION("PCI interface driver for DW SPI Core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
new file mode 100644
index 000000000000..51b3e771a9a3
--- /dev/null
+++ b/include/linux/spi/dw_spi.h
@@ -0,0 +1,212 @@
+#ifndef DW_SPI_HEADER_H
+#define DW_SPI_HEADER_H
+#include <linux/io.h>
+
+/* Bit fields in CTRLR0 */
+#define SPI_DFS_OFFSET			0
+
+#define SPI_FRF_OFFSET			4
+#define SPI_FRF_SPI			0x0
+#define SPI_FRF_SSP			0x1
+#define SPI_FRF_MICROWIRE		0x2
+#define SPI_FRF_RESV			0x3
+
+#define SPI_MODE_OFFSET			6
+#define SPI_SCPH_OFFSET			6
+#define SPI_SCOL_OFFSET			7
+#define SPI_TMOD_OFFSET			8
+#define	SPI_TMOD_TR			0x0		/* xmit & recv */
+#define SPI_TMOD_TO			0x1		/* xmit only */
+#define SPI_TMOD_RO			0x2		/* recv only */
+#define SPI_TMOD_EPROMREAD		0x3		/* eeprom read mode */
+
+#define SPI_SLVOE_OFFSET		10
+#define SPI_SRL_OFFSET			11
+#define SPI_CFS_OFFSET			12
+
+/* Bit fields in SR, 7 bits */
+#define SR_MASK				0x7f		/* cover 7 bits */
+#define SR_BUSY				(1 << 0)
+#define SR_TF_NOT_FULL			(1 << 1)
+#define SR_TF_EMPT			(1 << 2)
+#define SR_RF_NOT_EMPT			(1 << 3)
+#define SR_RF_FULL			(1 << 4)
+#define SR_TX_ERR			(1 << 5)
+#define SR_DCOL				(1 << 6)
+
+/* Bit fields in ISR, IMR, RISR, 7 bits */
+#define SPI_INT_TXEI			(1 << 0)
+#define SPI_INT_TXOI			(1 << 1)
+#define SPI_INT_RXUI			(1 << 2)
+#define SPI_INT_RXOI			(1 << 3)
+#define SPI_INT_RXFI			(1 << 4)
+#define SPI_INT_MSTI			(1 << 5)
+
+/* TX RX interrupt level threshhold, max can be 256 */
+#define SPI_INT_THRESHOLD		32
+
+enum dw_ssi_type {
+	SSI_MOTO_SPI = 0,
+	SSI_TI_SSP,
+	SSI_NS_MICROWIRE,
+};
+
+struct dw_spi_reg {
+	u32	ctrl0;
+	u32	ctrl1;
+	u32	ssienr;
+	u32	mwcr;
+	u32	ser;
+	u32	baudr;
+	u32	txfltr;
+	u32	rxfltr;
+	u32	txflr;
+	u32	rxflr;
+	u32	sr;
+	u32	imr;
+	u32	isr;
+	u32	risr;
+	u32	txoicr;
+	u32	rxoicr;
+	u32	rxuicr;
+	u32	msticr;
+	u32	icr;
+	u32	dmacr;
+	u32	dmatdlr;
+	u32	dmardlr;
+	u32	idr;
+	u32	version;
+	u32	dr;		/* Currently oper as 32 bits,
+				though only low 16 bits matters */
+} __packed;
+
+struct dw_spi {
+	struct spi_master	*master;
+	struct spi_device	*cur_dev;
+	struct device		*parent_dev;
+	enum dw_ssi_type	type;
+
+	void __iomem		*regs;
+	unsigned long		paddr;
+	u32			iolen;
+	int			irq;
+	u32			max_freq;	/* max bus freq supported */
+
+	u16			bus_num;
+	u16			num_cs;		/* supported slave numbers */
+
+	/* Driver message queue */
+	struct workqueue_struct	*workqueue;
+	struct work_struct	pump_messages;
+	spinlock_t		lock;
+	struct list_head	queue;
+	int			busy;
+	int			run;
+
+	/* Message Transfer pump */
+	struct tasklet_struct	pump_transfers;
+
+	/* Current message transfer state info */
+	struct spi_message	*cur_msg;
+	struct spi_transfer	*cur_transfer;
+	struct chip_data	*cur_chip;
+	struct chip_data	*prev_chip;
+	size_t			len;
+	void			*tx;
+	void			*tx_end;
+	void			*rx;
+	void			*rx_end;
+	int			dma_mapped;
+	dma_addr_t		rx_dma;
+	dma_addr_t		tx_dma;
+	size_t			rx_map_len;
+	size_t			tx_map_len;
+	u8			n_bytes;	/* current is a 1/2 bytes op */
+	u8			max_bits_per_word;	/* maxim is 16b */
+	u32			dma_width;
+	int			cs_change;
+	int			(*write)(struct dw_spi *dws);
+	int			(*read)(struct dw_spi *dws);
+	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
+	void			(*cs_control)(u32 command);
+
+	/* Dma info */
+	int			dma_inited;
+	struct dma_chan		*txchan;
+	struct dma_chan		*rxchan;
+	int			txdma_done;
+	int			rxdma_done;
+	u64			tx_param;
+	u64			rx_param;
+	struct device		*dma_dev;
+	dma_addr_t		dma_addr;
+
+	/* Bus interface info */
+	void			*priv;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+#endif
+};
+
+#define dw_readl(dw, name) \
+	__raw_readl(&(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_writel(dw, name, val) \
+	__raw_writel((val), &(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_readw(dw, name) \
+	__raw_readw(&(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_writew(dw, name, val) \
+	__raw_writew((val), &(((struct dw_spi_reg *)dw->regs)->name))
+
+static inline void spi_enable_chip(struct dw_spi *dws, int enable)
+{
+	dw_writel(dws, ssienr, (enable ? 1 : 0));
+}
+
+static inline void spi_set_clk(struct dw_spi *dws, u16 div)
+{
+	dw_writel(dws, baudr, div);
+}
+
+static inline void spi_chip_sel(struct dw_spi *dws, u16 cs)
+{
+	if (cs > dws->num_cs)
+		return;
+	dw_writel(dws, ser, 1 << cs);
+}
+
+/* Disable IRQ bits */
+static inline void spi_mask_intr(struct dw_spi *dws, u32 mask)
+{
+	u32 new_mask;
+
+	new_mask = dw_readl(dws, imr) & ~mask;
+	dw_writel(dws, imr, new_mask);
+}
+
+/* Enable IRQ bits */
+static inline void spi_umask_intr(struct dw_spi *dws, u32 mask)
+{
+	u32 new_mask;
+
+	new_mask = dw_readl(dws, imr) | mask;
+	dw_writel(dws, imr, new_mask);
+}
+
+/*
+ * Each SPI slave device to work with dw_api controller should
+ * has such a structure claiming its working mode (PIO/DMA etc),
+ * which can be save in the "controller_data" member of the
+ * struct spi_device
+ */
+struct dw_spi_chip {
+	u8 poll_mode;	/* 0 for contoller polling mode */
+	u8 type;	/* SPI/SSP/Micrwire */
+	u8 enable_dma;
+	void (*cs_control)(u32 command);
+};
+
+extern int dw_spi_add_host(struct dw_spi *dws);
+extern void dw_spi_remove_host(struct dw_spi *dws);
+extern int dw_spi_suspend_host(struct dw_spi *dws);
+extern int dw_spi_resume_host(struct dw_spi *dws);
+#endif /* DW_SPI_HEADER_H */
-- 
cgit v1.2.3


From 9afa2fb6c13501e5b3536d15344fce4e5442c469 Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Wed, 2 Dec 2009 19:51:54 -0500
Subject: fsstack/ecryptfs: remove unused get_nlinks param to
 fsstack_copy_attr_all

This get_nlinks parameter was never used by the only mainline user,
ecryptfs; and it has never been used by unionfs or wrapfs either.

Acked-by: Dustin Kirkland <kirkland@canonical.com>
Acked-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/dentry.c     |  2 +-
 fs/ecryptfs/inode.c      |  6 +++---
 fs/ecryptfs/main.c       |  2 +-
 fs/stack.c               | 17 +++--------------
 include/linux/fs_stack.h |  4 +---
 5 files changed, 9 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
 
-		fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+		fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 	}
 out:
 	return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..429ca0b3ba08 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry);
 	if (rc)
 		goto out_lock;
-	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
 	if (new_dir != old_dir)
-		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	rc = notify_change(lower_dentry, ia);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 101fe4c7b1ee..567bc4b9f70a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
 	dentry->d_op = &ecryptfs_dops;
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..0e20e43ad740 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -14,11 +14,8 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
- * copying
- */
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *))
+/* copy all attributes */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 {
 	dest->i_mode = src->i_mode;
 	dest->i_uid = src->i_uid;
@@ -29,14 +26,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 	dest->i_ctime = src->i_ctime;
 	dest->i_blkbits = src->i_blkbits;
 	dest->i_flags = src->i_flags;
-
-	/*
-	 * Update the nlinks AFTER updating the above fields, because the
-	 * get_links callback may depend on them.
-	 */
-	if (!get_nlinks)
-		dest->i_nlink = src->i_nlink;
-	else
-		dest->i_nlink = (*get_nlinks)(dest);
+	dest->i_nlink = src->i_nlink;
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index bb516ceeefc9..aa60311900dd 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -8,9 +8,7 @@
 #include <linux/fs.h>
 
 /* externs for fs/stack.c */
-extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *));
-
+extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
 extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
 
 /* inlines */
-- 
cgit v1.2.3


From 1b8ab8159ef8f818f870a1d2e3b6953d80eefd3f Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Thu, 3 Dec 2009 21:56:09 -0500
Subject: VFS/fsstack: handle 32-bit smp + preempt + large files in
 fsstack_copy_inode_size

Copy the inode size and blocks from one inode to another correctly on 32-bit
systems with CONFIG_SMP, CONFIG_PREEMPT, or CONFIG_LBDAF.  Use proper inode
spinlocks only when i_size/i_blocks cannot fit in one 32-bit word.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stack.c               | 54 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs_stack.h |  2 +-
 2 files changed, 52 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/stack.c b/fs/stack.c
index 0e20e43ad740..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,10 +7,58 @@
  * This function cannot be inlined since i_size_{read,write} is rather
  * heavy-weight on 32-bit systems
  */
-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
 {
-	i_size_write(dst, i_size_read((struct inode *)src));
-	dst->i_blocks = src->i_blocks;
+	loff_t i_size;
+	blkcnt_t i_blocks;
+
+	/*
+	 * i_size_read() includes its own seqlocking and protection from
+	 * preemption (see include/linux/fs.h): we need nothing extra for
+	 * that here, and prefer to avoid nesting locks than attempt to keep
+	 * i_size and i_blocks in sync together.
+	 */
+	i_size = i_size_read(src);
+
+	/*
+	 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
+	 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
+	 * though stat's generic_fillattr() doesn't bother, and we won't be
+	 * applying quotas (where i_blocks does become important) at the
+	 * upper level.
+	 *
+	 * We don't actually know what locking is used at the lower level;
+	 * but if it's a filesystem that supports quotas, it will be using
+	 * i_lock as in inode_add_bytes().  tmpfs uses other locking, and
+	 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
+	 * holes; but its i_blocks cannot carry into the upper long without
+	 * almost 2TB swap - let's ignore that case.
+	 */
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_lock(&src->i_lock);
+	i_blocks = src->i_blocks;
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&src->i_lock);
+
+	/*
+	 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+	 * fsstack_copy_inode_size() to hold some lock around
+	 * i_size_write(), otherwise i_size_read() may spin forever (see
+	 * include/linux/fs.h).  We don't necessarily hold i_mutex when this
+	 * is called, so take i_lock for that case.
+	 *
+	 * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
+	 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
+	 * for that case too, and do both at once by combining the tests.
+	 *
+	 * There is none of this locking overhead in the 64-bit case.
+	 */
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_lock(&dst->i_lock);
+	i_size_write(dst, i_size);
+	dst->i_blocks = i_blocks;
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&dst->i_lock);
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index aa60311900dd..da317c7163ab 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -9,7 +9,7 @@
 
 /* externs for fs/stack.c */
 extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
+extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
 
 /* inlines */
 static inline void fsstack_copy_attr_atime(struct inode *dest,
-- 
cgit v1.2.3


From 7a0ad10c367ab57c899d340372f37880cbe6ab52 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:24:40 +0100
Subject: fold do_sync_file_range into sys_sync_file_range

We recently go rid of all callers of do_sync_file_range as they're better
served with vfs_fsync or the filemap_write_and_wait.  Now that
do_sync_file_range is down to a single caller fold it into it so that people
don't start using it again accidentally.  While at it also switch it from
using __filemap_fdatawrite_range(..., WB_SYNC_ALL) to the more clear
filemap_fdatawrite_range().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c          | 59 +++++++++++++++++++++---------------------------------
 include/linux/fs.h |  4 ----
 2 files changed, 23 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 {
 	int ret;
 	struct file *file;
+	struct address_space *mapping;
 	loff_t endbyte;			/* inclusive */
 	int fput_needed;
 	umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 			!S_ISLNK(i_mode))
 		goto out_put;
 
-	ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
+	mapping = file->f_mapping;
+	if (!mapping) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	ret = 0;
+	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WRITE) {
+		ret = filemap_fdatawrite_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+
 out_put:
 	fput_light(file, fput_needed);
 out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
 }
 SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
 #endif
-
-/*
- * `endbyte' is inclusive
- */
-int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			  loff_t endbyte, unsigned int flags)
-{
-	int ret;
-
-	if (!mapping) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = 0;
-	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WRITE) {
-		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
-						WB_SYNC_ALL);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-	}
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 66bc0a54b284..77a975089d9a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,10 +1095,6 @@ struct file_lock {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/* fs/sync.c */
-extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			loff_t endbyte, unsigned int flags);
-
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
-- 
cgit v1.2.3


From eaff8079d4f1016a12e34ab323737314f24127dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:25:01 +0100
Subject: kill I_LOCK

After I_SYNC was split from I_LOCK the leftover is always used together with
I_NEW and thus superflous.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/inode.c             |  2 +-
 fs/inode.c                  | 26 +++++++++++++-------------
 fs/jfs/jfs_txnmgr.c         |  2 +-
 fs/ntfs/inode.c             |  6 +++---
 fs/ubifs/file.c             |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c |  2 +-
 fs/xfs/xfs_iget.c           |  4 ++--
 include/linux/fs.h          | 36 ++++++++++++++++--------------------
 include/linux/writeback.h   |  3 +--
 9 files changed, 39 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3ff32fa793da..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
  * directory entry when gfs2_inode_lookup() is invoked. Part of the code
  * segment inside gfs2_inode_lookup code needs to get moved around.
  *
- * Clean up I_LOCK and I_NEW as well.
+ * Clears I_NEW as well.
  **/
 
 void gfs2_set_iop(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
 	 * Prevent speculative execution through spin_unlock(&inode_lock);
 	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_LOCK);
+	wake_up_bit(&inode->i_state, __I_NEW);
 }
 
 /**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
 	}
 #endif
 	/*
-	 * This is special!  We do not need the spinlock when clearing I_LOCK,
+	 * This is special!  We do not need the spinlock when clearing I_NEW,
 	 * because we're guaranteed that nobody else tries to do anything about
 	 * the state of the inode when it is locked, as we just created it (so
-	 * there can be no old holders that haven't tested I_LOCK).
+	 * there can be no old holders that haven't tested I_NEW).
 	 * However we must emit the memory barrier so that other CPUs reliably
-	 * see the clearing of I_LOCK after the other inode initialisation has
+	 * see the clearing of I_NEW after the other inode initialisation has
 	 * completed.
 	 */
 	smp_mb();
-	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
-	inode->i_state &= ~(I_LOCK|I_NEW);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
 	wake_up_inode(inode);
 }
 EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 				goto set_failed;
 
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		if (!old) {
 			inode->i_ino = ino;
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 
 	while (1) {
 		struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
  * until the deletion _might_ have completed.  Callers are responsible
  * to recheck inode state.
  *
- * It doesn't matter if I_LOCK is not set initially, a call to
+ * It doesn't matter if I_NEW is not set initially, a call to
  * wake_up_inode() after removing from the hash list will DTRT.
  *
  * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
 static void __wait_on_freeing_inode(struct inode *inode)
 {
 	wait_queue_head_t *wq;
-	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
-	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode_lock);
 	schedule();
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		 */
 		/*
 		 * I believe this code is no longer needed.  Splitting I_LOCK
-		 * into two bits, I_LOCK and I_SYNC should prevent this
+		 * into two bits, I_NEW and I_SYNC should prevent this
 		 * deadlock as well.  But since I don't have a JFS testload
 		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
 		 * Joern
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
  * the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
  *    is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
  * necessary fields in @vi as well as initializing the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
  * normal directory inodes.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
  *
  * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
  * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
- * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
  * set as well. However, UBIFS disables readahead.
  */
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b2..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
 	struct inode		*inode = &ip->i_vnode;
 
 	inode->i_ino = ip->i_ino;
-	inode->i_state = I_NEW|I_LOCK;
+	inode->i_state = I_NEW;
 	inode_add_to_lists(ip->i_mount->m_super, inode);
 
 	inode->i_mode	= ip->i_d.di_mode;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0de36c2a46f1..fa402a6bbbcf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
 	ip->i_new_size = 0;
 
 	/* prevent anyone from using this yet */
-	VFS_I(ip)->i_state = I_NEW|I_LOCK;
+	VFS_I(ip)->i_state = I_NEW;
 
 	return ip;
 }
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
 			trace_xfs_iget_reclaim(ip);
 			goto out_error;
 		}
-		inode->i_state = I_LOCK|I_NEW;
+		inode->i_state = I_NEW;
 	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
 		if (!igrab(inode)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 77a975089d9a..cca191933ff6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1587,7 +1587,7 @@ struct super_operations {
  * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
  * various stages of removing an inode.
  *
- * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
+ * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
  *
  * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
  *			fdatasync().  i_atime is the usual cause.
@@ -1596,8 +1596,14 @@ struct super_operations {
  *			don't have to write inode on fdatasync() when only
  *			mtime has changed in it.
  * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
- * I_NEW		get_new_inode() sets i_state to I_LOCK|I_NEW.  Both
- *			are cleared by unlock_new_inode(), called from iget().
+ * I_NEW		Serves as both a mutex and completion notification.
+ *			New inodes set I_NEW.  If two processes both create
+ *			the same inode, one of them will release its inode and
+ *			wait for I_NEW to be released before returning.
+ *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
+ *			also cause waiting on I_NEW, without I_NEW actually
+ *			being set.  find_inode() uses this to prevent returning
+ *			nearly-dead inodes.
  * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
  *			is zero.  I_FREEING must be set when I_WILL_FREE is
  *			cleared.
@@ -1611,20 +1617,11 @@ struct super_operations {
  *			prohibited for many purposes.  iget() must wait for
  *			the inode to be completely released, then create it
  *			anew.  Other functions will just ignore such inodes,
- *			if appropriate.  I_LOCK is used for waiting.
+ *			if appropriate.  I_NEW is used for waiting.
  *
- * I_LOCK		Serves as both a mutex and completion notification.
- *			New inodes set I_LOCK.  If two processes both create
- *			the same inode, one of them will release its inode and
- *			wait for I_LOCK to be released before returning.
- *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
- *			also cause waiting on I_LOCK, without I_LOCK actually
- *			being set.  find_inode() uses this to prevent returning
- *			nearly-dead inodes.
- * I_SYNC		Similar to I_LOCK, but limited in scope to writeback
- *			of inode dirty data.  Having a separate lock for this
- *			purpose reduces latency and prevents some filesystem-
- *			specific deadlocks.
+ * I_SYNC		Synchonized write of dirty inode data.  The bits is
+ *			set during data writeback, and cleared with a wakeup
+ *			on the bit address once it is done.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
@@ -1633,13 +1630,12 @@ struct super_operations {
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
 #define I_DIRTY_PAGES		4
-#define I_NEW			8
+#define __I_NEW			3
+#define I_NEW			(1 << __I_NEW)
 #define I_WILL_FREE		16
 #define I_FREEING		32
 #define I_CLEAR			64
-#define __I_LOCK		7
-#define I_LOCK			(1 << __I_LOCK)
-#define __I_SYNC		8
+#define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 705f01fe413a..c18c008f4bbf 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages);
 static inline void wait_on_inode(struct inode *inode)
 {
 	might_sleep();
-	wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
-							TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
 static inline void inode_sync_wait(struct inode *inode)
 {
-- 
cgit v1.2.3


From a2770d86b33024f71df269fde2de096df89d6a48 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 17 Dec 2009 12:51:05 -0800
Subject: Revert "fix mismerge with Trond's stuff (create_mnt_ns() export is
 gone now)"

This reverts commit e9496ff46a20a8592fdc7bdaaf41b45eb808d310. Quoth Al:

 "it's dependent on a lot of other stuff not currently in mainline
  and badly broken with current fs/namespace.c.  Sorry, badly
  out-of-order cherry-pick from old queue.

  PS: there's a large pending series reworking the refcounting and
  lifetime rules for vfsmounts that will, among other things, allow to
  rip a subtree away _without_ dissolving connections in it, to be
  garbage-collected when all active references are gone.  It's
  considerably saner wrt "is the subtree busy" logics, but it's nowhere
  near being ready for merge at the moment; this changeset is one of the
  things becoming possible with that sucker, but it certainly shouldn't
  have been picked during this cycle.  My apologies..."

Noticed-by: Eric Paris <eparis@redhat.com>
Requested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c                | 3 ++-
 fs/nfs/super.c                | 8 ++++++++
 include/linux/mnt_namespace.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index faab1273281e..7d70d63ceb29 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2080,6 +2080,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 	}
 	return new_ns;
 }
+EXPORT_SYMBOL(create_mnt_ns);
 
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d5b112bcf3de..ce907efc5508 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2648,13 +2648,21 @@ out_freepage:
 static int nfs_follow_remote_path(struct vfsmount *root_mnt,
 		const char *export_path, struct vfsmount *mnt_target)
 {
+	struct mnt_namespace *ns_private;
 	struct nameidata nd;
 	struct super_block *s;
 	int ret;
 
+	ns_private = create_mnt_ns(root_mnt);
+	ret = PTR_ERR(ns_private);
+	if (IS_ERR(ns_private))
+		goto out_mntput;
+
 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
 			export_path, LOOKUP_FOLLOW, &nd);
 
+	put_mnt_ns(ns_private);
+
 	if (ret != 0)
 		goto out_err;
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d9ebf1037dfa..d74785c2393a 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -23,6 +23,7 @@ struct proc_mounts {
 
 struct fs_struct;
 
+extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3


From b6e3224fb20954f155e41ec5709b2ab70b50ae2d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 17 Dec 2009 13:23:24 -0800
Subject: Revert "task_struct: make journal_info conditional"

This reverts commit e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319, as
requested by Alexey:

 "I think I gave a good enough arguments to not merge it.
  To iterate:
   * patch makes impossible to start using ext3 on EXT3_FS=n kernels
     without reboot.
   * this is done only for one pointer on task_struct"

  None of config options which define task_struct are tristate directly
  or effectively."

Requested-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig                | 4 ----
 fs/btrfs/Kconfig          | 1 -
 fs/ext4/Kconfig           | 1 -
 fs/gfs2/Kconfig           | 1 -
 fs/jbd/Kconfig            | 1 -
 fs/jbd2/Kconfig           | 1 -
 fs/nilfs2/Kconfig         | 1 -
 fs/reiserfs/Kconfig       | 1 -
 include/linux/init_task.h | 8 +-------
 include/linux/sched.h     | 2 --
 10 files changed, 1 insertion(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad628..64d44efad7a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
 
 if BLOCK
 
-config FS_JOURNAL_INFO
-	bool
-	default n
-
 source "fs/ext2/Kconfig"
 source "fs/ext3/Kconfig"
 source "fs/ext4/Kconfig"
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bfb..7bb3c020e570 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
-	select FS_JOURNAL_INFO
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e4..9acf7e808139 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
 	tristate "The Extended 4 (ext4) filesystem"
 	select JBD2
 	select CRC16
-	select FS_JOURNAL_INFO
 	help
 	  This is the next generation of the ext3 filesystem.
 
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa6..4dcddf83326f 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
 	select SLOW_WORK
 	select QUOTA
 	select QUOTACTL
-	select FS_JOURNAL_INFO
 	help
 	  A cluster filesystem.
 
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd4..4e28beeed157 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
 config JBD
 	tristate
-	select FS_JOURNAL_INFO
 	help
 	  This is a generic journalling layer for block devices.  It is
 	  currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdfd..f32f346f4b0a 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
 config JBD2
 	tristate
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  This is a generic journaling layer for block devices that support
 	  both 32-bit and 64-bit block numbers.  It is currently used by
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b2166..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
 	tristate "NILFS2 file system support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  NILFS2 is a log-structured file system (LFS) supporting continuous
 	  snapshotting.  In addition to versioning capability of the entire
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f8..513f431038f9 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
 config REISERFS_FS
 	tristate "Reiserfs support"
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  Stores not just filenames but the files themselves in a balanced
 	  tree.  Uses journalling.
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5ed8b9c50355..abec69b63d7e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,12 +111,6 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
-#ifdef CONFIG_FS_JOURNAL_INFO
-#define INIT_JOURNAL_INFO	.journal_info = NULL,
-#else
-#define INIT_JOURNAL_INFO
-#endif
-
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -168,6 +162,7 @@ extern struct cred init_cred;
 		.signal = {{0}}},					\
 	.blocked	= {{0}},					\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
+	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
@@ -178,7 +173,6 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
-	INIT_JOURNAL_INFO						\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 244c287a5ac1..211ed32befbd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1446,10 +1446,8 @@ struct task_struct {
 	gfp_t lockdep_reclaim_gfp;
 #endif
 
-#ifdef CONFIG_FS_JOURNAL_INFO
 /* journalling filesystem info */
 	void *journal_info;
-#endif
 
 /* stacked block device info */
 	struct bio *bio_list, **bio_tail;
-- 
cgit v1.2.3


From 5d0bb2c4238e333ae18c5cd23f75e02a3dac3519 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bernhard@bwalle.de>
Date: Thu, 17 Dec 2009 15:27:11 -0800
Subject: vt: don't export vt_kmsg_redirect() to userspace

Fix following warning in linux-next by guarding the function definition
(both the "extern" and the inline) with #ifdef __KERNEL__.

usr/include/linux/vt.h:89: userspace cannot call function or variable defined in
the kernel

Introduced by commit 5ada918b82399eef3afd6a71e3637697d6bd719f ("vt:
introduce and use vt_kmsg_redirect() function").

Signed-off-by: Bernhard Walle <bernhard@bwalle.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vt.h b/include/linux/vt.h
index 3fb9944e50a6..d5dd0bc408fd 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -84,6 +84,8 @@ struct vt_setactivate {
 
 #define VT_SETACTIVATE	0x560F	/* Activate and set the mode of a console */
 
+#ifdef __KERNEL__
+
 #ifdef CONFIG_VT_CONSOLE
 
 extern int vt_kmsg_redirect(int new);
@@ -97,6 +99,8 @@ static inline int vt_kmsg_redirect(int new)
 
 #endif
 
+#endif /* __KERNEL__ */
+
 #define vt_get_kmsg_redirect() vt_kmsg_redirect(-1)
 
 #endif /* _LINUX_VT_H */
-- 
cgit v1.2.3


From f6151dfea21496d43dbaba32cfcd9c9f404769bc Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 17 Dec 2009 15:27:16 -0800
Subject: mm: introduce coredump parameter structure

Introduce coredump parameter data structure (struct coredump_params) to
simplify binfmt->core_dump() arguments.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c        | 13 +++++++------
 fs/binfmt_elf.c         | 24 +++++++++++++-----------
 fs/binfmt_elf_fdpic.c   | 29 +++++++++++++++--------------
 fs/binfmt_flat.c        |  6 +++---
 fs/binfmt_som.c         |  2 +-
 fs/exec.c               | 38 +++++++++++++++++++++-----------------
 include/linux/binfmts.h | 10 +++++++++-
 7 files changed, 69 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c778..346b69405363 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
 {
+	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
 	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
 		dump.u_dsize = 0;
 
 /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_ssize = 0;
 
 /* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 97b6e9efeb7f..edd90c49003c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  * don't even try.
  */
 #ifdef CONFIG_ELF_CORE
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
 #else
 #define elf_core_dump	NULL
 #endif
@@ -1272,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1901,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
@@ -1947,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	 * notes.  This also sets up the file header.
 	 */
 	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
 		goto cleanup;
 
 	has_dumped = 1;
@@ -2009,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 #endif
 
  	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
 		goto end_coredump;
 
-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2033,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 			page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7b055385db8e..c25256a5c5b0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
 					     struct file *, struct mm_struct *);
 
 #ifdef CONFIG_ELF_CORE
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
 #endif
 
 static struct linux_binfmt elf_fdpic_format = {
@@ -1326,8 +1326,9 @@ static int writenote(struct memelfnote *men, struct file *file)
 #undef DUMP_WRITE
 #undef DUMP_SEEK
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1583,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 		goto cleanup;
 #endif
 
-	if (signr) {
+	if (cprm->signr) {
 		struct core_thread *ct;
 		struct elf_thread_status *tmp;
 
@@ -1661,14 +1661,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 			int sz;
 
 			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
 			thread_status_size += sz;
 		}
 	}
 
 	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
 
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1703,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
   	/* Try to dump the FPU. */
 	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
 		fill_note(notes + numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
 #ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1774,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
 			goto end_coredump;
 
 	/* write out the thread status notes section */
@@ -1783,14 +1783,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
 				goto end_coredump;
 	}
 
-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
 		goto end_coredump;
 
-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
 		goto end_coredump;
 
 #ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e756..d4a00ea1054c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
  * Currently only a stub-function.
  */
 
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
 {
 	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
 	return(1);
 }
 
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e77..2a9b5330cc5e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
  * don't even try.
  */
 #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
 #else
 #define som_core_dump	NULL
 #endif
diff --git a/fs/exec.c b/fs/exec.c
index 77db9a97a773..632b02e34ec7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1763,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
-	struct file * file;
 	const struct cred *old_cred;
 	struct cred *cred;
 	int retval = 0;
 	int flag = 0;
 	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
 	char **helper_argv = NULL;
 	int helper_argc = 0;
 	int dump_count = 0;
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};
 
 	audit_core_dumps(signr);
 
@@ -1829,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
 		goto fail_unlock;
 
  	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
 			 * non-zero limit gets set to RLIM_INFINITY below, but
 			 * a limit of 0 skips the dump.  This is a consistent
 			 * way to catch recursive crashes.  We can still crash
@@ -1870,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			goto fail_dropcount;
 		}
 
-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
  		}
  	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
 		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
 		goto close_fail;
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1901,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 */
 	if (inode->i_uid != current_fsuid())
 		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
 		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
 		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
 		goto close_fail;
 
-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);
 
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
 close_fail:
 	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index aece486ac734..cd4349bdc34e 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{
 
 #define BINPRM_MAX_RECURSION 4
 
+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
 /*
  * This structure defines the functions that are used to load the binary formats that
  * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
 	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
 	int hasvdso;
 };
-- 
cgit v1.2.3


From 925cc71e512a29e2594bcc17dc58d0a0e9c4d524 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 17 Dec 2009 14:44:38 +0000
Subject: mm: Add notifier in pageblock isolation for balloon drivers

Memory balloon drivers can allocate a large amount of memory which is not
movable but could be freed to accomodate memory hotplug remove.

Prior to calling the memory hotplug notifier chain the memory in the
pageblock is isolated.  Currently, if the migrate type is not
MIGRATE_MOVABLE the isolation will not proceed, causing the memory removal
for that page range to fail.

Rather than failing pageblock isolation if the migrateteype is not
MIGRATE_MOVABLE, this patch checks if all of the pages in the pageblock,
and not on the LRU, are owned by a registered balloon driver (or other
entity) using a notifier chain.  If all of the non-movable pages are owned
by a balloon, they can be freed later through the memory notifier chain
and the range can still be isolated in set_migratetype_isolate().

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Brian King <brking@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Gerald Schaefer <geralds@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/base/memory.c  | 19 +++++++++++++++++
 include/linux/memory.h | 27 ++++++++++++++++++++++++
 mm/page_alloc.c        | 57 +++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 96 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c4c8f2e1dd15..d7d77d4a402c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_memory_notifier);
 
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
@@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v)
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+int memory_isolate_notify(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
+}
+
 /*
  * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
  * OK to have direct references to sparsemem variables in here.
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 37fa19b34ef5..1adfe779eb99 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -50,6 +50,19 @@ struct memory_notify {
 	int status_change_nid;
 };
 
+/*
+ * During pageblock isolation, count the number of pages within the
+ * range [start_pfn, start_pfn + nr_pages) which are owned by code
+ * in the notifier chain.
+ */
+#define MEM_ISOLATE_COUNT	(1<<0)
+
+struct memory_isolate_notify {
+	unsigned long start_pfn;	/* Start of range to check */
+	unsigned int nr_pages;		/* # pages in range to check */
+	unsigned int pages_found;	/* # pages owned found by callbacks */
+};
+
 struct notifier_block;
 struct mem_section;
 
@@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v)
 {
 	return 0;
 }
+static inline int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+}
+static inline int memory_isolate_notify(unsigned long val, void *v)
+{
+	return 0;
+}
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
+extern int register_memory_isolate_notifier(struct notifier_block *nb);
+extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 extern int register_new_memory(int, struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
 extern int remove_memory_block(unsigned long, struct mem_section *, int);
 extern int memory_notify(unsigned long val, void *v);
+extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 enum mem_add_context { BOOT, HOTPLUG };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74af449b1f1d..998eacc1e4c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
-- 
cgit v1.2.3


From 8c0414cd524e9f1c483ffb3ff1c2d860f5c567c8 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 3 Dec 2009 12:46:51 -0800
Subject: fiemap: Add new extent flag FIEMAP_EXTENT_SHARED

Some filesystems may allow multiple files to point to a particular
extent.  This patch adds flag FIEMAP_EXTENT_SHARED to denote extents
that are shared with other inodes.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 include/linux/fiemap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index 934e22d65801..d830747f5c0b 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -62,5 +62,7 @@ struct fiemap {
 #define FIEMAP_EXTENT_MERGED		0x00001000 /* File does not natively
 						    * support extents. Result
 						    * merged for efficiency. */
+#define FIEMAP_EXTENT_SHARED		0x00002000 /* Space shared with other
+						    * files. */
 
 #endif /* _LINUX_FIEMAP_H */
-- 
cgit v1.2.3


From 622e99bf0d54c4517cb0524540cd77257db8621a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 18 Dec 2009 17:43:20 +0100
Subject: [S390] rename NT_PRXSTATUS to NT_S390_HIGHREGS

The elf notes number for the upper register halves is s390 specific.
Change the name of the elf notes to include S390.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 2 +-
 include/linux/elf.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 653c6a178740..13815d39f7dd 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -959,7 +959,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_fpregs_set,
 	},
 	[REGSET_GENERAL_EXTENDED] = {
-		.core_note_type = NT_PRXSTATUS,
+		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 90a4ed0ea0e5..0cc4d55151b7 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -361,7 +361,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
-#define NT_PRXSTATUS	0x300		/* s390 upper register halves */
+#define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 
 
 /* Note header in a PT_NOTE section */
-- 
cgit v1.2.3


From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Dec 2009 13:55:48 +0100
Subject: mac80211: fix peer HT capabilities

I noticed yesterday, because Jeff had noticed
a speed regression, cf. bug
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138
that the SM PS settings for peers were wrong.
Instead of overwriting the SM PS settings with
the local bits, we need to keep the remote bits.

The bug was part of the original HT code from
over two years ago, but unfortunately nobody
noticed that it makes no sense -- we shouldn't
be overwriting the peer's setting with our own
but rather keep it intact when masking the peer
capabilities with our own.

While fixing that, I noticed that the masking of
capabilities is completely useless for most of
the bits, so also fix those other bits.

Finally, I also noticed that PSMP_SUPPORT no
longer exists in the final 802.11n version, so
also remove that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800lib.c |  3 +--
 include/linux/ieee80211.h               |  2 +-
 net/mac80211/ht.c                       | 25 ++++++++++++++++++++++---
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 6bf6c0f12f35..27bf887f1453 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -2080,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	    IEEE80211_HT_CAP_SGI_20 |
 	    IEEE80211_HT_CAP_SGI_40 |
 	    IEEE80211_HT_CAP_TX_STBC |
-	    IEEE80211_HT_CAP_RX_STBC |
-	    IEEE80211_HT_CAP_PSMP_SUPPORT;
+	    IEEE80211_HT_CAP_RX_STBC;
 	spec->ht.ampdu_factor = 3;
 	spec->ht.ampdu_density = 4;
 	spec->ht.mcs.tx_params =
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d9724a28c0c2..163c840437d6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -832,7 +832,7 @@ struct ieee80211_ht_cap {
 #define IEEE80211_HT_CAP_DELAY_BA		0x0400
 #define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
 #define IEEE80211_HT_CAP_DSSSCCK40		0x1000
-#define IEEE80211_HT_CAP_PSMP_SUPPORT		0x2000
+#define IEEE80211_HT_CAP_RESERVED		0x2000
 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT	0x4000
 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT		0x8000
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3787455fb696..d7dcee680728 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 
 	ht_cap->ht_supported = true;
 
-	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap;
-	ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS;
-	ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+	/*
+	 * The bits listed in this expression should be
+	 * the same for the peer and us, if the station
+	 * advertises more then we can't use those thus
+	 * we mask them out.
+	 */
+	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) &
+		(sband->ht_cap.cap |
+		 ~(IEEE80211_HT_CAP_LDPC_CODING |
+		   IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		   IEEE80211_HT_CAP_GRN_FLD |
+		   IEEE80211_HT_CAP_SGI_20 |
+		   IEEE80211_HT_CAP_SGI_40 |
+		   IEEE80211_HT_CAP_DSSSCCK40));
+	/*
+	 * The STBC bits are asymmetric -- if we don't have
+	 * TX then mask out the peer's RX and vice versa.
+	 */
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC;
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
 
 	ampdu_info = ht_cap_ie->ampdu_params_info;
 	ht_cap->ampdu_factor =
-- 
cgit v1.2.3


From 482928d59db668b8d82a48717f78986d8cea72e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Dec 2009 10:10:39 -0500
Subject: Fix f_flags/f_mode in case of lookup_instantiate_filp() from
 open(pathname, 3)

Just set f_flags when shoving struct file into nameidata; don't
postpone that until __dentry_open().  do_filp_open() has correct
value; lookup_instantiate_filp() doesn't - we lose the difference
between O_RDWR and 3 by that point.

We still set .intent.open.flags, so no fs code needs to be changed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h         |  7 +++++++
 fs/namei.c            |  6 ++++--
 fs/open.c             | 13 ++++++-------
 include/linux/namei.h |  2 --
 4 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index f67cd141d9a8..e96a1667d749 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -85,3 +85,10 @@ extern struct file *get_empty_filp(void);
  * super.c
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
+
+/*
+ * open.c
+ */
+struct nameidata;
+extern struct file *nameidata_to_filp(struct nameidata *);
+extern void release_open_intent(struct nameidata *);
diff --git a/fs/namei.c b/fs/namei.c
index dad4b80257db..d517f73aa36b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1640,6 +1640,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		if (filp == NULL)
 			return ERR_PTR(-ENFILE);
 		nd.intent.open.file = filp;
+		filp->f_flags = open_flag;
 		nd.intent.open.flags = flag;
 		nd.intent.open.create_mode = 0;
 		error = do_path_lookup(dfd, pathname,
@@ -1685,6 +1686,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (filp == NULL)
 		goto exit_parent;
 	nd.intent.open.file = filp;
+	filp->f_flags = open_flag;
 	nd.intent.open.flags = flag;
 	nd.intent.open.create_mode = mode;
 	dir = nd.path.dentry;
@@ -1725,7 +1727,7 @@ do_last:
 			mnt_drop_write(nd.path.mnt);
 			goto exit;
 		}
-		filp = nameidata_to_filp(&nd, open_flag);
+		filp = nameidata_to_filp(&nd);
 		mnt_drop_write(nd.path.mnt);
 		if (nd.root.mnt)
 			path_put(&nd.root);
@@ -1789,7 +1791,7 @@ ok:
 			mnt_drop_write(nd.path.mnt);
 		goto exit;
 	}
-	filp = nameidata_to_filp(&nd, open_flag);
+	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
 		error = ima_path_check(&filp->f_path, filp->f_mode &
 			       (MAY_READ | MAY_WRITE | MAY_EXEC));
diff --git a/fs/open.c b/fs/open.c
index ca69241796bd..6daee28f6e8f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -821,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
 }
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					int flags, struct file *f,
+					struct file *f,
 					int (*open)(struct inode *, struct file *),
 					const struct cred *cred)
 {
 	struct inode *inode;
 	int error;
 
-	f->f_flags = flags;
-	f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
+	f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK |
 				FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
@@ -930,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
 	if (IS_ERR(dentry))
 		goto out_err;
 	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
-					     nd->intent.open.flags - 1,
 					     nd->intent.open.file,
 					     open, cred);
 out:
@@ -949,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
  *
  * Note that this function destroys the original nameidata
  */
-struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+struct file *nameidata_to_filp(struct nameidata *nd)
 {
 	const struct cred *cred = current_cred();
 	struct file *filp;
@@ -958,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 	filp = nd->intent.open.file;
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL)
-		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
+		filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
 				     NULL, cred);
 	else
 		path_put(&nd->path);
@@ -997,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 		return ERR_PTR(error);
 	}
 
-	return __dentry_open(dentry, mnt, flags, f, NULL, cred);
+	f->f_flags = flags;
+	return __dentry_open(dentry, mnt, f, NULL, cred);
 }
 EXPORT_SYMBOL(dentry_open);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 028946750289..05b441d93642 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,8 +72,6 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 
 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *));
-extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
-extern void release_open_intent(struct nameidata *);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 
-- 
cgit v1.2.3


From 5300990c0370e804e49d9a59d928c5d53fb73487 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Dec 2009 10:15:07 -0500
Subject: Sanitize f_flags helpers

* pull ACC_MODE to fs.h; we have several copies all over the place
* nightmarish expression calculating f_mode by f_flags deserves a helper
too (OPEN_FMODE(flags))

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c       | 10 +---------
 fs/namei.c             |  2 --
 fs/open.c              |  2 +-
 include/linux/fs.h     |  3 +++
 kernel/auditsc.c       |  1 -
 security/tomoyo/file.c |  1 -
 6 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 598237e97221..9f0bf13291e5 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -89,19 +89,11 @@ struct file *anon_inode_getfile(const char *name,
 	struct qstr this;
 	struct path path;
 	struct file *file;
-	fmode_t mode;
 	int error;
 
 	if (IS_ERR(anon_inode_inode))
 		return ERR_PTR(-ENODEV);
 
-	switch (flags & O_ACCMODE) {
-	case O_RDONLY: mode = FMODE_READ;		break;
-	case O_WRONLY: mode = FMODE_WRITE;		break;
-	case O_RDWR:   mode = FMODE_READ | FMODE_WRITE;	break;
-	default:       return ERR_PTR(-EINVAL);
-	}
-
 	if (fops->owner && !try_module_get(fops->owner))
 		return ERR_PTR(-ENOENT);
 
@@ -129,7 +121,7 @@ struct file *anon_inode_getfile(const char *name,
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
-	file = alloc_file(&path, mode, fops);
+	file = alloc_file(&path, OPEN_FMODE(flags), fops);
 	if (!file)
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
diff --git a/fs/namei.c b/fs/namei.c
index d517f73aa36b..68921d9b5302 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -37,8 +37,6 @@
 
 #include "internal.h"
 
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
-
 /* [Feb-1997 T. Schoebel-Theuer]
  * Fundamental changes in the pathname lookup mechanisms (namei)
  * were necessary because of omirr.  The reason is that omirr needs
diff --git a/fs/open.c b/fs/open.c
index 6daee28f6e8f..040cef72bc00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -828,7 +828,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	struct inode *inode;
 	int error;
 
-	f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK |
+	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
 				FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cca191933ff6..9e13b533aaef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2464,5 +2464,8 @@ int proc_nr_files(struct ctl_table *table, int write,
 
 int __init get_filesystem_list(char *buf);
 
+#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 267e484f0198..fc0f928167e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -250,7 +250,6 @@ struct audit_context {
 #endif
 };
 
-#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 static inline int open_arg(int flags, int mask)
 {
 	int n = ACC_MODE(flags);
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 8346938809b1..9a6c58881c0a 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -12,7 +12,6 @@
 #include "common.h"
 #include "tomoyo.h"
 #include "realpath.h"
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
 /*
  * tomoyo_globally_readable_file_entry is a structure which is used for holding
-- 
cgit v1.2.3


From 95ebc3a7930d5965b00bbedbf36bfd3eb9124d65 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 28 Oct 2009 01:46:33 +0100
Subject: Remove obsolete comment in fs.h

This question was determined to be a bug which was fixed in
commit 4a3b0a49.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Jan Blunck <jblunck@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9e13b533aaef..7e3012e0ac06 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1624,8 +1624,6 @@ struct super_operations {
  *			on the bit address once it is done.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
- * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
- *    I_CLEAR?  If not, why?
  */
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
-- 
cgit v1.2.3


From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:26 -0800
Subject: kfifo: move struct kfifo in place

This is a new generic kernel FIFO implementation.

The current kernel fifo API is not very widely used, because it has to
many constrains.  Only 17 files in the current 2.6.31-rc5 used it.
FIFO's are like list's a very basic thing and a kfifo API which handles
the most use case would save a lot of development time and memory
resources.

I think this are the reasons why kfifo is not in use:

 - The API is to simple, important functions are missing
 - A fifo can be only allocated dynamically
 - There is a requirement of a spinlock whether you need it or not
 - There is no support for data records inside a fifo

So I decided to extend the kfifo in a more generic way without blowing up
the API to much.  The new API has the following benefits:

 - Generic usage: For kernel internal use and/or device driver.
 - Provide an API for the most use case.
 - Slim API: The whole API provides 25 functions.
 - Linux style habit.
 - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros
 - Direct copy_to_user from the fifo and copy_from_user into the fifo.
 - The kfifo itself is an in place member of the using data structure, this save an
   indirection access and does not waste the kernel allocator.
 - Lockless access: if only one reader and one writer is active on the fifo,
   which is the common use case, no additional locking is necessary.
 - Remove spinlock - give the user the freedom of choice what kind of locking to use if
   one is required.
 - Ability to handle records. Three type of records are supported:
   - Variable length records between 0-255 bytes, with a record size
     field of 1 bytes.
   - Variable length records between 0-65535 bytes, with a record size
     field of 2 bytes.
   - Fixed size records, which no record size field.
 - Preserve memory resource.
 - Performance!
 - Easy to use!

This patch:

Since most users want to have the kfifo as part of another object,
reorganize the code to allow including struct kfifo in another data
structure.  This requires changing the kfifo_alloc and kfifo_init
prototypes so that we pass an existing kfifo pointer into them.  This
patch changes the implementation and all existing users.

[akpm@linux-foundation.org: fix warning]
Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 21 +++++-----
 drivers/char/sonypi.c                       | 40 +++++++++---------
 drivers/infiniband/hw/cxgb3/cxio_hal.h      |  9 ++--
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 60 +++++++++++++-------------
 drivers/media/video/meye.c                  | 48 ++++++++++-----------
 drivers/media/video/meye.h                  |  4 +-
 drivers/net/wireless/libertas/cmd.c         |  4 +-
 drivers/net/wireless/libertas/dev.h         |  4 +-
 drivers/net/wireless/libertas/main.c        | 16 ++++---
 drivers/platform/x86/fujitsu-laptop.c       | 18 ++++----
 drivers/platform/x86/sony-laptop.c          | 46 ++++++++++----------
 drivers/scsi/libiscsi.c                     | 22 ++++------
 drivers/scsi/libiscsi_tcp.c                 | 29 +++++++------
 drivers/scsi/libsrp.c                       | 13 +++---
 drivers/usb/host/fhci-sched.c               | 10 ++---
 drivers/usb/host/fhci-tds.c                 | 35 ++++++++--------
 drivers/usb/host/fhci.h                     | 10 ++---
 drivers/usb/serial/usb-serial.c             |  5 +--
 include/linux/kfifo.h                       | 11 ++---
 include/scsi/libiscsi.h                     |  3 +-
 include/scsi/libiscsi_tcp.h                 |  2 +-
 include/scsi/libsrp.h                       |  2 +-
 kernel/kfifo.c                              | 65 +++++++++++++++--------------
 net/dccp/probe.c                            | 20 ++++-----
 24 files changed, 238 insertions(+), 259 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index d3400b20444f..0f39bec28b45 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -358,7 +358,7 @@ struct port {
 	u8 update_flow_control;
 	struct ctrl_ul ctrl_ul;
 	struct ctrl_dl ctrl_dl;
-	struct kfifo *fifo_ul;
+	struct kfifo fifo_ul;
 	void __iomem *dl_addr[2];
 	u32 dl_size[2];
 	u8 toggle_dl;
@@ -685,8 +685,8 @@ static int nozomi_read_config_table(struct nozomi *dc)
 		dump_table(dc);
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
-			dc->port[i].fifo_ul =
-			    kfifo_alloc(FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
+			kfifo_alloc(&dc->port[i].fifo_ul,
+				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = __kfifo_get(port->fifo_ul, dc->send_buf,
+	size = __kfifo_get(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc)
 
 	} else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) {
 
-		if (__kfifo_len(dc->port[port].fifo_ul)) {
+		if (__kfifo_len(&dc->port[port].fifo_ul)) {
 			DBG1("Enable interrupt (0x%04X) on port: %d",
 				enable_ier, port);
 			DBG1("Data in buffer [%d], enable transmit! ",
-				__kfifo_len(dc->port[port].fifo_ul));
+				__kfifo_len(&dc->port[port].fifo_ul));
 			enable_transmit_ul(port, dc);
 		} else {
 			DBG1("No data in buffer...");
@@ -1536,8 +1536,7 @@ static void __devexit nozomi_card_exit(struct pci_dev *pdev)
 	free_irq(pdev->irq, dc);
 
 	for (i = 0; i < MAX_PORT; i++)
-		if (dc->port[i].fifo_ul)
-			kfifo_free(dc->port[i].fifo_ul);
+		kfifo_free(&dc->port[i].fifo_ul);
 
 	kfree(dc->send_buf);
 
@@ -1673,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = __kfifo_put(port->fifo_ul, (unsigned char *)buffer, count);
+	rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
@@ -1721,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty)
 	if (!port->port.count)
 		goto exit;
 
-	room = port->fifo_ul->size - __kfifo_len(port->fifo_ul);
+	room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul);
 
 exit:
 	mutex_unlock(&port->tty_sem);
@@ -1878,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty)
 		goto exit_in_buffer;
 	}
 
-	rval = __kfifo_len(port->fifo_ul);
+	rval = __kfifo_len(&port->fifo_ul);
 
 exit_in_buffer:
 	return rval;
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 8c262aaf7c26..9e6efb1f029f 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -487,7 +487,7 @@ static struct sonypi_device {
 	int camera_power;
 	int bluetooth_power;
 	struct mutex lock;
-	struct kfifo *fifo;
+	struct kfifo fifo;
 	spinlock_t fifo_lock;
 	wait_queue_head_t fifo_proc_list;
 	struct fasync_struct *fifo_async;
@@ -496,7 +496,7 @@ static struct sonypi_device {
 	struct input_dev *input_jog_dev;
 	struct input_dev *input_key_dev;
 	struct work_struct input_work;
-	struct kfifo *input_fifo;
+	struct kfifo input_fifo;
 	spinlock_t input_fifo_lock;
 } sonypi_device;
 
@@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get(sonypi_device.input_fifo, (unsigned char *)&kp,
+	while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp,
 			 sizeof(kp)) == sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
@@ -827,7 +827,7 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put(sonypi_device.input_fifo,
+		kfifo_put(&sonypi_device.input_fifo,
 			  (unsigned char *)&kp, sizeof(kp));
 		schedule_work(&sonypi_device.input_work);
 	}
@@ -880,7 +880,7 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
 
@@ -906,7 +906,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	mutex_lock(&sonypi_device.lock);
 	/* Flush input queue on first open */
 	if (!sonypi_device.open_count)
-		kfifo_reset(sonypi_device.fifo);
+		kfifo_reset(&sonypi_device.fifo);
 	sonypi_device.open_count++;
 	mutex_unlock(&sonypi_device.lock);
 	unlock_kernel();
@@ -919,17 +919,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	ssize_t ret;
 	unsigned char c;
 
-	if ((kfifo_len(sonypi_device.fifo) == 0) &&
+	if ((kfifo_len(&sonypi_device.fifo) == 0) &&
 	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	ret = wait_event_interruptible(sonypi_device.fifo_proc_list,
-				       kfifo_len(sonypi_device.fifo) != 0);
+				       kfifo_len(&sonypi_device.fifo) != 0);
 	if (ret)
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -946,7 +946,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &sonypi_device.fifo_proc_list, wait);
-	if (kfifo_len(sonypi_device.fifo))
+	if (kfifo_len(&sonypi_device.fifo))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
@@ -1313,11 +1313,11 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 			"http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n");
 
 	spin_lock_init(&sonypi_device.fifo_lock);
-	sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
+	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL,
 					 &sonypi_device.fifo_lock);
-	if (IS_ERR(sonypi_device.fifo)) {
+	if (error) {
 		printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-		return PTR_ERR(sonypi_device.fifo);
+		return error;
 	}
 
 	init_waitqueue_head(&sonypi_device.fifo_proc_list);
@@ -1393,12 +1393,10 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 		}
 
 		spin_lock_init(&sonypi_device.input_fifo_lock);
-		sonypi_device.input_fifo =
-			kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
-				    &sonypi_device.input_fifo_lock);
-		if (IS_ERR(sonypi_device.input_fifo)) {
+		error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE,
+				GFP_KERNEL, &sonypi_device.input_fifo_lock);
+		if (error) {
 			printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-			error = PTR_ERR(sonypi_device.input_fifo);
 			goto err_inpdev_unregister;
 		}
 
@@ -1423,7 +1421,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 		pci_disable_device(pcidev);
  err_put_pcidev:
 	pci_dev_put(pcidev);
-	kfifo_free(sonypi_device.fifo);
+	kfifo_free(&sonypi_device.fifo);
 
 	return error;
 }
@@ -1438,7 +1436,7 @@ static int __devexit sonypi_remove(struct platform_device *dev)
 	if (useinput) {
 		input_unregister_device(sonypi_device.input_key_dev);
 		input_unregister_device(sonypi_device.input_jog_dev);
-		kfifo_free(sonypi_device.input_fifo);
+		kfifo_free(&sonypi_device.input_fifo);
 	}
 
 	misc_deregister(&sonypi_misc_device);
@@ -1451,7 +1449,7 @@ static int __devexit sonypi_remove(struct platform_device *dev)
 		pci_dev_put(sonypi_device.dev);
 	}
 
-	kfifo_free(sonypi_device.fifo);
+	kfifo_free(&sonypi_device.fifo);
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index bfd03bf8be54..f3d440cc68f2 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -34,6 +34,7 @@
 
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/kfifo.h>
 
 #include "t3_cpl.h"
 #include "t3cdev.h"
@@ -75,13 +76,13 @@ struct cxio_hal_ctrl_qp {
 };
 
 struct cxio_hal_resource {
-	struct kfifo *tpt_fifo;
+	struct kfifo tpt_fifo;
 	spinlock_t tpt_fifo_lock;
-	struct kfifo *qpid_fifo;
+	struct kfifo qpid_fifo;
 	spinlock_t qpid_fifo_lock;
-	struct kfifo *cqid_fifo;
+	struct kfifo cqid_fifo;
 	spinlock_t cqid_fifo_lock;
-	struct kfifo *pdid_fifo;
+	struct kfifo pdid_fifo;
 	spinlock_t pdid_fifo_lock;
 };
 
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index bd233c087653..65072bdfc1bf 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -39,12 +39,12 @@
 #include "cxio_resource.h"
 #include "cxio_hal.h"
 
-static struct kfifo *rhdl_fifo;
+static struct kfifo rhdl_fifo;
 static spinlock_t rhdl_fifo_lock;
 
 #define RANDOM_SIZE 16
 
-static int __cxio_init_resource_fifo(struct kfifo **fifo,
+static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				   spinlock_t *fifo_lock,
 				   u32 nr, u32 skip_low,
 				   u32 skip_high,
@@ -55,12 +55,11 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
 
-	*fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
-	if (IS_ERR(*fifo))
+	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock))
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		__kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+		__kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -72,33 +71,33 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			__kfifo_put(*fifo,
+			__kfifo_put(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			__kfifo_put(*fifo,
+			__kfifo_put(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			__kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32));
 	return 0;
 }
 
-static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
 				   u32 nr, u32 skip_low, u32 skip_high)
 {
 	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
 					  skip_high, 0));
 }
 
-static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+static int cxio_init_resource_fifo_random(struct kfifo *fifo,
 				   spinlock_t * fifo_lock,
 				   u32 nr, u32 skip_low, u32 skip_high)
 {
@@ -113,15 +112,14 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
-	rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
+	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
 					      GFP_KERNEL,
-					      &rdev_p->rscp->qpid_fifo_lock);
-	if (IS_ERR(rdev_p->rscp->qpid_fifo))
+					      &rdev_p->rscp->qpid_fifo_lock))
 		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			__kfifo_put(rdev_p->rscp->qpid_fifo,
+			__kfifo_put(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
@@ -134,7 +132,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
 
 void cxio_hal_destroy_rhdl_resource(void)
 {
-	kfifo_free(rhdl_fifo);
+	kfifo_free(&rhdl_fifo);
 }
 
 /* nr_* must be power of 2 */
@@ -167,11 +165,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
 		goto pdid_err;
 	return 0;
 pdid_err:
-	kfifo_free(rscp->cqid_fifo);
+	kfifo_free(&rscp->cqid_fifo);
 cqid_err:
-	kfifo_free(rscp->qpid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
 qpid_err:
-	kfifo_free(rscp->tpt_fifo);
+	kfifo_free(&rscp->tpt_fifo);
 tpt_err:
 	return -ENOMEM;
 }
@@ -195,17 +193,17 @@ static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
 
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->tpt_fifo);
+	return cxio_hal_get_resource(&rscp->tpt_fifo);
 }
 
 void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
 {
-	cxio_hal_put_resource(rscp->tpt_fifo, stag);
+	cxio_hal_put_resource(&rscp->tpt_fifo, stag);
 }
 
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
-	u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo);
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
@@ -213,35 +211,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+	cxio_hal_put_resource(&rscp->qpid_fifo, qpid);
 }
 
 u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->cqid_fifo);
+	return cxio_hal_get_resource(&rscp->cqid_fifo);
 }
 
 void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
 {
-	cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+	cxio_hal_put_resource(&rscp->cqid_fifo, cqid);
 }
 
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->pdid_fifo);
+	return cxio_hal_get_resource(&rscp->pdid_fifo);
 }
 
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
 {
-	cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+	cxio_hal_put_resource(&rscp->pdid_fifo, pdid);
 }
 
 void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
 {
-	kfifo_free(rscp->tpt_fifo);
-	kfifo_free(rscp->cqid_fifo);
-	kfifo_free(rscp->qpid_fifo);
-	kfifo_free(rscp->pdid_fifo);
+	kfifo_free(&rscp->tpt_fifo);
+	kfifo_free(&rscp->cqid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
+	kfifo_free(&rscp->pdid_fifo);
 	kfree(rscp);
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 6ffa64cd1c6d..dacbbb839b9e 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,7 +800,7 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get(meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int)) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
@@ -811,7 +811,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
 		wake_up_interruptible(&meye.proc_list);
 	} else {
 		int size;
@@ -820,7 +820,7 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get(meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int)) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
@@ -831,7 +831,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
 		wake_up_interruptible(&meye.proc_list);
 	}
 	mchip_free_frame();
@@ -859,8 +859,8 @@ static int meye_open(struct file *file)
 
 	for (i = 0; i < MEYE_MAX_BUFNBRS; i++)
 		meye.grab_buffer[i].state = MEYE_BUF_UNUSED;
-	kfifo_reset(meye.grabq);
-	kfifo_reset(meye.doneq);
+	kfifo_reset(&meye.grabq);
+	kfifo_reset(&meye.doneq);
 	return 0;
 }
 
@@ -933,7 +933,7 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put(meye.grabq, (unsigned char *)nb, sizeof(int));
+	kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int));
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -965,7 +965,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get(meye.doneq, (unsigned char *)&unused, sizeof(int));
+		kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int));
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
@@ -1452,7 +1452,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put(meye.grabq, (unsigned char *)&buf->index, sizeof(int));
+	kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int));
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -1467,18 +1467,18 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 
 	mutex_lock(&meye.lock);
 
-	if (kfifo_len(meye.doneq) == 0 && file->f_flags & O_NONBLOCK) {
+	if (kfifo_len(&meye.doneq) == 0 && file->f_flags & O_NONBLOCK) {
 		mutex_unlock(&meye.lock);
 		return -EAGAIN;
 	}
 
 	if (wait_event_interruptible(meye.proc_list,
-				     kfifo_len(meye.doneq) != 0) < 0) {
+				     kfifo_len(&meye.doneq) != 0) < 0) {
 		mutex_unlock(&meye.lock);
 		return -EINTR;
 	}
 
-	if (!kfifo_get(meye.doneq, (unsigned char *)&reqnr,
+	if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr,
 		       sizeof(int))) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
@@ -1529,8 +1529,8 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i)
 {
 	mutex_lock(&meye.lock);
 	mchip_hic_stop();
-	kfifo_reset(meye.grabq);
-	kfifo_reset(meye.doneq);
+	kfifo_reset(&meye.grabq);
+	kfifo_reset(&meye.doneq);
 
 	for (i = 0; i < MEYE_MAX_BUFNBRS; i++)
 		meye.grab_buffer[i].state = MEYE_BUF_UNUSED;
@@ -1572,7 +1572,7 @@ static unsigned int meye_poll(struct file *file, poll_table *wait)
 
 	mutex_lock(&meye.lock);
 	poll_wait(file, &meye.proc_list, wait);
-	if (kfifo_len(meye.doneq))
+	if (kfifo_len(&meye.doneq))
 		res = POLLIN | POLLRDNORM;
 	mutex_unlock(&meye.lock);
 	return res;
@@ -1745,16 +1745,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	}
 
 	spin_lock_init(&meye.grabq_lock);
-	meye.grabq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.grabq_lock);
-	if (IS_ERR(meye.grabq)) {
+	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
+				 &meye.grabq_lock)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
-	meye.doneq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.doneq_lock);
-	if (IS_ERR(meye.doneq)) {
+	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
+				 &meye.doneq_lock)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
@@ -1868,9 +1866,9 @@ outregions:
 outenabledev:
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0);
 outsonypienable:
-	kfifo_free(meye.doneq);
+	kfifo_free(&meye.doneq);
 outkfifoalloc2:
-	kfifo_free(meye.grabq);
+	kfifo_free(&meye.grabq);
 outkfifoalloc1:
 	vfree(meye.grab_temp);
 outvmalloc:
@@ -1901,8 +1899,8 @@ static void __devexit meye_remove(struct pci_dev *pcidev)
 
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0);
 
-	kfifo_free(meye.doneq);
-	kfifo_free(meye.grabq);
+	kfifo_free(&meye.doneq);
+	kfifo_free(&meye.grabq);
 
 	vfree(meye.grab_temp);
 
diff --git a/drivers/media/video/meye.h b/drivers/media/video/meye.h
index 5f70a106ba2b..1321ad5d6597 100644
--- a/drivers/media/video/meye.h
+++ b/drivers/media/video/meye.h
@@ -303,9 +303,9 @@ struct meye {
 	struct meye_grab_buffer grab_buffer[MEYE_MAX_BUFNBRS];
 	int vma_use_count[MEYE_MAX_BUFNBRS]; /* mmap count */
 	struct mutex lock;		/* mutex for open/mmap... */
-	struct kfifo *grabq;		/* queue for buffers to be grabbed */
+	struct kfifo grabq;		/* queue for buffers to be grabbed */
 	spinlock_t grabq_lock;		/* lock protecting the queue */
-	struct kfifo *doneq;		/* queue for grabbed buffers */
+	struct kfifo doneq;		/* queue for grabbed buffers */
 	spinlock_t doneq_lock;		/* lock protecting the queue */
 	wait_queue_head_t proc_list;	/* wait queue */
 	struct video_device *video_dev;	/* video device parameters */
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index b9b371bfa30f..ffed17f4f506 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv)
 	priv->dnld_sent = DNLD_RES_RECEIVED;
 
 	/* If nothing to do, go back to sleep (?) */
-	if (!__kfifo_len(priv->event_fifo) && !priv->resp_len[priv->resp_idx])
+	if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
 		priv->psstate = PS_STATE_SLEEP;
 
 	spin_unlock_irqrestore(&priv->driver_lock, flags);
@@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv)
 	}
 
 	/* Pending events or command responses? */
-	if (__kfifo_len(priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
+	if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
 		allowed = 0;
 		lbs_deb_host("pending events or command responses\n");
 	}
diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h
index 6a8d2b291d8c..05bb298dfae9 100644
--- a/drivers/net/wireless/libertas/dev.h
+++ b/drivers/net/wireless/libertas/dev.h
@@ -10,7 +10,7 @@
 #include "scan.h"
 #include "assoc.h"
 
-
+#include <linux/kfifo.h>
 
 /** sleep_params */
 struct sleep_params {
@@ -120,7 +120,7 @@ struct lbs_private {
 	u32 resp_len[2];
 
 	/* Events sent from hardware to driver */
-	struct kfifo *event_fifo;
+	struct kfifo event_fifo;
 
 	/** thread to service interrupts */
 	struct task_struct *main_thread;
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index db38a5a719fa..403909287414 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -459,7 +459,7 @@ static int lbs_thread(void *data)
 		else if (!list_empty(&priv->cmdpendingq) &&
 					!(priv->wakeup_dev_required))
 			shouldsleep = 0;	/* We have a command to send */
-		else if (__kfifo_len(priv->event_fifo))
+		else if (__kfifo_len(&priv->event_fifo))
 			shouldsleep = 0;	/* We have an event to process */
 		else
 			shouldsleep = 1;	/* No command */
@@ -511,9 +511,9 @@ static int lbs_thread(void *data)
 
 		/* Process hardware events, e.g. card removed, link lost */
 		spin_lock_irq(&priv->driver_lock);
-		while (__kfifo_len(priv->event_fifo)) {
+		while (__kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			__kfifo_get(priv->event_fifo, (unsigned char *) &event,
+			__kfifo_get(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -883,10 +883,9 @@ static int lbs_init_adapter(struct lbs_private *priv)
 	priv->resp_len[0] = priv->resp_len[1] = 0;
 
 	/* Create the event FIFO */
-	priv->event_fifo = kfifo_alloc(sizeof(u32) * 16, GFP_KERNEL, NULL);
-	if (IS_ERR(priv->event_fifo)) {
+	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL);
+	if (ret) {
 		lbs_pr_err("Out of memory allocating event FIFO buffer\n");
-		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -901,8 +900,7 @@ static void lbs_free_adapter(struct lbs_private *priv)
 	lbs_deb_enter(LBS_DEB_MAIN);
 
 	lbs_free_cmd_buffer(priv);
-	if (priv->event_fifo)
-		kfifo_free(priv->event_fifo);
+	kfifo_free(&priv->event_fifo);
 	del_timer(&priv->command_timer);
 	del_timer(&priv->auto_deepsleep_timer);
 	kfree(priv->networks);
@@ -1177,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	__kfifo_put(priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	__kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index bcd4ba8be7db..f999fba0e25e 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -164,7 +164,7 @@ struct fujitsu_hotkey_t {
 	struct input_dev *input;
 	char phys[32];
 	struct platform_device *pf_device;
-	struct kfifo *fifo;
+	struct kfifo fifo;
 	spinlock_t fifo_lock;
 	int rfkill_supported;
 	int rfkill_state;
@@ -824,12 +824,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
 	/* kfifo */
 	spin_lock_init(&fujitsu_hotkey->fifo_lock);
-	fujitsu_hotkey->fifo =
-	    kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL,
-			&fujitsu_hotkey->fifo_lock);
-	if (IS_ERR(fujitsu_hotkey->fifo)) {
+	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+			GFP_KERNEL, &fujitsu_hotkey->fifo_lock);
+	if (error) {
 		printk(KERN_ERR "kfifo_alloc failed\n");
-		error = PTR_ERR(fujitsu_hotkey->fifo);
 		goto err_stop;
 	}
 
@@ -934,7 +932,7 @@ err_unregister_input_dev:
 err_free_input_dev:
 	input_free_device(input);
 err_free_fifo:
-	kfifo_free(fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_hotkey->fifo);
 err_stop:
 	return result;
 }
@@ -956,7 +954,7 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type)
 
 	input_free_device(input);
 
-	kfifo_free(fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_hotkey->fifo);
 
 	fujitsu_hotkey->acpi_handle = NULL;
 
@@ -1008,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put(fujitsu_hotkey->fifo,
+				status = kfifo_put(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
 						   sizeof(keycode));
 				if (status != sizeof(keycode)) {
@@ -1022,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 			} else if (keycode == 0) {
 				while ((status =
 					kfifo_get
-					(fujitsu_hotkey->fifo, (unsigned char *)
+					(&fujitsu_hotkey->fifo, (unsigned char *)
 					 &keycode_r,
 					 sizeof
 					 (keycode_r))) == sizeof(keycode_r)) {
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 7a2cc8a5c975..04625a048e74 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -142,7 +142,7 @@ struct sony_laptop_input_s {
 	atomic_t		users;
 	struct input_dev	*jog_dev;
 	struct input_dev	*key_dev;
-	struct kfifo		*fifo;
+	struct kfifo		fifo;
 	spinlock_t		fifo_lock;
 	struct workqueue_struct	*wq;
 };
@@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get(sony_laptop_input.fifo, (unsigned char *)&kp,
+	while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp,
 			 sizeof(kp)) == sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
@@ -362,7 +362,7 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put(sony_laptop_input.fifo,
+		kfifo_put(&sony_laptop_input.fifo,
 			  (unsigned char *)&kp, sizeof(kp));
 
 		if (!work_pending(&sony_laptop_release_key_work))
@@ -385,12 +385,11 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device)
 
 	/* kfifo */
 	spin_lock_init(&sony_laptop_input.fifo_lock);
-	sony_laptop_input.fifo =
-		kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
+	error =
+	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
 			    &sony_laptop_input.fifo_lock);
-	if (IS_ERR(sony_laptop_input.fifo)) {
+	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
-		error = PTR_ERR(sony_laptop_input.fifo);
 		goto err_dec_users;
 	}
 
@@ -474,7 +473,7 @@ err_destroy_wq:
 	destroy_workqueue(sony_laptop_input.wq);
 
 err_free_kfifo:
-	kfifo_free(sony_laptop_input.fifo);
+	kfifo_free(&sony_laptop_input.fifo);
 
 err_dec_users:
 	atomic_dec(&sony_laptop_input.users);
@@ -500,7 +499,7 @@ static void sony_laptop_remove_input(void)
 	}
 
 	destroy_workqueue(sony_laptop_input.wq);
-	kfifo_free(sony_laptop_input.fifo);
+	kfifo_free(&sony_laptop_input.fifo);
 }
 
 /*********** Platform Device ***********/
@@ -2079,7 +2078,7 @@ static struct attribute_group spic_attribute_group = {
 
 struct sonypi_compat_s {
 	struct fasync_struct	*fifo_async;
-	struct kfifo		*fifo;
+	struct kfifo		fifo;
 	spinlock_t		fifo_lock;
 	wait_queue_head_t	fifo_proc_list;
 	atomic_t		open_count;
@@ -2104,12 +2103,12 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	/* Flush input queue on first open */
 	unsigned long flags;
 
-	spin_lock_irqsave(sonypi_compat.fifo->lock, flags);
+	spin_lock_irqsave(&sonypi_compat.fifo_lock, flags);
 
 	if (atomic_inc_return(&sonypi_compat.open_count) == 1)
-		__kfifo_reset(sonypi_compat.fifo);
+		__kfifo_reset(&sonypi_compat.fifo);
 
-	spin_unlock_irqrestore(sonypi_compat.fifo->lock, flags);
+	spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags);
 
 	return 0;
 }
@@ -2120,17 +2119,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	ssize_t ret;
 	unsigned char c;
 
-	if ((kfifo_len(sonypi_compat.fifo) == 0) &&
+	if ((kfifo_len(&sonypi_compat.fifo) == 0) &&
 	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	ret = wait_event_interruptible(sonypi_compat.fifo_proc_list,
-				       kfifo_len(sonypi_compat.fifo) != 0);
+				       kfifo_len(&sonypi_compat.fifo) != 0);
 	if (ret)
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -2147,7 +2146,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &sonypi_compat.fifo_proc_list, wait);
-	if (kfifo_len(sonypi_compat.fifo))
+	if (kfifo_len(&sonypi_compat.fifo))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
@@ -2309,7 +2308,7 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put(sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
 }
@@ -2319,11 +2318,12 @@ static int sonypi_compat_init(void)
 	int error;
 
 	spin_lock_init(&sonypi_compat.fifo_lock);
-	sonypi_compat.fifo = kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
+	error =
+	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
 					 &sonypi_compat.fifo_lock);
-	if (IS_ERR(sonypi_compat.fifo)) {
+	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
-		return PTR_ERR(sonypi_compat.fifo);
+		return error;
 	}
 
 	init_waitqueue_head(&sonypi_compat.fifo_proc_list);
@@ -2342,14 +2342,14 @@ static int sonypi_compat_init(void)
 	return 0;
 
 err_free_kfifo:
-	kfifo_free(sonypi_compat.fifo);
+	kfifo_free(&sonypi_compat.fifo);
 	return error;
 }
 
 static void sonypi_compat_exit(void)
 {
 	misc_deregister(&sonypi_misc_device);
-	kfifo_free(sonypi_compat.fifo);
+	kfifo_free(&sonypi_compat.fifo);
 }
 #else
 static int sonypi_compat_init(void) { return 0; }
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b7689f3d05f5..cf0aa7e90be9 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	__kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
+	__kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!__kfifo_get(session->cmdpool.queue,
+		if (!__kfifo_get(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!__kfifo_get(conn->session->cmdpool.queue,
+	if (!__kfifo_get(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2461,12 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 	if (q->pool == NULL)
 		return -ENOMEM;
 
-	q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
-			      GFP_KERNEL, NULL);
-	if (IS_ERR(q->queue)) {
-		q->queue = NULL;
-		goto enomem;
-	}
+	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL);
 
 	for (i = 0; i < max; i++) {
 		q->pool[i] = kzalloc(item_size, GFP_KERNEL);
@@ -2474,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		__kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
+		__kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2497,7 +2492,6 @@ void iscsi_pool_free(struct iscsi_pool *q)
 	for (i = 0; i < q->max; i++)
 		kfree(q->pool[i]);
 	kfree(q->pool);
-	kfree(q->queue);
 }
 EXPORT_SYMBOL_GPL(iscsi_pool_free);
 
@@ -2825,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!__kfifo_get(session->cmdpool.queue,
+	if (!__kfifo_get(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2845,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	__kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2908,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	__kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index ca25ee5190b0..a83ee56a185e 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+	while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	__kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
 		return conn->session->tt->init_pdu(task, 0, task->data_count);
 	}
 
-	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
+	BUG_ON(__kfifo_len(&tcp_task->r2tqueue));
 	tcp_task->exp_datasn = 0;
 
 	/* Prepare PDU, optionally w/ immediate data */
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				__kfifo_put(tcp_task->r2tpool.queue,
+				__kfifo_put(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			__kfifo_get(tcp_task->r2tqueue,
+			__kfifo_get(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
@@ -1127,9 +1127,8 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 		}
 
 		/* R2T xmit queue */
-		tcp_task->r2tqueue = kfifo_alloc(
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
-		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
+		if (kfifo_alloc(&tcp_task->r2tqueue,
+		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) {
 			iscsi_pool_free(&tcp_task->r2tpool);
 			goto r2t_alloc_fail;
 		}
@@ -1142,7 +1141,7 @@ r2t_alloc_fail:
 		struct iscsi_task *task = session->cmds[i];
 		struct iscsi_tcp_task *tcp_task = task->dd_data;
 
-		kfifo_free(tcp_task->r2tqueue);
+		kfifo_free(&tcp_task->r2tqueue);
 		iscsi_pool_free(&tcp_task->r2tpool);
 	}
 	return -ENOMEM;
@@ -1157,7 +1156,7 @@ void iscsi_tcp_r2tpool_free(struct iscsi_session *session)
 		struct iscsi_task *task = session->cmds[i];
 		struct iscsi_tcp_task *tcp_task = task->dd_data;
 
-		kfifo_free(tcp_task->r2tqueue);
+		kfifo_free(&tcp_task->r2tqueue);
 		iscsi_pool_free(&tcp_task->r2tpool);
 	}
 }
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 9ad38e81e343..b1b5e51ca8e3 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -58,19 +58,16 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 		goto free_pool;
 
 	spin_lock_init(&q->lock);
-	q->queue = kfifo_init((void *) q->pool, max * sizeof(void *),
-			      GFP_KERNEL, &q->lock);
-	if (IS_ERR(q->queue))
-		goto free_item;
+	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *),
+			      &q->lock);
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		__kfifo_put(q->queue, (void *) &iue, sizeof(void *));
+		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
 	return 0;
 
-free_item:
 	kfree(q->items);
 free_pool:
 	kfree(q->pool);
@@ -167,7 +164,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get(target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *));
 	if (!iue)
 		return iue;
 	iue->target = target;
@@ -179,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put(iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
 
diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c
index 00a29855d0c4..ff43747a614f 100644
--- a/drivers/usb/host/fhci-sched.c
+++ b/drivers/usb/host/fhci-sched.c
@@ -37,7 +37,7 @@ static void recycle_frame(struct fhci_usb *usb, struct packet *pkt)
 	pkt->info = 0;
 	pkt->priv_data = NULL;
 
-	cq_put(usb->ep0->empty_frame_Q, pkt);
+	cq_put(&usb->ep0->empty_frame_Q, pkt);
 }
 
 /* confirm submitted packet */
@@ -57,7 +57,7 @@ void fhci_transaction_confirm(struct fhci_usb *usb, struct packet *pkt)
 		if ((td->data + td->actual_len) && trans_len)
 			memcpy(td->data + td->actual_len, pkt->data,
 			       trans_len);
-		cq_put(usb->ep0->dummy_packets_Q, pkt->data);
+		cq_put(&usb->ep0->dummy_packets_Q, pkt->data);
 	}
 
 	recycle_frame(usb, pkt);
@@ -213,7 +213,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 	}
 
 	/* update frame object fields before transmitting */
-	pkt = cq_get(usb->ep0->empty_frame_Q);
+	pkt = cq_get(&usb->ep0->empty_frame_Q);
 	if (!pkt) {
 		fhci_dbg(usb->fhci, "there is no empty frame\n");
 		return -1;
@@ -222,7 +222,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 
 	pkt->info = 0;
 	if (data == NULL) {
-		data = cq_get(usb->ep0->dummy_packets_Q);
+		data = cq_get(&usb->ep0->dummy_packets_Q);
 		BUG_ON(!data);
 		pkt->info = PKT_DUMMY_PACKET;
 	}
@@ -246,7 +246,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 		list_del_init(&td->frame_lh);
 		td->status = USB_TD_OK;
 		if (pkt->info & PKT_DUMMY_PACKET)
-			cq_put(usb->ep0->dummy_packets_Q, pkt->data);
+			cq_put(&usb->ep0->dummy_packets_Q, pkt->data);
 		recycle_frame(usb, pkt);
 		usb->actual_frame->total_bytes -= (len + PROTOCOL_OVERHEAD);
 		fhci_err(usb->fhci, "host transaction failed\n");
diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c
index b40332290319..d224ab467a40 100644
--- a/drivers/usb/host/fhci-tds.c
+++ b/drivers/usb/host/fhci-tds.c
@@ -106,33 +106,33 @@ void fhci_ep0_free(struct fhci_usb *usb)
 			cpm_muram_free(cpm_muram_offset(ep->td_base));
 
 		if (ep->conf_frame_Q) {
-			size = cq_howmany(ep->conf_frame_Q);
+			size = cq_howmany(&ep->conf_frame_Q);
 			for (; size; size--) {
-				struct packet *pkt = cq_get(ep->conf_frame_Q);
+				struct packet *pkt = cq_get(&ep->conf_frame_Q);
 
 				kfree(pkt);
 			}
-			cq_delete(ep->conf_frame_Q);
+			cq_delete(&ep->conf_frame_Q);
 		}
 
 		if (ep->empty_frame_Q) {
-			size = cq_howmany(ep->empty_frame_Q);
+			size = cq_howmany(&ep->empty_frame_Q);
 			for (; size; size--) {
-				struct packet *pkt = cq_get(ep->empty_frame_Q);
+				struct packet *pkt = cq_get(&ep->empty_frame_Q);
 
 				kfree(pkt);
 			}
-			cq_delete(ep->empty_frame_Q);
+			cq_delete(&ep->empty_frame_Q);
 		}
 
 		if (ep->dummy_packets_Q) {
-			size = cq_howmany(ep->dummy_packets_Q);
+			size = cq_howmany(&ep->dummy_packets_Q);
 			for (; size; size--) {
-				u8 *buff = cq_get(ep->dummy_packets_Q);
+				u8 *buff = cq_get(&ep->dummy_packets_Q);
 
 				kfree(buff);
 			}
-			cq_delete(ep->dummy_packets_Q);
+			cq_delete(&ep->dummy_packets_Q);
 		}
 
 		kfree(ep);
@@ -175,10 +175,9 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem,
 	ep->td_base = cpm_muram_addr(ep_offset);
 
 	/* zero all queue pointers */
-	ep->conf_frame_Q = cq_new(ring_len + 2);
-	ep->empty_frame_Q = cq_new(ring_len + 2);
-	ep->dummy_packets_Q = cq_new(ring_len + 2);
-	if (!ep->conf_frame_Q || !ep->empty_frame_Q || !ep->dummy_packets_Q) {
+	if (cq_new(&ep->conf_frame_Q, ring_len + 2) ||
+	    cq_new(&ep->empty_frame_Q, ring_len + 2) ||
+	    cq_new(&ep->dummy_packets_Q, ring_len + 2)) {
 		err_for = "frame_queues";
 		goto err;
 	}
@@ -199,8 +198,8 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem,
 			err_for = "buffer";
 			goto err;
 		}
-		cq_put(ep->empty_frame_Q, pkt);
-		cq_put(ep->dummy_packets_Q, buff);
+		cq_put(&ep->empty_frame_Q, pkt);
+		cq_put(&ep->dummy_packets_Q, buff);
 	}
 
 	/* we put the endpoint parameter RAM right behind the TD ring */
@@ -319,7 +318,7 @@ static void fhci_td_transaction_confirm(struct fhci_usb *usb)
 		if ((buf == DUMMY2_BD_BUFFER) && !(td_status & ~TD_W))
 			continue;
 
-		pkt = cq_get(ep->conf_frame_Q);
+		pkt = cq_get(&ep->conf_frame_Q);
 		if (!pkt)
 			fhci_err(usb->fhci, "no frame to confirm\n");
 
@@ -460,9 +459,9 @@ u32 fhci_host_transaction(struct fhci_usb *usb,
 		out_be16(&td->length, pkt->len);
 
 	/* put the frame to the confirmation queue */
-	cq_put(ep->conf_frame_Q, pkt);
+	cq_put(&ep->conf_frame_Q, pkt);
 
-	if (cq_howmany(ep->conf_frame_Q) == 1)
+	if (cq_howmany(&ep->conf_frame_Q) == 1)
 		out_8(&usb->fhci->regs->usb_comm, USB_CMD_STR_FIFO);
 
 	return 0;
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 7116284ed21a..2277428ef5d3 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -423,9 +423,9 @@ struct endpoint {
 	struct usb_td __iomem *td_base; /* first TD in the ring */
 	struct usb_td __iomem *conf_td; /* next TD for confirm after transac */
 	struct usb_td __iomem *empty_td;/* next TD for new transaction req. */
-	struct kfifo *empty_frame_Q;  /* Empty frames list to use */
-	struct kfifo *conf_frame_Q;   /* frames passed to TDs,waiting for tx */
-	struct kfifo *dummy_packets_Q;/* dummy packets for the CRC overun */
+	struct kfifo empty_frame_Q;  /* Empty frames list to use */
+	struct kfifo conf_frame_Q;   /* frames passed to TDs,waiting for tx */
+	struct kfifo dummy_packets_Q;/* dummy packets for the CRC overun */
 
 	bool already_pushed_dummy_bd;
 };
@@ -493,9 +493,9 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci)
 }
 
 /* fifo of pointers */
-static inline struct kfifo *cq_new(int size)
+static inline int cq_new(struct kfifo *fifo, int size)
 {
-	return kfifo_alloc(size * sizeof(void *), GFP_KERNEL, NULL);
+	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL);
 }
 
 static inline void cq_delete(struct kfifo *kfifo)
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 4543f359be75..44b72d47fac2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -939,9 +939,8 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		port->write_fifo = kfifo_alloc(PAGE_SIZE, GFP_KERNEL,
-			&port->lock);
-		if (IS_ERR(port->write_fifo))
+		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL,
+			&port->lock))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index ad6bdf5a5970..c3f8d82efd34 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -1,6 +1,7 @@
 /*
- * A simple kernel FIFO implementation.
+ * A generic kernel FIFO implementation.
  *
+ * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
  * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -32,10 +33,10 @@ struct kfifo {
 	spinlock_t *lock;	/* protects concurrent modifications */
 };
 
-extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
-				gfp_t gfp_mask, spinlock_t *lock);
-extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask,
-				 spinlock_t *lock);
+extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+			unsigned int size, spinlock_t *lock);
+extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
+			gfp_t gfp_mask, spinlock_t *lock);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int __kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 7394e3bc8f4b..ff92b46f5153 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/kfifo.h>
 #include <scsi/iscsi_proto.h>
 #include <scsi/iscsi_if.h>
 #include <scsi/scsi_transport_iscsi.h>
@@ -231,7 +232,7 @@ struct iscsi_conn {
 };
 
 struct iscsi_pool {
-	struct kfifo		*queue;		/* FIFO Queue */
+	struct kfifo		queue;		/* FIFO Queue */
 	void			**pool;		/* Pool of elements */
 	int			max;		/* Max number of elements */
 };
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index 9e3182e659db..741ae7ed4394 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h
@@ -80,7 +80,7 @@ struct iscsi_tcp_task {
 	int			data_offset;
 	struct iscsi_r2t_info	*r2t;		/* in progress solict R2T */
 	struct iscsi_pool	r2tpool;
-	struct kfifo		*r2tqueue;
+	struct kfifo		r2tqueue;
 	void			*dd_data;
 };
 
diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h
index ba615e4c1d7c..07e3adde21d9 100644
--- a/include/scsi/libsrp.h
+++ b/include/scsi/libsrp.h
@@ -21,7 +21,7 @@ struct srp_buf {
 struct srp_queue {
 	void *pool;
 	void *items;
-	struct kfifo *queue;
+	struct kfifo queue;
 	spinlock_t lock;
 };
 
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 3765ff3c1bbe..8da6bb9782bb 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -1,6 +1,7 @@
 /*
- * A simple kernel FIFO implementation.
+ * A generic kernel FIFO implementation.
  *
+ * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
  * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,49 +27,51 @@
 #include <linux/kfifo.h>
 #include <linux/log2.h>
 
+static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+		unsigned int size, spinlock_t *lock)
+{
+	fifo->buffer = buffer;
+	fifo->size = size;
+	fifo->lock = lock;
+
+	kfifo_reset(fifo);
+}
+
 /**
- * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * kfifo_init - initialize a FIFO using a preallocated buffer
+ * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
  * @size: the size of the internal buffer, this have to be a power of 2.
- * @gfp_mask: get_free_pages mask, passed to kmalloc()
  * @lock: the lock to be used to protect the fifo buffer
  *
- * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
- * &struct kfifo with kfree().
  */
-struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
-			 gfp_t gfp_mask, spinlock_t *lock)
+void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size,
+			spinlock_t *lock)
 {
-	struct kfifo *fifo;
-
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
 
-	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
-	if (!fifo)
-		return ERR_PTR(-ENOMEM);
-
-	fifo->buffer = buffer;
-	fifo->size = size;
-	fifo->in = fifo->out = 0;
-	fifo->lock = lock;
-
-	return fifo;
+	_kfifo_init(fifo, buffer, size, lock);
 }
 EXPORT_SYMBOL(kfifo_init);
 
 /**
- * kfifo_alloc - allocates a new FIFO and its internal buffer
- * @size: the size of the internal buffer to be allocated.
+ * kfifo_alloc - allocates a new FIFO internal buffer
+ * @fifo: the fifo to assign then new buffer
+ * @size: the size of the buffer to be allocated, this have to be a power of 2.
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
  * @lock: the lock to be used to protect the fifo buffer
  *
+ * This function dynamically allocates a new fifo internal buffer
+ *
  * The size will be rounded-up to a power of 2.
+ * The buffer will be release with kfifo_free().
+ * Return 0 if no error, otherwise the an error code
  */
-struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
+int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
+			spinlock_t *lock)
 {
 	unsigned char *buffer;
-	struct kfifo *ret;
 
 	/*
 	 * round up to the next power of 2, since our 'let the indices
@@ -80,26 +83,24 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
 	}
 
 	buffer = kmalloc(size, gfp_mask);
-	if (!buffer)
-		return ERR_PTR(-ENOMEM);
-
-	ret = kfifo_init(buffer, size, gfp_mask, lock);
+	if (!buffer) {
+		_kfifo_init(fifo, 0, 0, NULL);
+		return -ENOMEM;
+	}
 
-	if (IS_ERR(ret))
-		kfree(buffer);
+	_kfifo_init(fifo, buffer, size, lock);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_alloc);
 
 /**
- * kfifo_free - frees the FIFO
+ * kfifo_free - frees the FIFO internal buffer
  * @fifo: the fifo to be freed.
  */
 void kfifo_free(struct kfifo *fifo)
 {
 	kfree(fifo->buffer);
-	kfree(fifo);
 }
 EXPORT_SYMBOL(kfifo_free);
 
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index dc328425fa20..6230ceb0823e 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -43,7 +43,7 @@ static int bufsize = 64 * 1024;
 static const char procname[] = "dccpprobe";
 
 static struct {
-	struct kfifo	  *fifo;
+	struct kfifo	  fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
 	struct timespec	  tstart;
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(dccpw.fifo, tbuf, len);
+	kfifo_put(&dccpw.fifo, tbuf, len);
 	wake_up(&dccpw.wait);
 }
 
@@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = {
 
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
-	kfifo_reset(dccpw.fifo);
+	kfifo_reset(&dccpw.fifo);
 	getnstimeofday(&dccpw.tstart);
 	return 0;
 }
@@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(dccpw.fifo) != 0);
+					 __kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(dccpw.fifo, tbuf, len);
+	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,10 +156,8 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
-	if (IS_ERR(dccpw.fifo))
-		return PTR_ERR(dccpw.fifo);
-
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
 
@@ -172,14 +170,14 @@ static __init int dccpprobe_init(void)
 err1:
 	proc_net_remove(&init_net, procname);
 err0:
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	return ret;
 }
 module_init(dccpprobe_init);
 
 static __exit void dccpprobe_exit(void)
 {
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	proc_net_remove(&init_net, procname);
 	unregister_jprobe(&dccp_send_probe);
 
-- 
cgit v1.2.3


From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:27 -0800
Subject: kfifo: move out spinlock

Move the pointer to the spinlock out of struct kfifo.  Most users in
tree do not actually use a spinlock, so the few exceptions now have to
call kfifo_{get,put}_locked, which takes an extra argument to a
spinlock.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       |  2 +-
 drivers/char/sonypi.c                       | 21 ++++----
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 36 +++++++------
 drivers/media/video/meye.c                  | 35 +++++++------
 drivers/net/wireless/libertas/main.c        |  2 +-
 drivers/platform/x86/fujitsu-laptop.c       | 18 ++++---
 drivers/platform/x86/sony-laptop.c          | 22 ++++----
 drivers/scsi/libiscsi.c                     |  2 +-
 drivers/scsi/libiscsi_tcp.c                 |  2 +-
 drivers/scsi/libsrp.c                       |  9 ++--
 drivers/usb/host/fhci.h                     |  2 +-
 drivers/usb/serial/generic.c                |  4 +-
 drivers/usb/serial/usb-serial.c             |  3 +-
 include/linux/kfifo.h                       | 80 +++++++++++++----------------
 kernel/kfifo.c                              | 17 +++---
 net/dccp/probe.c                            |  6 +--
 16 files changed, 131 insertions(+), 130 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 0f39bec28b45..935b30d80adf 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -686,7 +686,7 @@ static int nozomi_read_config_table(struct nozomi *dc)
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
 			kfifo_alloc(&dc->port[i].fifo_ul,
-				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
+				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 9e6efb1f029f..dbcb3bd192c7 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -777,8 +777,9 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp,
-			 sizeof(kp)) == sizeof(kp)) {
+	while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
+			 sizeof(kp), &sonypi_device.input_fifo_lock)
+			== sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
 		input_sync(kp.dev);
@@ -827,8 +828,9 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put(&sonypi_device.input_fifo,
-			  (unsigned char *)&kp, sizeof(kp));
+		kfifo_put_locked(&sonypi_device.input_fifo,
+			(unsigned char *)&kp, sizeof(kp),
+			&sonypi_device.input_fifo_lock);
 		schedule_work(&sonypi_device.input_work);
 	}
 }
@@ -880,7 +882,8 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event,
+			sizeof(event), &sonypi_device.fifo_lock);
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
 
@@ -929,7 +932,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c),
+				 &sonypi_device.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -1313,8 +1317,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 			"http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n");
 
 	spin_lock_init(&sonypi_device.fifo_lock);
-	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL,
-					 &sonypi_device.fifo_lock);
+	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 		return error;
@@ -1394,7 +1397,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 
 		spin_lock_init(&sonypi_device.input_fifo_lock);
 		error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE,
-				GFP_KERNEL, &sonypi_device.input_fifo_lock);
+				GFP_KERNEL);
 		if (error) {
 			printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 			goto err_inpdev_unregister;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 65072bdfc1bf..98f24e6d906e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -55,7 +55,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
 
-	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock))
+	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
@@ -86,7 +86,8 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_get_locked(fifo, (unsigned char *) &entry,
+				sizeof(u32), fifo_lock);
 	return 0;
 }
 
@@ -113,8 +114,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
 	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
-					      GFP_KERNEL,
-					      &rdev_p->rscp->qpid_fifo_lock))
+					      GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
@@ -177,33 +177,37 @@ tpt_err:
 /*
  * returns 0 if no resource available
  */
-static u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
 {
 	u32 entry;
-	if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+	if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
 		return entry;
 	else
 		return 0;	/* fifo emptry */
 }
 
-static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
+		u32 entry)
 {
-	BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+	BUG_ON(
+	kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+	== 0);
 }
 
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->tpt_fifo);
+	return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
 }
 
 void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
 {
-	cxio_hal_put_resource(&rscp->tpt_fifo, stag);
+	cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
 }
 
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
-	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo);
+	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
+			&rscp->qpid_fifo_lock);
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
@@ -211,27 +215,27 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(&rscp->qpid_fifo, qpid);
+	cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
 }
 
 u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->cqid_fifo);
+	return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
 }
 
 void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
 {
-	cxio_hal_put_resource(&rscp->cqid_fifo, cqid);
+	cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
 }
 
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->pdid_fifo);
+	return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
 }
 
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
 {
-	cxio_hal_put_resource(&rscp->pdid_fifo, pdid);
+	cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
 }
 
 void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index dacbbb839b9e..38bcedfd9fec 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,8 +800,8 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
-			      sizeof(int)) != sizeof(int)) {
+		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
 		}
@@ -811,7 +811,8 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	} else {
 		int size;
@@ -820,8 +821,8 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
-			      sizeof(int)) != sizeof(int)) {
+		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
 		}
@@ -831,7 +832,8 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	}
 	mchip_free_frame();
@@ -933,7 +935,8 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int));
+	kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
+			 &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -965,7 +968,8 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int));
+		kfifo_get_locked(&meye.doneq, (unsigned char *)&unused,
+				sizeof(int), &meye.doneq_lock);
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
@@ -1452,7 +1456,8 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int));
+	kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index,
+			sizeof(int), &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -1478,8 +1483,8 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 		return -EINTR;
 	}
 
-	if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr,
-		       sizeof(int))) {
+	if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr,
+		       sizeof(int), &meye.doneq_lock)) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
 	}
@@ -1745,14 +1750,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	}
 
 	spin_lock_init(&meye.grabq_lock);
-	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.grabq_lock)) {
+	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS,
+				GFP_KERNEL)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
-	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.doneq_lock)) {
+	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS,
+				GFP_KERNEL)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 403909287414..2cc7ecd8d123 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -883,7 +883,7 @@ static int lbs_init_adapter(struct lbs_private *priv)
 	priv->resp_len[0] = priv->resp_len[1] = 0;
 
 	/* Create the event FIFO */
-	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL);
+	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL);
 	if (ret) {
 		lbs_pr_err("Out of memory allocating event FIFO buffer\n");
 		goto out;
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index f999fba0e25e..13dc7bedcfce 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -825,7 +825,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	/* kfifo */
 	spin_lock_init(&fujitsu_hotkey->fifo_lock);
 	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
-			GFP_KERNEL, &fujitsu_hotkey->fifo_lock);
+			GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR "kfifo_alloc failed\n");
 		goto err_stop;
@@ -1006,9 +1006,10 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put(&fujitsu_hotkey->fifo,
+				status = kfifo_put_locked(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
-						   sizeof(keycode));
+						   sizeof(keycode),
+						   &fujitsu_hotkey->fifo_lock);
 				if (status != sizeof(keycode)) {
 					vdbg_printk(FUJLAPTOP_DBG_WARN,
 					    "Could not push keycode [0x%x]\n",
@@ -1019,11 +1020,12 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				}
 			} else if (keycode == 0) {
 				while ((status =
-					kfifo_get
-					(&fujitsu_hotkey->fifo, (unsigned char *)
-					 &keycode_r,
-					 sizeof
-					 (keycode_r))) == sizeof(keycode_r)) {
+					kfifo_get_locked(
+					 &fujitsu_hotkey->fifo,
+					 (unsigned char *) &keycode_r,
+					 sizeof(keycode_r),
+					 &fujitsu_hotkey->fifo_lock))
+					 == sizeof(keycode_r)) {
 					input_report_key(input, keycode_r, 0);
 					input_sync(input);
 					vdbg_printk(FUJLAPTOP_DBG_TRACE,
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 04625a048e74..1538a0a3c0af 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -300,8 +300,9 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp,
-			 sizeof(kp)) == sizeof(kp)) {
+	while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
+			sizeof(kp), &sony_laptop_input.fifo_lock)
+			== sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
 		input_sync(kp.dev);
@@ -362,8 +363,9 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put(&sony_laptop_input.fifo,
-			  (unsigned char *)&kp, sizeof(kp));
+		kfifo_put_locked(&sony_laptop_input.fifo,
+			  (unsigned char *)&kp, sizeof(kp),
+			  &sony_laptop_input.fifo_lock);
 
 		if (!work_pending(&sony_laptop_release_key_work))
 			queue_work(sony_laptop_input.wq,
@@ -386,8 +388,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device)
 	/* kfifo */
 	spin_lock_init(&sony_laptop_input.fifo_lock);
 	error =
-	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
-			    &sony_laptop_input.fifo_lock);
+	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
 		goto err_dec_users;
@@ -2129,7 +2130,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c),
+			  &sonypi_compat.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -2308,7 +2310,8 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event,
+			sizeof(event), &sonypi_compat.fifo_lock);
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
 }
@@ -2319,8 +2322,7 @@ static int sonypi_compat_init(void)
 
 	spin_lock_init(&sonypi_compat.fifo_lock);
 	error =
-	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
-					 &sonypi_compat.fifo_lock);
+	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
 		return error;
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index cf0aa7e90be9..1bccbc1e588e 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2461,7 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 	if (q->pool == NULL)
 		return -ENOMEM;
 
-	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL);
+	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*));
 
 	for (i = 0; i < max; i++) {
 		q->pool[i] = kzalloc(item_size, GFP_KERNEL);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index a83ee56a185e..41643c860d26 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -1128,7 +1128,7 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 
 		/* R2T xmit queue */
 		if (kfifo_alloc(&tcp_task->r2tqueue,
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) {
+		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) {
 			iscsi_pool_free(&tcp_task->r2tpool);
 			goto r2t_alloc_fail;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index b1b5e51ca8e3..db1b41c55fd3 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -58,8 +58,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 		goto free_pool;
 
 	spin_lock_init(&q->lock);
-	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *),
-			      &q->lock);
+	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
 		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
@@ -164,7 +163,8 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_get_locked(&target->iu_queue.queue, (void *) &iue,
+			sizeof(void *), &target->iu_queue.lock);
 	if (!iue)
 		return iue;
 	iue->target = target;
@@ -176,7 +176,8 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue,
+			sizeof(void *), &iue->target->iu_queue.lock);
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
 
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 2277428ef5d3..a76da201183b 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -495,7 +495,7 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci)
 /* fifo of pointers */
 static inline int cq_new(struct kfifo *fifo, int size)
 {
-	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL);
+	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL);
 }
 
 static inline void cq_delete(struct kfifo *kfifo)
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index bbe005cefcfb..61eef18218be 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_get(port->write_fifo, data, port->bulk_out_size);
+	count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_put(port->write_fifo, buf, count);
+	count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 44b72d47fac2..636a4f23445e 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -939,8 +939,7 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL,
-			&port->lock))
+		if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c3f8d82efd34..e0f5c9d4197d 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -30,13 +30,12 @@ struct kfifo {
 	unsigned int size;	/* the size of the allocated buffer */
 	unsigned int in;	/* data is added at offset (in % size) */
 	unsigned int out;	/* data is extracted from off. (out % size) */
-	spinlock_t *lock;	/* protects concurrent modifications */
 };
 
 extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
-			unsigned int size, spinlock_t *lock);
+			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
-			gfp_t gfp_mask, spinlock_t *lock);
+			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int __kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
@@ -58,58 +57,67 @@ static inline void __kfifo_reset(struct kfifo *fifo)
  */
 static inline void kfifo_reset(struct kfifo *fifo)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(fifo->lock, flags);
-
 	__kfifo_reset(fifo);
+}
+
+/**
+ * __kfifo_len - returns the number of bytes available in the FIFO
+ * @fifo: the fifo to be used.
+ */
+static inline unsigned int __kfifo_len(struct kfifo *fifo)
+{
+	register unsigned int	out;
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	out = fifo->out;
+	smp_rmb();
+	return fifo->in - out;
 }
 
 /**
- * kfifo_put - puts some data into the FIFO
+ * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
- * @buffer: the data to be added.
- * @len: the length of the data to be added.
+ * @from: the data to be added.
+ * @n: the length of the data to be added.
+ * @lock: pointer to the spinlock to use for locking.
  *
- * This function copies at most @len bytes from the @buffer into
+ * This function copies at most @len bytes from the @from buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline unsigned int kfifo_put(struct kfifo *fifo,
-				const unsigned char *buffer, unsigned int len)
+static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
+		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
 
-	spin_lock_irqsave(fifo->lock, flags);
+	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_put(fifo, buffer, len);
+	ret = __kfifo_put(fifo, from, n);
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
 }
 
 /**
- * kfifo_get - gets some data from the FIFO
+ * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
- * @buffer: where the data must be copied.
- * @len: the size of the destination buffer.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @lock: pointer to the spinlock to use for locking.
  *
  * This function copies at most @len bytes from the FIFO into the
- * @buffer and returns the number of copied bytes.
+ * @to buffer and returns the number of copied bytes.
  */
-static inline unsigned int kfifo_get(struct kfifo *fifo,
-				     unsigned char *buffer, unsigned int len)
+static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
+	unsigned char *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
 
-	spin_lock_irqsave(fifo->lock, flags);
+	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_get(fifo, buffer, len);
+	ret = __kfifo_get(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
@@ -118,36 +126,18 @@ static inline unsigned int kfifo_get(struct kfifo *fifo,
 	if (fifo->in == fifo->out)
 		fifo->in = fifo->out = 0;
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
 }
 
-/**
- * __kfifo_len - returns the number of bytes available in the FIFO, no locking version
- * @fifo: the fifo to be used.
- */
-static inline unsigned int __kfifo_len(struct kfifo *fifo)
-{
-	return fifo->in - fifo->out;
-}
-
 /**
  * kfifo_len - returns the number of bytes available in the FIFO
  * @fifo: the fifo to be used.
  */
 static inline unsigned int kfifo_len(struct kfifo *fifo)
 {
-	unsigned long flags;
-	unsigned int ret;
-
-	spin_lock_irqsave(fifo->lock, flags);
-
-	ret = __kfifo_len(fifo);
-
-	spin_unlock_irqrestore(fifo->lock, flags);
-
-	return ret;
+	return __kfifo_len(fifo);
 }
 
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 8da6bb9782bb..4950bdbe3477 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,11 +28,10 @@
 #include <linux/log2.h>
 
 static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
-		unsigned int size, spinlock_t *lock)
+		unsigned int size)
 {
 	fifo->buffer = buffer;
 	fifo->size = size;
-	fifo->lock = lock;
 
 	kfifo_reset(fifo);
 }
@@ -42,16 +41,14 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
  * @size: the size of the internal buffer, this have to be a power of 2.
- * @lock: the lock to be used to protect the fifo buffer
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size,
-			spinlock_t *lock)
+void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
 
-	_kfifo_init(fifo, buffer, size, lock);
+	_kfifo_init(fifo, buffer, size);
 }
 EXPORT_SYMBOL(kfifo_init);
 
@@ -60,7 +57,6 @@ EXPORT_SYMBOL(kfifo_init);
  * @fifo: the fifo to assign then new buffer
  * @size: the size of the buffer to be allocated, this have to be a power of 2.
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
- * @lock: the lock to be used to protect the fifo buffer
  *
  * This function dynamically allocates a new fifo internal buffer
  *
@@ -68,8 +64,7 @@ EXPORT_SYMBOL(kfifo_init);
  * The buffer will be release with kfifo_free().
  * Return 0 if no error, otherwise the an error code
  */
-int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
-			spinlock_t *lock)
+int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 {
 	unsigned char *buffer;
 
@@ -84,11 +79,11 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0, NULL);
+		_kfifo_init(fifo, 0, 0);
 		return -ENOMEM;
 	}
 
-	_kfifo_init(fifo, buffer, size, lock);
+	_kfifo_init(fifo, buffer, size);
 
 	return 0;
 }
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 6230ceb0823e..c6b50351aa78 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(&dccpw.fifo, tbuf, len);
+	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
+	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,7 +156,7 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
 		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
-- 
cgit v1.2.3


From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: cleanup namespace

change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo
should be reserved for internal functions only.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 12 +++++------
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 10 ++++-----
 drivers/net/wireless/libertas/cmd.c         |  4 ++--
 drivers/net/wireless/libertas/main.c        |  8 ++++----
 drivers/platform/x86/sony-laptop.c          |  2 +-
 drivers/scsi/libiscsi.c                     | 14 ++++++-------
 drivers/scsi/libiscsi_tcp.c                 | 20 +++++++++---------
 drivers/scsi/libsrp.c                       |  2 +-
 drivers/usb/host/fhci.h                     |  6 +++---
 drivers/usb/serial/generic.c                |  4 ++--
 include/linux/kfifo.h                       | 32 +++++++----------------------
 kernel/kfifo.c                              | 12 +++++------
 net/dccp/probe.c                            |  2 +-
 13 files changed, 55 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 935b30d80adf..61f5bfe74f38 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = __kfifo_get(&port->fifo_ul, dc->send_buf,
+	size = kfifo_get(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc)
 
 	} else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) {
 
-		if (__kfifo_len(&dc->port[port].fifo_ul)) {
+		if (kfifo_len(&dc->port[port].fifo_ul)) {
 			DBG1("Enable interrupt (0x%04X) on port: %d",
 				enable_ier, port);
 			DBG1("Data in buffer [%d], enable transmit! ",
-				__kfifo_len(&dc->port[port].fifo_ul));
+				kfifo_len(&dc->port[port].fifo_ul));
 			enable_transmit_ul(port, dc);
 		} else {
 			DBG1("No data in buffer...");
@@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
+	rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
@@ -1720,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty)
 	if (!port->port.count)
 		goto exit;
 
-	room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul);
+	room = port->fifo_ul.size - kfifo_len(&port->fifo_ul);
 
 exit:
 	mutex_unlock(&port->tty_sem);
@@ -1877,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty)
 		goto exit_in_buffer;
 	}
 
-	rval = __kfifo_len(&port->fifo_ul);
+	rval = kfifo_len(&port->fifo_ul);
 
 exit_in_buffer:
 	return rval;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 98f24e6d906e..d7d18fb02c93 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		__kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -71,19 +71,19 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			__kfifo_put(fifo,
+			kfifo_put(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			__kfifo_put(fifo,
+			kfifo_put(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
+			kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
 		kfifo_get_locked(fifo, (unsigned char *) &entry,
@@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			__kfifo_put(&rdev_p->rscp->qpid_fifo,
+			kfifo_put(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index ffed17f4f506..42611bea76a3 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv)
 	priv->dnld_sent = DNLD_RES_RECEIVED;
 
 	/* If nothing to do, go back to sleep (?) */
-	if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
+	if (!kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
 		priv->psstate = PS_STATE_SLEEP;
 
 	spin_unlock_irqrestore(&priv->driver_lock, flags);
@@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv)
 	}
 
 	/* Pending events or command responses? */
-	if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
+	if (kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
 		allowed = 0;
 		lbs_deb_host("pending events or command responses\n");
 	}
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 2cc7ecd8d123..0622104f0a03 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -459,7 +459,7 @@ static int lbs_thread(void *data)
 		else if (!list_empty(&priv->cmdpendingq) &&
 					!(priv->wakeup_dev_required))
 			shouldsleep = 0;	/* We have a command to send */
-		else if (__kfifo_len(&priv->event_fifo))
+		else if (kfifo_len(&priv->event_fifo))
 			shouldsleep = 0;	/* We have an event to process */
 		else
 			shouldsleep = 1;	/* No command */
@@ -511,9 +511,9 @@ static int lbs_thread(void *data)
 
 		/* Process hardware events, e.g. card removed, link lost */
 		spin_lock_irq(&priv->driver_lock);
-		while (__kfifo_len(&priv->event_fifo)) {
+		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			__kfifo_get(&priv->event_fifo, (unsigned char *) &event,
+			kfifo_get(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -1175,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	__kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 1538a0a3c0af..36e5dc6fc953 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2107,7 +2107,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	spin_lock_irqsave(&sonypi_compat.fifo_lock, flags);
 
 	if (atomic_inc_return(&sonypi_compat.open_count) == 1)
-		__kfifo_reset(&sonypi_compat.fifo);
+		kfifo_reset(&sonypi_compat.fifo);
 
 	spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags);
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 1bccbc1e588e..5f0c46f43ee1 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	__kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
+	kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!__kfifo_get(&session->cmdpool.queue,
+		if (!kfifo_get(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!__kfifo_get(&conn->session->cmdpool.queue,
+	if (!kfifo_get(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		__kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
+		kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!__kfifo_get(&session->cmdpool.queue,
+	if (!kfifo_get(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 41643c860d26..c0be926637b1 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+	while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	__kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
 		return conn->session->tt->init_pdu(task, 0, task->data_count);
 	}
 
-	BUG_ON(__kfifo_len(&tcp_task->r2tqueue));
+	BUG_ON(kfifo_len(&tcp_task->r2tqueue));
 	tcp_task->exp_datasn = 0;
 
 	/* Prepare PDU, optionally w/ immediate data */
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				__kfifo_put(&tcp_task->r2tpool.queue,
+				kfifo_put(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			__kfifo_get(&tcp_task->r2tqueue,
+			kfifo_get(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index db1b41c55fd3..975e448cfcb9 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
+		kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index a76da201183b..96aa787f208f 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -505,19 +505,19 @@ static inline void cq_delete(struct kfifo *kfifo)
 
 static inline unsigned int cq_howmany(struct kfifo *kfifo)
 {
-	return __kfifo_len(kfifo) / sizeof(void *);
+	return kfifo_len(kfifo) / sizeof(void *);
 }
 
 static inline int cq_put(struct kfifo *kfifo, void *p)
 {
-	return __kfifo_put(kfifo, (void *)&p, sizeof(p));
+	return kfifo_put(kfifo, (void *)&p, sizeof(p));
 }
 
 static inline void *cq_get(struct kfifo *kfifo)
 {
 	void *p = NULL;
 
-	__kfifo_get(kfifo, (void *)&p, sizeof(p));
+	kfifo_get(kfifo, (void *)&p, sizeof(p));
 	return p;
 }
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 61eef18218be..d0a2e464cacd 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	if (port->write_urb_busy)
 		start_io = false;
 	else {
-		start_io = (__kfifo_len(port->write_fifo) != 0);
+		start_io = (kfifo_len(port->write_fifo) != 0);
 		port->write_urb_busy = start_io;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 				(serial->type->max_in_flight_urbs -
 				 port->urbs_in_flight);
 	} else if (serial->num_bulk_out)
-		room = port->write_fifo->size - __kfifo_len(port->write_fifo);
+		room = port->write_fifo->size - kfifo_len(port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index e0f5c9d4197d..a893acda3964 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -37,34 +37,25 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern unsigned int __kfifo_put(struct kfifo *fifo,
+extern unsigned int kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
-extern unsigned int __kfifo_get(struct kfifo *fifo,
+extern unsigned int kfifo_get(struct kfifo *fifo,
 				unsigned char *buffer, unsigned int len);
 
-/**
- * __kfifo_reset - removes the entire FIFO contents, no locking version
- * @fifo: the fifo to be emptied.
- */
-static inline void __kfifo_reset(struct kfifo *fifo)
-{
-	fifo->in = fifo->out = 0;
-}
-
 /**
  * kfifo_reset - removes the entire FIFO contents
  * @fifo: the fifo to be emptied.
  */
 static inline void kfifo_reset(struct kfifo *fifo)
 {
-	__kfifo_reset(fifo);
+	fifo->in = fifo->out = 0;
 }
 
 /**
- * __kfifo_len - returns the number of bytes available in the FIFO
+ * kfifo_len - returns the number of used bytes in the FIFO
  * @fifo: the fifo to be used.
  */
-static inline unsigned int __kfifo_len(struct kfifo *fifo)
+static inline unsigned int kfifo_len(struct kfifo *fifo)
 {
 	register unsigned int	out;
 
@@ -92,7 +83,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_put(fifo, from, n);
+	ret = kfifo_put(fifo, from, n);
 
 	spin_unlock_irqrestore(lock, flags);
 
@@ -117,7 +108,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_get(fifo, to, n);
+	ret = kfifo_get(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
@@ -131,13 +122,4 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 	return ret;
 }
 
-/**
- * kfifo_len - returns the number of bytes available in the FIFO
- * @fifo: the fifo to be used.
- */
-static inline unsigned int kfifo_len(struct kfifo *fifo)
-{
-	return __kfifo_len(fifo);
-}
-
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4950bdbe3477..963ffde4af1a 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -100,7 +100,7 @@ void kfifo_free(struct kfifo *fifo)
 EXPORT_SYMBOL(kfifo_free);
 
 /**
- * __kfifo_put - puts some data into the FIFO, no locking version
+ * kfifo_put - puts some data into the FIFO, no locking version
  * @fifo: the fifo to be used.
  * @buffer: the data to be added.
  * @len: the length of the data to be added.
@@ -112,7 +112,7 @@ EXPORT_SYMBOL(kfifo_free);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int __kfifo_put(struct kfifo *fifo,
+unsigned int kfifo_put(struct kfifo *fifo,
 			const unsigned char *buffer, unsigned int len)
 {
 	unsigned int l;
@@ -144,10 +144,10 @@ unsigned int __kfifo_put(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(__kfifo_put);
+EXPORT_SYMBOL(kfifo_put);
 
 /**
- * __kfifo_get - gets some data from the FIFO, no locking version
+ * kfifo_get - gets some data from the FIFO, no locking version
  * @fifo: the fifo to be used.
  * @buffer: where the data must be copied.
  * @len: the size of the destination buffer.
@@ -158,7 +158,7 @@ EXPORT_SYMBOL(__kfifo_put);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int __kfifo_get(struct kfifo *fifo,
+unsigned int kfifo_get(struct kfifo *fifo,
 			 unsigned char *buffer, unsigned int len)
 {
 	unsigned int l;
@@ -190,4 +190,4 @@ unsigned int __kfifo_get(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(__kfifo_get);
+EXPORT_SYMBOL(kfifo_get);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index c6b50351aa78..9ef36849edd7 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(&dccpw.fifo) != 0);
+					 kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-- 
cgit v1.2.3


From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into
 kfifo_out...

rename kfifo_put...  into kfifo_in...  to prevent miss use of old non in
kernel-tree drivers

ditto for kfifo_get...  -> kfifo_out...

Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc
annotations more readable.

Add mini "howto porting to the new API" in kfifo.h

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       |  4 +--
 drivers/char/sonypi.c                       |  8 +++---
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 16 ++++++------
 drivers/media/video/meye.c                  | 16 ++++++------
 drivers/net/wireless/libertas/main.c        |  5 ++--
 drivers/platform/x86/fujitsu-laptop.c       |  4 +--
 drivers/platform/x86/sony-laptop.c          |  8 +++---
 drivers/scsi/libiscsi.c                     | 14 +++++------
 drivers/scsi/libiscsi_tcp.c                 | 18 ++++++-------
 drivers/scsi/libsrp.c                       |  6 ++---
 drivers/usb/host/fhci.h                     |  4 +--
 drivers/usb/serial/generic.c                |  4 +--
 include/linux/kfifo.h                       | 39 +++++++++++++++++++++--------
 kernel/kfifo.c                              | 32 +++++++++++------------
 net/dccp/probe.c                            |  4 +--
 15 files changed, 101 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 61f5bfe74f38..9ef243429014 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = kfifo_get(&port->fifo_ul, dc->send_buf,
+	size = kfifo_out(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
+	rval = kfifo_in(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index dbcb3bd192c7..0798754a607c 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
+	while (kfifo_out_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
 			 sizeof(kp), &sonypi_device.input_fifo_lock)
 			== sizeof(kp)) {
 		msleep(10);
@@ -828,7 +828,7 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put_locked(&sonypi_device.input_fifo,
+		kfifo_in_locked(&sonypi_device.input_fifo,
 			(unsigned char *)&kp, sizeof(kp),
 			&sonypi_device.input_fifo_lock);
 		schedule_work(&sonypi_device.input_work);
@@ -882,7 +882,7 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event,
+	kfifo_in_locked(&sonypi_device.fifo, (unsigned char *)&event,
 			sizeof(event), &sonypi_device.fifo_lock);
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
@@ -932,7 +932,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c),
+	       (kfifo_out_locked(&sonypi_device.fifo, &c, sizeof(c),
 				 &sonypi_device.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index d7d18fb02c93..dcbf2606c438 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -71,22 +71,22 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			kfifo_put(fifo,
+			kfifo_in(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			kfifo_put(fifo,
+			kfifo_in(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
+			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get_locked(fifo, (unsigned char *) &entry,
+		kfifo_out_locked(fifo, (unsigned char *) &entry,
 				sizeof(u32), fifo_lock);
 	return 0;
 }
@@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			kfifo_put(&rdev_p->rscp->qpid_fifo,
+			kfifo_in(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
@@ -180,7 +180,7 @@ tpt_err:
 static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
 {
 	u32 entry;
-	if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
+	if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
 		return entry;
 	else
 		return 0;	/* fifo emptry */
@@ -190,7 +190,7 @@ static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
 		u32 entry)
 {
 	BUG_ON(
-	kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+	kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
 	== 0);
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 38bcedfd9fec..884a569d60a2 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,7 +800,7 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
@@ -811,7 +811,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+		kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr,
 				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	} else {
@@ -821,7 +821,7 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
@@ -832,7 +832,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+		kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr,
 				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	}
@@ -935,7 +935,7 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
+	kfifo_in_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
 			 &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
@@ -968,7 +968,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get_locked(&meye.doneq, (unsigned char *)&unused,
+		kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
 				sizeof(int), &meye.doneq_lock);
 	}
 	*i = meye.grab_buffer[*i].size;
@@ -1456,7 +1456,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index,
+	kfifo_in_locked(&meye.grabq, (unsigned char *)&buf->index,
 			sizeof(int), &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
@@ -1483,7 +1483,7 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 		return -EINTR;
 	}
 
-	if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr,
+	if (!kfifo_out_locked(&meye.doneq, (unsigned char *)&reqnr,
 		       sizeof(int), &meye.doneq_lock)) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 0622104f0a03..2bcfa745524a 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -513,7 +513,8 @@ static int lbs_thread(void *data)
 		spin_lock_irq(&priv->driver_lock);
 		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			kfifo_get(&priv->event_fifo, (unsigned char *) &event,
+
+			kfifo_out(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -1175,7 +1176,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	kfifo_in(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 13dc7bedcfce..b66029bd75d0 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1006,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put_locked(&fujitsu_hotkey->fifo,
+				status = kfifo_in_locked(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
 						   sizeof(keycode),
 						   &fujitsu_hotkey->fifo_lock);
@@ -1020,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				}
 			} else if (keycode == 0) {
 				while ((status =
-					kfifo_get_locked(
+					kfifo_out_locked(
 					 &fujitsu_hotkey->fifo,
 					 (unsigned char *) &keycode_r,
 					 sizeof(keycode_r),
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 36e5dc6fc953..2896ca4cd9ab 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
+	while (kfifo_out_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
 			sizeof(kp), &sony_laptop_input.fifo_lock)
 			== sizeof(kp)) {
 		msleep(10);
@@ -363,7 +363,7 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put_locked(&sony_laptop_input.fifo,
+		kfifo_in_locked(&sony_laptop_input.fifo,
 			  (unsigned char *)&kp, sizeof(kp),
 			  &sony_laptop_input.fifo_lock);
 
@@ -2130,7 +2130,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c),
+	       (kfifo_out_locked(&sonypi_compat.fifo, &c, sizeof(c),
 			  &sonypi_compat.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
@@ -2310,7 +2310,7 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event,
+	kfifo_in_locked(&sonypi_compat.fifo, (unsigned char *)&event,
 			sizeof(event), &sonypi_compat.fifo_lock);
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5f0c46f43ee1..c28a712fd4db 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
+	kfifo_in(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!kfifo_get(&session->cmdpool.queue,
+		if (!kfifo_out(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!kfifo_get(&conn->session->cmdpool.queue,
+	if (!kfifo_out(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
+		kfifo_in(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!kfifo_get(&session->cmdpool.queue,
+	if (!kfifo_out(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index c0be926637b1..d51ffeca2ec9 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+	while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				kfifo_put(&tcp_task->r2tpool.queue,
+				kfifo_in(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			kfifo_get(&tcp_task->r2tqueue,
+			kfifo_out(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 975e448cfcb9..8424b8606efb 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
+		kfifo_in(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
@@ -163,7 +163,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get_locked(&target->iu_queue.queue, (void *) &iue,
+	kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
 			sizeof(void *), &target->iu_queue.lock);
 	if (!iue)
 		return iue;
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue,
+	kfifo_in_locked(&iue->target->iu_queue.queue, (void *) &iue,
 			sizeof(void *), &iue->target->iu_queue.lock);
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 96aa787f208f..72dae1c5ab38 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -510,14 +510,14 @@ static inline unsigned int cq_howmany(struct kfifo *kfifo)
 
 static inline int cq_put(struct kfifo *kfifo, void *p)
 {
-	return kfifo_put(kfifo, (void *)&p, sizeof(p));
+	return kfifo_in(kfifo, (void *)&p, sizeof(p));
 }
 
 static inline void *cq_get(struct kfifo *kfifo)
 {
 	void *p = NULL;
 
-	kfifo_get(kfifo, (void *)&p, sizeof(p));
+	kfifo_out(kfifo, (void *)&p, sizeof(p));
 	return p;
 }
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index d0a2e464cacd..b0f1183755c9 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
+	count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock);
+	count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index a893acda3964..1b59c4a0e85f 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -19,6 +19,25 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
+
+/*
+ * Howto porting drivers to the new generic fifo API:
+ *
+ * - Modify the declaration of the "struct kfifo *" object into a
+ *   in-place "struct kfifo" object
+ * - Init the in-place object with kfifo_alloc() or kfifo_init()
+ *   Note: The address of the in-place "struct kfifo" object must be
+ *   passed as the first argument to this functions
+ * - Replace the use of __kfifo_put into kfifo_in and __kfifo_get
+ *   into kfifo_out
+ * - Replace the use of kfifo_put into kfifo_in_locked and kfifo_get
+ *   into kfifo_out_locked
+ *   Note: the spinlock pointer formerly passed to kfifo_init/kfifo_alloc
+ *   must be passed now to the kfifo_in_locked and kfifo_out_locked
+ *   as the last parameter.
+ * - All formerly name __kfifo_* functions has been renamed into kfifo_*
+ */
+
 #ifndef _LINUX_KFIFO_H
 #define _LINUX_KFIFO_H
 
@@ -37,10 +56,10 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern unsigned int kfifo_put(struct kfifo *fifo,
-				const unsigned char *buffer, unsigned int len);
-extern unsigned int kfifo_get(struct kfifo *fifo,
-				unsigned char *buffer, unsigned int len);
+extern __must_check unsigned int kfifo_in(struct kfifo *fifo,
+				const unsigned char *from, unsigned int len);
+extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
+				unsigned char *to, unsigned int len);
 
 /**
  * kfifo_reset - removes the entire FIFO contents
@@ -65,7 +84,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
 }
 
 /**
- * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking
+ * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
  * @from: the data to be added.
  * @n: the length of the data to be added.
@@ -75,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
+static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo,
 		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
@@ -83,7 +102,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = kfifo_put(fifo, from, n);
+	ret = kfifo_in(fifo, from, n);
 
 	spin_unlock_irqrestore(lock, flags);
 
@@ -91,7 +110,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 }
 
 /**
- * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking
+ * kfifo_out_locked - gets some data from the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @n: the size of the destination buffer.
@@ -100,7 +119,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
  * This function copies at most @len bytes from the FIFO into the
  * @to buffer and returns the number of copied bytes.
  */
-static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
+static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	unsigned char *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
@@ -108,7 +127,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = kfifo_get(fifo, to, n);
+	ret = kfifo_out(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 963ffde4af1a..d659442e73f2 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -100,20 +100,20 @@ void kfifo_free(struct kfifo *fifo)
 EXPORT_SYMBOL(kfifo_free);
 
 /**
- * kfifo_put - puts some data into the FIFO, no locking version
+ * kfifo_in - puts some data into the FIFO
  * @fifo: the fifo to be used.
- * @buffer: the data to be added.
+ * @from: the data to be added.
  * @len: the length of the data to be added.
  *
- * This function copies at most @len bytes from the @buffer into
+ * This function copies at most @len bytes from the @from buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_put(struct kfifo *fifo,
-			const unsigned char *buffer, unsigned int len)
+unsigned int kfifo_in(struct kfifo *fifo,
+			const unsigned char *from, unsigned int len)
 {
 	unsigned int l;
 
@@ -128,10 +128,10 @@ unsigned int kfifo_put(struct kfifo *fifo,
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
-	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l);
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	memcpy(fifo->buffer, buffer + l, len - l);
+	memcpy(fifo->buffer, from + l, len - l);
 
 	/*
 	 * Ensure that we add the bytes to the kfifo -before-
@@ -144,22 +144,22 @@ unsigned int kfifo_put(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_put);
+EXPORT_SYMBOL(kfifo_in);
 
 /**
- * kfifo_get - gets some data from the FIFO, no locking version
+ * kfifo_out - gets some data from the FIFO
  * @fifo: the fifo to be used.
- * @buffer: where the data must be copied.
+ * @to: where the data must be copied.
  * @len: the size of the destination buffer.
  *
  * This function copies at most @len bytes from the FIFO into the
- * @buffer and returns the number of copied bytes.
+ * @to buffer and returns the number of copied bytes.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_get(struct kfifo *fifo,
-			 unsigned char *buffer, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo,
+			 unsigned char *to, unsigned int len)
 {
 	unsigned int l;
 
@@ -174,10 +174,10 @@ unsigned int kfifo_get(struct kfifo *fifo,
 
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
-	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+	memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	memcpy(buffer + l, fifo->buffer, len - l);
+	memcpy(to + l, fifo->buffer, len - l);
 
 	/*
 	 * Ensure that we remove the bytes from the kfifo -before-
@@ -190,4 +190,4 @@ unsigned int kfifo_get(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_get);
+EXPORT_SYMBOL(kfifo_out);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 9ef36849edd7..a1362dc8abb0 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
-- 
cgit v1.2.3


From 9842c38e917636fa7dc6b88aff17a8f1fd7f0cc0 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:29 -0800
Subject: kfifo: fix warn_unused_result

Fix the "ignoring return value of '...', declared with attribute
warn_unused_result" compiler warning in several users of the new kfifo
API.

It removes the __must_check attribute from kfifo_in() and
kfifo_in_locked() which must not necessary performed.

Fix the allocation bug in the nozomi driver file, by moving out the
kfifo_alloc from the interrupt handler into the probe function.

Fix the kfifo_out() and kfifo_out_locked() users to handle a unexpected
end of fifo.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 31 +++++++++++++++++++++++++----
 drivers/infiniband/hw/cxgb3/cxio_resource.c |  5 +++--
 drivers/media/video/meye.c                  |  5 +++--
 drivers/net/wireless/libertas/main.c        |  6 ++++--
 drivers/scsi/libiscsi_tcp.c                 |  9 +++++++--
 drivers/scsi/libsrp.c                       |  7 +++++--
 include/linux/kfifo.h                       |  4 ++--
 7 files changed, 51 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 9ef243429014..7d73cd430340 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -685,8 +685,6 @@ static int nozomi_read_config_table(struct nozomi *dc)
 		dump_table(dc);
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
-			kfifo_alloc(&dc->port[i].fifo_ul,
-				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
@@ -1433,6 +1431,16 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 		goto err_free_sbuf;
 	}
 
+	for (i = PORT_MDM; i < MAX_PORT; i++) {
+		if (kfifo_alloc(&dc->port[i].fifo_ul,
+		      FIFO_BUFFER_SIZE_UL, GFP_ATOMIC)) {
+			dev_err(&pdev->dev,
+					"Could not allocate kfifo buffer\n");
+			ret = -ENOMEM;
+			goto err_free_kfifo;
+		}
+	}
+
 	spin_lock_init(&dc->spin_mutex);
 
 	nozomi_setup_private_data(dc);
@@ -1445,7 +1453,7 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 			NOZOMI_NAME, dc);
 	if (unlikely(ret)) {
 		dev_err(&pdev->dev, "can't request irq %d\n", pdev->irq);
-		goto err_free_sbuf;
+		goto err_free_kfifo;
 	}
 
 	DBG1("base_addr: %p", dc->base_addr);
@@ -1464,13 +1472,28 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 	dc->state = NOZOMI_STATE_ENABLED;
 
 	for (i = 0; i < MAX_PORT; i++) {
+		struct device *tty_dev;
+
 		mutex_init(&dc->port[i].tty_sem);
 		tty_port_init(&dc->port[i].port);
-		tty_register_device(ntty_driver, dc->index_start + i,
+		tty_dev = tty_register_device(ntty_driver, dc->index_start + i,
 							&pdev->dev);
+
+		if (IS_ERR(tty_dev)) {
+			ret = PTR_ERR(tty_dev);
+			dev_err(&pdev->dev, "Could not allocate tty?\n");
+			goto err_free_tty;
+		}
 	}
+
 	return 0;
 
+err_free_tty:
+	for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i)
+		tty_unregister_device(ntty_driver, i);
+err_free_kfifo:
+	for (i = 0; i < MAX_PORT; i++)
+		kfifo_free(&dc->port[i].fifo_ul);
 err_free_sbuf:
 	kfree(dc->send_buf);
 	iounmap(dc->base_addr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index dcbf2606c438..31f9201b2980 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -86,8 +86,9 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_out_locked(fifo, (unsigned char *) &entry,
-				sizeof(u32), fifo_lock);
+		if (kfifo_out_locked(fifo, (unsigned char *) &entry,
+				sizeof(u32), fifo_lock) != sizeof(u32))
+					break;
 	return 0;
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 884a569d60a2..b421858ccf90 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -968,8 +968,9 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
-				sizeof(int), &meye.doneq_lock);
+		if (kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
+				sizeof(int), &meye.doneq_lock) != sizeof(int))
+					break;
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 2bcfa745524a..c2975c8e2f21 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -514,8 +514,10 @@ static int lbs_thread(void *data)
 		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
 
-			kfifo_out(&priv->event_fifo, (unsigned char *) &event,
-				sizeof(event));
+			if (kfifo_out(&priv->event_fifo,
+				(unsigned char *) &event, sizeof(event)) !=
+				sizeof(event))
+					break;
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
 			spin_lock_irq(&priv->driver_lock);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index d51ffeca2ec9..db6856c138fc 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -990,8 +990,13 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			kfifo_out(&tcp_task->r2tqueue,
-				    (void *)&tcp_task->r2t, sizeof(void *));
+			if (kfifo_out(&tcp_task->r2tqueue,
+			    (void *)&tcp_task->r2t, sizeof(void *)) !=
+			    sizeof(void *)) {
+				WARN_ONCE(1, "unexpected fifo state");
+				r2t = NULL;
+			}
+
 			r2t = tcp_task->r2t;
 		}
 		spin_unlock_bh(&session->lock);
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 8424b8606efb..ab19b3b4be52 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -163,8 +163,11 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
-			sizeof(void *), &target->iu_queue.lock);
+	if (kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
+		sizeof(void *), &target->iu_queue.lock) != sizeof(void *)) {
+			WARN_ONCE(1, "unexpected fifo state");
+			return NULL;
+	}
 	if (!iue)
 		return iue;
 	iue->target = target;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 1b59c4a0e85f..5ed2565c89b6 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -56,7 +56,7 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern __must_check unsigned int kfifo_in(struct kfifo *fifo,
+extern unsigned int kfifo_in(struct kfifo *fifo,
 				const unsigned char *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 				unsigned char *to, unsigned int len);
@@ -94,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo,
+static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
 		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
-- 
cgit v1.2.3


From 37bdfbbfaab47811fcec84dff23c4e8da1a09f9e Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:30 -0800
Subject: kfifo: add DEFINE_KFIFO and friends, add very tiny functions

Add DECLARE_KFIFO - macro to declare a kfifo and the associated buffer inside a struct
 Add INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
 Add DEFINE_KFIFO - macro to define and initialize a kfifo as a global or local object
 Add kfifo_size() - returns the size of the fifo in bytes
 Add kfifo_is_empty() - returns true if the fifo is empty
 Add kfifo_is_full() - returns true if the fifo is full
 Add kfifo_avail() - returns the number of bytes available in the FIFO
 Do some code cleanup

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 94 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 5ed2565c89b6..dd53eed3e2af 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -51,6 +51,60 @@ struct kfifo {
 	unsigned int out;	/* data is extracted from off. (out % size) */
 };
 
+/*
+ * Macros for declaration and initialization of the kfifo datatype
+ */
+
+/* helper macro */
+#define __kfifo_initializer(s, b) \
+	(struct kfifo) { \
+		.size	= s, \
+		.in	= 0, \
+		.out	= 0, \
+		.buffer = b \
+	}
+
+/**
+ * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ *
+ * Note: the macro can be used inside struct or union declaration
+ * Note: the macro creates two objects:
+ *  A kfifo object with the given name and a buffer for the kfifo
+ *  object named name##kfifo_buffer
+ */
+#define DECLARE_KFIFO(name, size) \
+union { \
+	struct kfifo name; \
+	unsigned char name##kfifo_buffer[size + sizeof(struct kfifo)]; \
+}
+
+/**
+ * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ */
+#define INIT_KFIFO(name) \
+	name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \
+				sizeof(struct kfifo), name##kfifo_buffer)
+
+/**
+ * DEFINE_KFIFO - macro to define and initialize a kfifo
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ *
+ * Note: the macro can be used for global and local kfifo data type variables
+ * Note: the macro creates two objects:
+ *  A kfifo object with the given name and a buffer for the kfifo
+ *  object named name##kfifo_buffer
+ */
+#define DEFINE_KFIFO(name, size) \
+	unsigned char name##kfifo_buffer[size]; \
+	struct kfifo name = __kfifo_initializer(size, name##kfifo_buffer)
+
+#undef __kfifo_initializer
+
 extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
@@ -70,6 +124,15 @@ static inline void kfifo_reset(struct kfifo *fifo)
 	fifo->in = fifo->out = 0;
 }
 
+/**
+ * kfifo_size - returns the size of the fifo in bytes
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check unsigned int kfifo_size(struct kfifo *fifo)
+{
+	return fifo->size;
+}
+
 /**
  * kfifo_len - returns the number of used bytes in the FIFO
  * @fifo: the fifo to be used.
@@ -83,6 +146,33 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
 	return fifo->in - out;
 }
 
+/**
+ * kfifo_is_empty - returns true if the fifo is empty
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check int kfifo_is_empty(struct kfifo *fifo)
+{
+	return fifo->in == fifo->out;
+}
+
+/**
+ * kfifo_is_full - returns true if the fifo is full
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check int kfifo_is_full(struct kfifo *fifo)
+{
+	return kfifo_len(fifo) == kfifo_size(fifo);
+}
+
+/**
+ * kfifo_avail - returns the number of bytes available in the FIFO
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
+{
+	return kfifo_size(fifo) - kfifo_len(fifo);
+}
+
 /**
  * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
@@ -133,8 +223,8 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	 * optimization: if the FIFO is empty, set the indices to 0
 	 * so we don't wrap the next time
 	 */
-	if (fifo->in == fifo->out)
-		fifo->in = fifo->out = 0;
+	if (kfifo_is_empty(fifo))
+		kfifo_reset(fifo);
 
 	spin_unlock_irqrestore(lock, flags);
 
-- 
cgit v1.2.3


From a121f24accac1600bf5b6fb1e12eeabdfed7cb1a Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:31 -0800
Subject: kfifo: add kfifo_skip, kfifo_from_user and kfifo_to_user

Add kfifo_reset_out() for save lockless discard the fifo output
 Add kfifo_skip() to skip a number of output bytes
 Add kfifo_from_user() to copy user space data into the fifo
 Add kfifo_to_user() to copy fifo data to user space

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  47 +++++++++++++++++
 kernel/kfifo.c        | 139 ++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 170 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index dd53eed3e2af..d3230fb08bc7 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,6 +124,16 @@ static inline void kfifo_reset(struct kfifo *fifo)
 	fifo->in = fifo->out = 0;
 }
 
+/**
+ * kfifo_reset_out - skip FIFO contents
+ * @fifo: the fifo to be emptied.
+ */
+static inline void kfifo_reset_out(struct kfifo *fifo)
+{
+	smp_mb();
+	fifo->out = fifo->in;
+}
+
 /**
  * kfifo_size - returns the size of the fifo in bytes
  * @fifo: the fifo to be used.
@@ -231,4 +241,41 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	return ret;
 }
 
+extern void kfifo_skip(struct kfifo *fifo, unsigned int len);
+
+extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int n);
+
+extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int n);
+
+/**
+ * __kfifo_add_out internal helper function for updating the out offset
+ */
+static inline void __kfifo_add_out(struct kfifo *fifo,
+				unsigned int off)
+{
+	smp_mb();
+	fifo->out += off;
+}
+
+/**
+ * __kfifo_add_in internal helper function for updating the in offset
+ */
+static inline void __kfifo_add_in(struct kfifo *fifo,
+				unsigned int off)
+{
+	smp_wmb();
+	fifo->in += off;
+}
+
+/**
+ * __kfifo_off internal helper function for calculating the index of a
+ * given offeset
+ */
+static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
+{
+	return off & (fifo->size - 1);
+}
+
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index d659442e73f2..2a78425ef67f 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -26,6 +26,7 @@
 #include <linux/err.h>
 #include <linux/kfifo.h>
 #include <linux/log2.h>
+#include <linux/uaccess.h>
 
 static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 		unsigned int size)
@@ -99,6 +100,21 @@ void kfifo_free(struct kfifo *fifo)
 }
 EXPORT_SYMBOL(kfifo_free);
 
+/**
+ * kfifo_skip - skip output data
+ * @fifo: the fifo to be used.
+ * @len: number of bytes to skip
+ */
+void kfifo_skip(struct kfifo *fifo, unsigned int len)
+{
+	if (len < kfifo_len(fifo)) {
+		__kfifo_add_out(fifo, len);
+		return;
+	}
+	kfifo_reset_out(fifo);
+}
+EXPORT_SYMBOL(kfifo_skip);
+
 /**
  * kfifo_in - puts some data into the FIFO
  * @fifo: the fifo to be used.
@@ -115,6 +131,7 @@ EXPORT_SYMBOL(kfifo_free);
 unsigned int kfifo_in(struct kfifo *fifo,
 			const unsigned char *from, unsigned int len)
 {
+	unsigned int off;
 	unsigned int l;
 
 	len = min(len, fifo->size - fifo->in + fifo->out);
@@ -126,21 +143,16 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	smp_mb();
 
+	off = __kfifo_off(fifo, fifo->in);
+
 	/* first put the data starting from fifo->in to buffer end */
-	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
-	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l);
+	l = min(len, fifo->size - off);
+	memcpy(fifo->buffer + off, from, l);
 
 	/* then put the rest (if any) at the beginning of the buffer */
 	memcpy(fifo->buffer, from + l, len - l);
 
-	/*
-	 * Ensure that we add the bytes to the kfifo -before-
-	 * we update the fifo->in index.
-	 */
-
-	smp_wmb();
-
-	fifo->in += len;
+	__kfifo_add_in(fifo, len);
 
 	return len;
 }
@@ -161,6 +173,7 @@ EXPORT_SYMBOL(kfifo_in);
 unsigned int kfifo_out(struct kfifo *fifo,
 			 unsigned char *to, unsigned int len)
 {
+	unsigned int off;
 	unsigned int l;
 
 	len = min(len, fifo->in - fifo->out);
@@ -172,22 +185,116 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	smp_rmb();
 
+	off = __kfifo_off(fifo, fifo->out);
+
 	/* first get the data from fifo->out until the end of the buffer */
-	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
-	memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+	l = min(len, fifo->size - off);
+	memcpy(to, fifo->buffer + off, l);
 
 	/* then get the rest (if any) from the beginning of the buffer */
 	memcpy(to + l, fifo->buffer, len - l);
 
+	__kfifo_add_out(fifo, len);
+
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out);
+
+/**
+ * kfifo_from_user - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from into the
+ * FIFO depending and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int len)
+{
+	unsigned int off;
+	unsigned int l;
+	int ret;
+
+	len = min(len, fifo->size - fifo->in + fifo->out);
+
 	/*
-	 * Ensure that we remove the bytes from the kfifo -before-
-	 * we update the fifo->out index.
+	 * Ensure that we sample the fifo->out index -before- we
+	 * start putting bytes into the kfifo.
 	 */
 
 	smp_mb();
 
-	fifo->out += len;
+	off = __kfifo_off(fifo, fifo->in);
+
+	/* first put the data starting from fifo->in to buffer end */
+	l = min(len, fifo->size - off);
+	ret = copy_from_user(fifo->buffer + off, from, l);
+
+	if (unlikely(ret))
+		return l - ret;
+
+	/* then put the rest (if any) at the beginning of the buffer */
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+
+	if (unlikely(ret))
+		return len - ret;
+
+	__kfifo_add_in(fifo, len);
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_out);
+EXPORT_SYMBOL(kfifo_from_user);
+
+/**
+ * kfifo_to_user - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most @len bytes from the FIFO into the
+ * @to buffer and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len)
+{
+	unsigned int off;
+	unsigned int l;
+	int ret;
+
+	len = min(len, fifo->in - fifo->out);
+
+	/*
+	 * Ensure that we sample the fifo->in index -before- we
+	 * start removing bytes from the kfifo.
+	 */
+
+	smp_rmb();
+
+	off = __kfifo_off(fifo, fifo->out);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - off);
+	ret = copy_to_user(to, fifo->buffer + off, l);
+
+	if (unlikely(ret))
+		return l - ret;
+
+	/* then get the rest (if any) from the beginning of the buffer */
+	ret = copy_to_user(to + l, fifo->buffer, len - l);
+
+	if (unlikely(ret))
+		return len - ret;
+
+	__kfifo_add_out(fifo, len);
+
+	return len;
+}
+EXPORT_SYMBOL(kfifo_to_user);
+
-- 
cgit v1.2.3


From 86d4880313603810901f639ccb5c88ff13d4ad3c Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:32 -0800
Subject: kfifo: add record handling functions

Add kfifo_in_rec() - puts some record data into the FIFO
 Add kfifo_out_rec() - gets some record data from the FIFO
 Add kfifo_from_user_rec() - puts some data from user space into the FIFO
 Add kfifo_to_user_rec() - gets data from the FIFO and write it to user space
 Add kfifo_peek_rec() - gets the size of the next FIFO record field
 Add kfifo_skip_rec() - skip the next fifo out record
 Add kfifo_avail_rec() - determinate the number of bytes available in a record FIFO

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/kfifo.c        | 286 +++++++++++++++++++++++++++++--------------
 2 files changed, 523 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index d3230fb08bc7..486e8ad3bb50 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -278,4 +278,334 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
 	return off & (fifo->size - 1);
 }
 
+/**
+ * __kfifo_peek_n internal helper function for determinate the length of
+ * the next record in the fifo
+ */
+static inline unsigned int __kfifo_peek_n(struct kfifo *fifo,
+				unsigned int recsize)
+{
+#define __KFIFO_GET(fifo, off, shift) \
+	((fifo)->buffer[__kfifo_off((fifo), (fifo)->out+(off))] << (shift))
+
+	unsigned int l;
+
+	l = __KFIFO_GET(fifo, 0, 0);
+
+	if (--recsize)
+		l |= __KFIFO_GET(fifo, 1, 8);
+
+	return l;
+#undef	__KFIFO_GET
+}
+
+/**
+ * __kfifo_poke_n internal helper function for storing the length of
+ * the next record into the fifo
+ */
+static inline void __kfifo_poke_n(struct kfifo *fifo,
+			unsigned int recsize, unsigned int n)
+{
+#define __KFIFO_PUT(fifo, off, val, shift) \
+		( \
+		(fifo)->buffer[__kfifo_off((fifo), (fifo)->in+(off))] = \
+		(unsigned char)((val) >> (shift)) \
+		)
+
+	__KFIFO_PUT(fifo, 0, n, 0);
+
+	if (--recsize)
+		__KFIFO_PUT(fifo, 1, n, 8);
+#undef	__KFIFO_PUT
+}
+
+/**
+ * __kfifo_in_... internal functions for put date into the fifo
+ * do not call it directly, use kfifo_in_rec() instead
+ */
+extern unsigned int __kfifo_in_n(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize);
+
+extern unsigned int __kfifo_in_generic(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize);
+
+static inline unsigned int __kfifo_in_rec(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize)
+{
+	unsigned int ret;
+
+	ret = __kfifo_in_n(fifo, from, n, recsize);
+
+	if (likely(ret == 0)) {
+		if (recsize)
+			__kfifo_poke_n(fifo, recsize, n);
+		__kfifo_add_in(fifo, n + recsize);
+	}
+	return ret;
+}
+
+/**
+ * kfifo_in_rec - puts some record data into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: the data to be added.
+ * @n: the length of the data to be added.
+ * @recsize: size of record field
+ *
+ * This function copies @n bytes from the @from into the FIFO and returns
+ * the number of bytes which cannot be copied.
+ * A returned value greater than the @n value means that the record doesn't
+ * fit into the buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo,
+	void *from, unsigned int n, unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_in_generic(fifo, from, n, recsize);
+	return __kfifo_in_rec(fifo, from, n, recsize);
+}
+
+/**
+ * __kfifo_out_... internal functions for get date from the fifo
+ * do not call it directly, use kfifo_out_rec() instead
+ */
+extern unsigned int __kfifo_out_n(struct kfifo *fifo,
+	void *to, unsigned int reclen, unsigned int recsize);
+
+extern unsigned int __kfifo_out_generic(struct kfifo *fifo,
+	void *to, unsigned int n,
+	unsigned int recsize, unsigned int *total);
+
+static inline unsigned int __kfifo_out_rec(struct kfifo *fifo,
+	void *to, unsigned int n, unsigned int recsize,
+	unsigned int *total)
+{
+	unsigned int l;
+
+	if (!recsize) {
+		l = n;
+		if (total)
+			*total = l;
+	} else {
+		l = __kfifo_peek_n(fifo, recsize);
+		if (total)
+			*total = l;
+		if (n < l)
+			return l;
+	}
+
+	return __kfifo_out_n(fifo, to, l, recsize);
+}
+
+/**
+ * kfifo_out_rec - gets some record data from the FIFO
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @recsize: size of record field
+ * @total: pointer where the total number of to copied bytes should stored
+ *
+ * This function copies at most @n bytes from the FIFO to @to and returns the
+ * number of bytes which cannot be copied.
+ * A returned value greater than the @n value means that the record doesn't
+ * fit into the @to buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo,
+	void *to, unsigned int n, unsigned int recsize,
+	unsigned int *total)
+
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_out_generic(fifo, to, n, recsize, total);
+	return __kfifo_out_rec(fifo, to, n, recsize, total);
+}
+
+/**
+ * __kfifo_from_user_... internal functions for transfer from user space into
+ * the fifo. do not call it directly, use kfifo_from_user_rec() instead
+ */
+extern unsigned int __kfifo_from_user_n(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize);
+
+extern unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize);
+
+static inline unsigned int __kfifo_from_user_rec(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize)
+{
+	unsigned int ret;
+
+	ret = __kfifo_from_user_n(fifo, from, n, recsize);
+
+	if (likely(ret == 0)) {
+		if (recsize)
+			__kfifo_poke_n(fifo, recsize, n);
+		__kfifo_add_in(fifo, n + recsize);
+	}
+	return ret;
+}
+
+/**
+ * kfifo_from_user_rec - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @n: the length of the data to be added.
+ * @recsize: size of record field
+ *
+ * This function copies @n bytes from the @from into the
+ * FIFO and returns the number of bytes which cannot be copied.
+ *
+ * If the returned value is equal or less the @n value, the copy_from_user()
+ * functions has failed. Otherwise the record doesn't fit into the buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_from_user_generic(fifo, from, n, recsize);
+	return __kfifo_from_user_rec(fifo, from, n, recsize);
+}
+
+/**
+ * __kfifo_to_user_... internal functions for transfer fifo data into user space
+ * do not call it directly, use kfifo_to_user_rec() instead
+ */
+extern unsigned int __kfifo_to_user_n(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned int reclen,
+	unsigned int recsize);
+
+extern unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned int recsize,
+	unsigned int *total);
+
+static inline unsigned int __kfifo_to_user_rec(struct kfifo *fifo,
+	void __user *to, unsigned int n,
+	unsigned int recsize, unsigned int *total)
+{
+	unsigned int l;
+
+	if (!recsize) {
+		l = n;
+		if (total)
+			*total = l;
+	} else {
+		l = __kfifo_peek_n(fifo, recsize);
+		if (total)
+			*total = l;
+		if (n < l)
+			return l;
+	}
+
+	return __kfifo_to_user_n(fifo, to, n, l, recsize);
+}
+
+/**
+ * kfifo_to_user_rec - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @recsize: size of record field
+ * @total: pointer where the total number of to copied bytes should stored
+ *
+ * This function copies at most @n bytes from the FIFO to the @to.
+ * In case of an error, the function returns the number of bytes which cannot
+ * be copied.
+ * If the returned value is equal or less the @n value, the copy_to_user()
+ * functions has failed. Otherwise the record doesn't fit into the @to buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo,
+		void __user *to, unsigned int n, unsigned int recsize,
+		unsigned int *total)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_to_user_generic(fifo, to, n, recsize, total);
+	return __kfifo_to_user_rec(fifo, to, n, recsize, total);
+}
+
+/**
+ * __kfifo_peek_... internal functions for peek into the next fifo record
+ * do not call it directly, use kfifo_peek_rec() instead
+ */
+extern unsigned int __kfifo_peek_generic(struct kfifo *fifo,
+				unsigned int recsize);
+
+/**
+ * kfifo_peek_rec - gets the size of the next FIFO record data
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ *
+ * This function returns the size of the next FIFO record in number of bytes
+ */
+static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_peek_generic(fifo, recsize);
+	if (!recsize)
+		return kfifo_len(fifo);
+	return __kfifo_peek_n(fifo, recsize);
+}
+
+/**
+ * __kfifo_skip_... internal functions for skip the next fifo record
+ * do not call it directly, use kfifo_skip_rec() instead
+ */
+extern void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize);
+
+static inline void __kfifo_skip_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	unsigned int l;
+
+	if (recsize) {
+		l = __kfifo_peek_n(fifo, recsize);
+
+		if (l + recsize <= kfifo_len(fifo)) {
+			__kfifo_add_out(fifo, l + recsize);
+			return;
+		}
+	}
+	kfifo_reset_out(fifo);
+}
+
+/**
+ * kfifo_skip_rec - skip the next fifo out record
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ *
+ * This function skips the next FIFO record
+ */
+static inline void kfifo_skip_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		__kfifo_skip_generic(fifo, recsize);
+	else
+		__kfifo_skip_rec(fifo, recsize);
+}
+
+/**
+ * kfifo_avail_rec - returns the number of bytes available in a record FIFO
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ */
+static inline __must_check unsigned int kfifo_avail_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	unsigned int l = kfifo_size(fifo) - kfifo_len(fifo);
+
+	return (l > recsize) ? l - recsize : 0;
+}
+
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 2a78425ef67f..e92d519f93b1 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -115,27 +115,11 @@ void kfifo_skip(struct kfifo *fifo, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_skip);
 
-/**
- * kfifo_in - puts some data into the FIFO
- * @fifo: the fifo to be used.
- * @from: the data to be added.
- * @len: the length of the data to be added.
- *
- * This function copies at most @len bytes from the @from buffer into
- * the FIFO depending on the free space, and returns the number of
- * bytes copied.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_in(struct kfifo *fifo,
-			const unsigned char *from, unsigned int len)
+static inline void __kfifo_in_data(struct kfifo *fifo,
+		const void *from, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 
-	len = min(len, fifo->size - fifo->in + fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->out index -before- we
 	 * start putting bytes into the kfifo.
@@ -143,7 +127,7 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	smp_mb();
 
-	off = __kfifo_off(fifo, fifo->in);
+	off = __kfifo_off(fifo, fifo->in + off);
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
@@ -151,33 +135,13 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	/* then put the rest (if any) at the beginning of the buffer */
 	memcpy(fifo->buffer, from + l, len - l);
-
-	__kfifo_add_in(fifo, len);
-
-	return len;
 }
-EXPORT_SYMBOL(kfifo_in);
 
-/**
- * kfifo_out - gets some data from the FIFO
- * @fifo: the fifo to be used.
- * @to: where the data must be copied.
- * @len: the size of the destination buffer.
- *
- * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_out(struct kfifo *fifo,
-			 unsigned char *to, unsigned int len)
+static inline void __kfifo_out_data(struct kfifo *fifo,
+		void *to, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 
-	len = min(len, fifo->in - fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->in index -before- we
 	 * start removing bytes from the kfifo.
@@ -185,7 +149,7 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	smp_rmb();
 
-	off = __kfifo_off(fifo, fifo->out);
+	off = __kfifo_off(fifo, fifo->out + off);
 
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
@@ -193,34 +157,14 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	/* then get the rest (if any) from the beginning of the buffer */
 	memcpy(to + l, fifo->buffer, len - l);
-
-	__kfifo_add_out(fifo, len);
-
-	return len;
 }
-EXPORT_SYMBOL(kfifo_out);
 
-/**
- * kfifo_from_user - puts some data from user space into the FIFO
- * @fifo: the fifo to be used.
- * @from: pointer to the data to be added.
- * @len: the length of the data to be added.
- *
- * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 	int ret;
 
-	len = min(len, fifo->size - fifo->in + fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->out index -before- we
 	 * start putting bytes into the kfifo.
@@ -228,29 +172,101 @@ unsigned int kfifo_from_user(struct kfifo *fifo,
 
 	smp_mb();
 
-	off = __kfifo_off(fifo, fifo->in);
+	off = __kfifo_off(fifo, fifo->in + off);
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
 
 	if (unlikely(ret))
-		return l - ret;
+		return ret + len - l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	return copy_from_user(fifo->buffer, from + l, len - l);
+}
+
+static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off)
+{
+	unsigned int l;
+	int ret;
+
+	/*
+	 * Ensure that we sample the fifo->in index -before- we
+	 * start removing bytes from the kfifo.
+	 */
+
+	smp_rmb();
+
+	off = __kfifo_off(fifo, fifo->out + off);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - off);
+	ret = copy_to_user(to, fifo->buffer + off, l);
 
 	if (unlikely(ret))
-		return len - ret;
+		return ret + len - l;
 
-	__kfifo_add_in(fifo, len);
+	/* then get the rest (if any) from the beginning of the buffer */
+	return copy_to_user(to + l, fifo->buffer, len - l);
+}
 
+unsigned int __kfifo_in_n(struct kfifo *fifo,
+	const void *from, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_avail(fifo) < len + recsize)
+		return len + 1;
+
+	__kfifo_in_data(fifo, from, len, recsize);
+	return 0;
+}
+EXPORT_SYMBOL(__kfifo_in_n);
+
+/**
+ * kfifo_in - puts some data into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from buffer into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+				unsigned int len)
+{
+	len = min(kfifo_avail(fifo), len);
+
+	__kfifo_in_data(fifo, from, len, 0);
+	__kfifo_add_in(fifo, len);
 	return len;
 }
-EXPORT_SYMBOL(kfifo_from_user);
+EXPORT_SYMBOL(kfifo_in);
+
+unsigned int __kfifo_in_generic(struct kfifo *fifo,
+	const void *from, unsigned int len, unsigned int recsize)
+{
+	return __kfifo_in_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_in_generic);
+
+unsigned int __kfifo_out_n(struct kfifo *fifo,
+	void *to, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_len(fifo) < len + recsize)
+		return len;
+
+	__kfifo_out_data(fifo, to, len, recsize);
+	__kfifo_add_out(fifo, len + recsize);
+	return 0;
+}
+EXPORT_SYMBOL(__kfifo_out_n);
 
 /**
- * kfifo_to_user - gets data from the FIFO and write it to user space
+ * kfifo_out - gets some data from the FIFO
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
@@ -261,40 +277,124 @@ EXPORT_SYMBOL(kfifo_from_user);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
 {
-	unsigned int off;
-	unsigned int l;
-	int ret;
+	len = min(kfifo_len(fifo), len);
 
-	len = min(len, fifo->in - fifo->out);
+	__kfifo_out_data(fifo, to, len, 0);
+	__kfifo_add_out(fifo, len);
 
-	/*
-	 * Ensure that we sample the fifo->in index -before- we
-	 * start removing bytes from the kfifo.
-	 */
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out);
 
-	smp_rmb();
+unsigned int __kfifo_out_generic(struct kfifo *fifo,
+	void *to, unsigned int len, unsigned int recsize,
+	unsigned int *total)
+{
+	return __kfifo_out_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_out_generic);
 
-	off = __kfifo_off(fifo, fifo->out);
+unsigned int __kfifo_from_user_n(struct kfifo *fifo,
+	const void __user *from, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_avail(fifo) < len + recsize)
+		return len + 1;
 
-	/* first get the data from fifo->out until the end of the buffer */
-	l = min(len, fifo->size - off);
-	ret = copy_to_user(to, fifo->buffer + off, l);
+	return __kfifo_from_user_data(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_n);
 
-	if (unlikely(ret))
-		return l - ret;
+/**
+ * kfifo_from_user - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from into the
+ * FIFO depending and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int len)
+{
+	len = min(kfifo_avail(fifo), len);
+	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	__kfifo_add_in(fifo, len);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_from_user);
 
-	/* then get the rest (if any) from the beginning of the buffer */
-	ret = copy_to_user(to + l, fifo->buffer, len - l);
+unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
+	const void __user *from, unsigned int len, unsigned int recsize)
+{
+	return __kfifo_from_user_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_generic);
 
-	if (unlikely(ret))
-		return len - ret;
+unsigned int __kfifo_to_user_n(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned int reclen,
+	unsigned int recsize)
+{
+	unsigned int ret;
 
-	__kfifo_add_out(fifo, len);
+	if (kfifo_len(fifo) < reclen + recsize)
+		return len;
 
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+
+	if (likely(ret == 0))
+		__kfifo_add_out(fifo, reclen + recsize);
+
+	return ret;
+}
+EXPORT_SYMBOL(__kfifo_to_user_n);
+
+/**
+ * kfifo_to_user - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most @len bytes from the FIFO into the
+ * @to buffer and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len)
+{
+	len = min(kfifo_len(fifo), len);
+	len -= __kfifo_to_user_data(fifo, to, len, 0);
+	__kfifo_add_out(fifo, len);
 	return len;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
+unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned int recsize,
+	unsigned int *total)
+{
+	return __kfifo_to_user_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_to_user_generic);
+
+unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize)
+{
+	if (recsize == 0)
+		return kfifo_avail(fifo);
+
+	return __kfifo_peek_n(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_peek_generic);
+
+void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize)
+{
+	__kfifo_skip_rec(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_skip_generic);
+
-- 
cgit v1.2.3


From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 23 Dec 2009 06:52:08 -0500
Subject: ext4, jbd2: Add barriers for file systems with exernal journals

This is a bit complicated because we are trying to optimize when we
send barriers to the fs data disk.  We could just throw in an extra
barrier to the data disk whenever we send a barrier to the journal
disk, but that's not always strictly necessary.

We only need to send a barrier during a commit when there are data
blocks which are must be written out due to an inode written in
ordered mode, or if fsync() depends on the commit to force data blocks
to disk.  Finally, before we drop transactions from the beginning of
the journal during a checkpoint operation, we need to guarantee that
any blocks that were flushed out to the data disk are firmly on the
rust platter before we drop the transaction from the journal.

Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/fsync.c      | 16 ++++++++++++++--
 fs/jbd2/checkpoint.c | 15 +++++++++++++++
 fs/jbd2/commit.c     | 19 +++++++++++--------
 include/linux/jbd2.h |  1 +
 4 files changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		return ext4_force_commit(inode->i_sb);
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid))
+	if (jbd2_log_start_commit(journal, commit_tid)) {
+		/*
+		 * When the journal is on a different device than the
+		 * fs data disk, we need to issue the barrier in
+		 * writeback mode.  (In ordered mode, the jbd2 layer
+		 * will take care of issuing the barrier.  In
+		 * data=journal, all of the data blocks are written to
+		 * the journal device.)
+		 */
+		if (ext4_should_writeback_data(inode) &&
+		    (journal->j_fs_dev != journal->j_dev) &&
+		    (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 		jbd2_log_wait_commit(journal, commit_tid);
-	else if (journal->j_flags & JBD2_BARRIER)
+	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	return ret;
 }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <trace/events/jbd2.h>
 
 /*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
+
+	/*
+	 * If there is an external journal, we need to make sure that
+	 * any data blocks that were recently written out --- perhaps
+	 * by jbd2_log_do_checkpoint() --- are flushed out before we
+	 * drop the transactions from the external journal.  It's
+	 * unlikely this will be necessary, especially with a
+	 * appropriately sized journal, but we need this to guarantee
+	 * correctness.  Fortunately jbd2_cleanup_journal_tail()
+	 * doesn't get called all that often.
+	 */
+	if ((journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 	if (!(journal->j_flags & JBD2_ABORT))
 		jbd2_journal_update_superblock(journal, 1);
 	return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 			ret = err;
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
+		commit_transaction->t_flushed_data_blocks = 1;
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -708,8 +709,17 @@ start_journal_io:
 		}
 	}
 
-	/* Done it all: now write the commit record asynchronously. */
+	/* 
+	 * If the journal is not located on the file system device,
+	 * then we must flush the file system device before we issue
+	 * the commit record
+	 */
+	if (commit_transaction->t_flushed_data_blocks &&
+	    (journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 
+	/* Done it all: now write the commit record asynchronously. */
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
 			blkdev_issue_flush(journal->j_dev, NULL);
 	}
 
-	/*
-	 * This is the right place to wait for data buffers both for ASYNC
-	 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
-	 * the commit block went to disk (which happens above). If commit is
-	 * SYNC, we need to wait for data buffers before we start writing
-	 * commit block, which happens below in such setting.
-	 */
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
 	if (err) {
 		printk(KERN_WARNING
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f1011f7f3d41..638ce4554c76 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -653,6 +653,7 @@ struct transaction_s
 	 * waiting for it to finish.
 	 */
 	unsigned int t_synchronous_commit:1;
+	unsigned int t_flushed_data_blocks:1;
 
 	/*
 	 * For use by the filesystem to store fs-specific data
-- 
cgit v1.2.3


From c459001fa4f71deafb62e00fa70d35f695498965 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Wed, 9 Dec 2009 03:05:30 +0300
Subject: ext3: quota macros cleanup [V2]

Currently all quota block reservation macros contains hardcoded "2"
aka MAXQUOTAS value. This is no good because in some places it is not
obvious to understand what does this digit represent. Let's introduce
new macro with self descriptive name.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c          | 8 ++++----
 fs/ext3/namei.c          | 8 ++++----
 include/linux/ext3_jbd.h | 7 +++++--
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509e..455e6e6e5cb9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 		if (max_blocks > DIO_MAX_BLOCKS)
 			max_blocks = DIO_MAX_BLOCKS;
 		handle = ext3_journal_start(inode, DIO_CREDITS +
-				2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+				EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
-					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+					EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during vfs_dq_init so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b77..81f7348b2de3 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2175,7 +2175,7 @@ static int ext3_symlink (struct inode * dir,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index cf82d519be40..d7b5ddca99c2 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -44,13 +44,13 @@
 
 #define EXT3_DATA_TRANS_BLOCKS(sb)	(EXT3_SINGLEDATA_TRANS_BLOCKS + \
 					 EXT3_XATTR_TRANS_BLOCKS - 2 + \
-					 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
+					 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
 
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
  * generous.  We can grow the delete transaction later if necessary. */
 
-#define EXT3_DELETE_TRANS_BLOCKS(sb)	(2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)
+#define EXT3_DELETE_TRANS_BLOCKS(sb)   (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
 
 /* Define an arbitrary limit for the amount of data we will anticipate
  * writing to any given transaction.  For unbounded transactions such as
@@ -86,6 +86,9 @@
 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0
 #endif
+#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
 
 int
 ext3_mark_iloc_dirty(handle_t *handle,
-- 
cgit v1.2.3


From b462707e7ccad058ae151e5c5b06eb5cadcb737f Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 14 Dec 2009 15:21:12 +0300
Subject: Add unlocked version of inode_add_bytes() function

Quota code requires unlocked version of this function. Off course
we can just copy-paste the code, but copy-pasting is always an evil.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/stat.c          | 10 ++++++++--
 include/linux/fs.h |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8b..c4ecd52c5737 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 */
 
-void inode_add_bytes(struct inode *inode, loff_t bytes)
+/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
+void __inode_add_bytes(struct inode *inode, loff_t bytes)
 {
-	spin_lock(&inode->i_lock);
 	inode->i_blocks += bytes >> 9;
 	bytes &= 511;
 	inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
 		inode->i_blocks++;
 		inode->i_bytes -= 512;
 	}
+}
+
+void inode_add_bytes(struct inode *inode, loff_t bytes)
+{
+	spin_lock(&inode->i_lock);
+	__inode_add_bytes(inode, bytes);
 	spin_unlock(&inode->i_lock);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e3012e0ac06..9147ca88f253 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2297,6 +2297,7 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
-- 
cgit v1.2.3


From fd8fbfc1709822bd94247c5b2ab15a5f5041e103 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 14 Dec 2009 15:21:13 +0300
Subject: quota: decouple fs reserved space from quota reservation

Currently inode_reservation is managed by fs itself and this
reservation is transfered on dquot_transfer(). This means what
inode_reservation must always be in sync with
dquot->dq_dqb.dqb_rsvspace. Otherwise dquot_transfer() will result
in incorrect quota(WARN_ON in dquot_claim_reserved_space() will be
triggered)
This is not easy because of complex locking order issues
for example http://bugzilla.kernel.org/show_bug.cgi?id=14739

The patch introduce quota reservation field for each fs-inode
(fs specific inode is used in order to prevent bloating generic
vfs inode). This reservation is managed by quota code internally
similar to i_blocks/i_bytes and may not be always in sync with
internal fs reservation.

Also perform some code rearrangement:
- Unify dquot_reserve_space() and dquot_reserve_space()
- Unify dquot_release_reserved_space() and dquot_free_space()
- Also this patch add missing warning update to release_rsv()
  dquot_release_reserved_space() must call flush_warnings() as
  dquot_free_space() does.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 213 ++++++++++++++++++++++++++++----------------------
 include/linux/quota.h |   5 +-
 2 files changed, 122 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c13..1cb8fa84300f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1318,6 +1318,67 @@ void vfs_dq_drop(struct inode *inode)
 }
 EXPORT_SYMBOL(vfs_dq_drop);
 
+/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+	/* Filesystem must explicitly define it's own method in order to use
+	 * quota reservation interface */
+	BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+	return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) += number;
+	spin_unlock(&inode->i_lock);
+}
+
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	__inode_add_bytes(inode, number);
+	spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+	qsize_t ret;
+	spin_lock(&inode->i_lock);
+	ret = *inode_reserved_space(inode);
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+				int reserve)
+{
+	if (reserve)
+		inode_add_rsv_space(inode, number);
+	else
+		inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+	if (reserve)
+		inode_sub_rsv_space(inode, number);
+	else
+		inode_sub_bytes(inode, number);
+}
+
 /*
  * Following four functions update i_blocks+i_bytes fields and
  * quota information (together with appropriate checks)
@@ -1336,6 +1397,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 	int cnt, ret = QUOTA_OK;
 	char warntype[MAXQUOTAS];
 
+	/*
+	 * First test before acquiring mutex - solves deadlocks when we
+	 * re-enter the quota code and are already holding the mutex
+	 */
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out_unlock;
+	}
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 
@@ -1346,7 +1422,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
 		    == NO_QUOTA) {
 			ret = NO_QUOTA;
-			goto out_unlock;
+			spin_unlock(&dq_data_lock);
+			goto out_flush_warn;
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1434,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		else
 			dquot_incr_space(inode->i_dquot[cnt], number);
 	}
-	if (!reserve)
-		inode_add_bytes(inode, number);
-out_unlock:
+	inode_incr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
-	flush_warnings(inode->i_dquot, warntype);
-	return ret;
-}
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
-	int cnt, ret = QUOTA_OK;
-
-	/*
-	 * First test before acquiring mutex - solves deadlocks when we
-	 * re-enter the quota code and are already holding the mutex
-	 */
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out;
-	}
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out_unlock;
-	}
-
-	ret = __dquot_alloc_space(inode, number, warn, 0);
-	if (ret == NO_QUOTA)
-		goto out_unlock;
 
+	if (reserve)
+		goto out_flush_warn;
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+out_flush_warn:
+	flush_warnings(inode->i_dquot, warntype);
 out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
 	return ret;
 }
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+	return __dquot_alloc_space(inode, number, warn, 0);
+}
 EXPORT_SYMBOL(dquot_alloc_space);
 
 int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
 {
-	int ret = QUOTA_OK;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return __dquot_alloc_space(inode, number, warn, 1);
 }
 EXPORT_SYMBOL(dquot_reserve_space);
 
@@ -1471,14 +1516,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	int ret = QUOTA_OK;
 
 	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
 		goto out;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode))	{
 		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
 		goto out;
 	}
 
@@ -1490,7 +1535,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 							number);
 	}
 	/* Update inode bytes */
-	inode_add_bytes(inode, number);
+	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -1502,39 +1547,10 @@ out:
 }
 EXPORT_SYMBOL(dquot_claim_space);
 
-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-	int cnt;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	spin_lock(&dq_data_lock);
-	/* Release reserved dquots */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
-	}
-	spin_unlock(&dq_data_lock);
-
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
@@ -1543,7 +1559,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode)) {
 out_sub:
-		inode_sub_bytes(inode, number);
+		inode_decr_space(inode, number, reserve);
 		return QUOTA_OK;
 	}
 
@@ -1558,20 +1574,42 @@ out_sub:
 		if (!inode->i_dquot[cnt])
 			continue;
 		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
-		dquot_decr_space(inode->i_dquot[cnt], number);
+		if (reserve)
+			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+		else
+			dquot_decr_space(inode->i_dquot[cnt], number);
 	}
-	inode_sub_bytes(inode, number);
+	inode_decr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
+
+	if (reserve)
+		goto out_unlock;
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+	return  __dquot_free_space(inode, number, 0);
+}
 EXPORT_SYMBOL(dquot_free_space);
 
+/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+	__dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
 /*
  * This operation can block, but only after everything is updated
  */
@@ -1609,19 +1647,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
-/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
-	qsize_t reserved_space = 0;
-
-	if (sb_any_quota_active(inode->i_sb) &&
-	    inode->i_sb->dq_op->get_reserved_space)
-		reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
-	return reserved_space;
-}
-
 /*
  * Transfer the number of inode and blocks from one diskquota to an other.
  *
@@ -1665,7 +1690,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	}
 	spin_lock(&dq_data_lock);
 	cur_space = inode_get_bytes(inode);
-	rsv_space = dquot_get_reserved_space(inode);
+	rsv_space = inode_get_rsv_space(inode);
 	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e70e62194243..a6861f117480 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -315,8 +315,9 @@ struct dquot_operations {
 	int (*claim_space) (struct inode *, qsize_t);
 	/* release rsved quota for delayed alloc */
 	void (*release_rsv) (struct inode *, qsize_t);
-	/* get reserved quota for delayed alloc */
-	qsize_t (*get_reserved_space) (struct inode *);
+	/* get reserved quota for delayed alloc, value returned is managed by
+	 * quota code only */
+	qsize_t *(*get_reserved_space) (struct inode *);
 };
 
 /* Operations handling requests from userspace */
-- 
cgit v1.2.3


From b8a052d01669977f224255b0f9f2737018171ddb Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 14 Dec 2009 13:00:30 -0600
Subject: ext3: Replace lock/unlock_super() with an explicit lock for the
 orphan list

Use a separate lock to protect the orphan list, so we can stop
overloading the use of lock_super().

Port of ext4 commit 3b9d4ed26680771295d904a6b83e88e620780893
by Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/namei.c            | 20 +++++++++++---------
 fs/ext3/super.c            |  1 +
 include/linux/ext3_fs_sb.h |  1 +
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 81f7348b2de3..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0, rc;
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT3_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
-	 * here (on lock_super()), so race with ext3_link() which might bump
+	 * here (on s_orphan_lock), so race with ext3_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
 	ext3_std_error(inode->i_sb, err);
 	return err;
 }
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
 	ext3_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 806b8b780add..97dd3828384c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sb->dq_op = &ext3_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
 
 	sb->s_root = NULL;
 
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index f07f34de2f0e..dd61d83026a0 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -72,6 +72,7 @@ struct ext3_sb_info {
 	struct inode * s_journal_inode;
 	struct journal_s * s_journal;
 	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
-- 
cgit v1.2.3


From 96d2a495c25d525873529b736cdb63ad502b101c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 14 Dec 2009 13:01:05 -0600
Subject: ext3: Replace lock/unlock_super() with an explicit lock for resizing

Use a separate lock to protect s_groups_count and the other block
group descriptors which get changed via an on-line resize operation,
so we can stop overloading the use of lock_super().

Port of ext4 commit 32ed5058ce90024efcd811254b4b1de0468099df by
Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/resize.c           | 35 ++++++++++++++++++-----------------
 fs/ext3/super.c            |  1 +
 include/linux/ext3_fs_sb.h |  1 +
 3 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b6179178..54351ac7cef9 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -662,11 +662,12 @@ exit_free:
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	/*
 	 * OK, now we've set up the new group.  Time to make it active.
 	 *
-	 * Current kernels don't lock all allocations via lock_super(),
+	 * We do not lock all allocations via s_resize_lock
 	 * so we have to be safe wrt. concurrent accesses the group
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
 	o_groups_count = EXT3_SB(sb)->s_groups_count;
 
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_resize_lock);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 						 EXT3_SB(sb)->s_sbh))) {
 		ext3_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 97dd3828384c..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1929,6 +1929,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index dd61d83026a0..258088ab3c6b 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -73,6 +73,7 @@ struct ext3_sb_info {
 	struct journal_s * s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
-- 
cgit v1.2.3


From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 23 Dec 2009 07:57:07 -0500
Subject: fs-writeback: Add helper function to start writeback if idle

ext4, at least, would like to start pushing on writeback if it starts
to get close to ENOSPC when reserving worst-case blocks for delalloc
writes.  Writing out delalloc data will convert those worst-case
predictions into usually smaller actual usage, freeing up space
before we hit ENOSPC based on this speculation.

Thanks to Jens for the suggestion for the helper function,
& the naming help.

I've made the helper return status on whether writeback was
started even though I don't plan to use it in the ext4 patch;
it seems like it would be potentially useful to test this
in some cases.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c         | 17 +++++++++++++++++
 include/linux/writeback.h |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..f6c2155e0026 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
+/**
+ * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		writeback_inodes_sb(sb);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
 /**
  * sync_inodes_sb	-	sync sb inode pages
  * @sb: the superblock
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c18c008f4bbf..76e8903cd204 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -70,6 +70,7 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
+int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
 void writeback_inodes_wbc(struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-- 
cgit v1.2.3


From 119eecc831a42bd090543568932e440c6831f1bb Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Wed, 23 Dec 2009 09:10:48 +0100
Subject: Fix usb_serial_probe() problem introduced by the recent kfifo changes

The USB serial code was a new user of the kfifo API, and it was missed
when porting things to the new kfifo API.

Please make the write_fifo in place.  Here is my patch to fix the
regression and full ported version.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Reported-and-tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Greg KH <greg@kroah.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/serial/generic.c    | 12 ++++++------
 drivers/usb/serial/usb-serial.c |  5 ++---
 include/linux/usb/serial.h      |  3 ++-
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index b0f1183755c9..f1ea3a33b6e6 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	if (port->write_urb_busy)
 		start_io = false;
 	else {
-		start_io = (kfifo_len(port->write_fifo) != 0);
+		start_io = (kfifo_len(&port->write_fifo) != 0);
 		port->write_urb_busy = start_io;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
+	count = kfifo_out_locked(&port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock);
+	count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
@@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 				(serial->type->max_in_flight_urbs -
 				 port->urbs_in_flight);
 	} else if (serial->num_bulk_out)
-		room = port->write_fifo->size - kfifo_len(port->write_fifo);
+		room = kfifo_avail(&port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
@@ -391,7 +391,7 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 		chars = port->tx_bytes_flight;
 		spin_unlock_irqrestore(&port->lock, flags);
 	} else if (serial->num_bulk_out)
-		chars = kfifo_len(port->write_fifo);
+		chars = kfifo_len(&port->write_fifo);
 
 	dbg("%s - returns %d", __func__, chars);
 	return chars;
@@ -507,7 +507,7 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 		if (status) {
 			dbg("%s - nonzero multi-urb write bulk status "
 				"received: %d", __func__, status);
-			kfifo_reset(port->write_fifo);
+			kfifo_reset_out(&port->write_fifo);
 		} else
 			usb_serial_generic_write_start(port);
 	}
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 636a4f23445e..33c85f7084f8 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -595,8 +595,7 @@ static void port_release(struct device *dev)
 	usb_free_urb(port->write_urb);
 	usb_free_urb(port->interrupt_in_urb);
 	usb_free_urb(port->interrupt_out_urb);
-	if (!IS_ERR(port->write_fifo) && port->write_fifo)
-		kfifo_free(port->write_fifo);
+	kfifo_free(&port->write_fifo);
 	kfree(port->bulk_in_buffer);
 	kfree(port->bulk_out_buffer);
 	kfree(port->interrupt_in_buffer);
@@ -939,7 +938,7 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL))
+		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index acf6e457c04b..1819396ed501 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -16,6 +16,7 @@
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/sysrq.h>
+#include <linux/kfifo.h>
 
 #define SERIAL_TTY_MAJOR	188	/* Nice legal number now */
 #define SERIAL_TTY_MINORS	254	/* loads of devices :) */
@@ -94,7 +95,7 @@ struct usb_serial_port {
 	unsigned char		*bulk_out_buffer;
 	int			bulk_out_size;
 	struct urb		*write_urb;
-	struct kfifo		*write_fifo;
+	struct kfifo		write_fifo;
 	int			write_urb_busy;
 	__u8			bulk_out_endpointAddress;
 
-- 
cgit v1.2.3


From 9c717de946ed7f5782e6dffacf2d05859073058c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 23 Dec 2009 09:23:33 -0800
Subject: kfifo: fix Error/broken kernel-doc notation

Fix kernel-doc errors and warnings in new header file kfifo.h.
Don't use kernel-doc "/**" for internal functions whose comments
are not in kernel-doc format.

kernel-doc section header names (like "Note:") must be unique
per function.  Looks like I need to document that.

  Error(include/linux/kfifo.h:76): duplicate section name 'Note'
  Warning(include/linux/kfifo.h:88): Excess function parameter 'size' description in 'INIT_KFIFO'
  Error(include/linux/kfifo.h:101): duplicate section name 'Note'
  Warning(include/linux/kfifo.h:257): No description found for parameter 'fifo'
    (many of this last type, from internal functions)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 486e8ad3bb50..3d44e9c65a8e 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -69,8 +69,8 @@ struct kfifo {
  * @name: name of the declared kfifo datatype
  * @size: size of the fifo buffer
  *
- * Note: the macro can be used inside struct or union declaration
- * Note: the macro creates two objects:
+ * Note1: the macro can be used inside struct or union declaration
+ * Note2: the macro creates two objects:
  *  A kfifo object with the given name and a buffer for the kfifo
  *  object named name##kfifo_buffer
  */
@@ -83,7 +83,6 @@ union { \
 /**
  * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
  */
 #define INIT_KFIFO(name) \
 	name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \
@@ -94,8 +93,8 @@ union { \
  * @name: name of the declared kfifo datatype
  * @size: size of the fifo buffer
  *
- * Note: the macro can be used for global and local kfifo data type variables
- * Note: the macro creates two objects:
+ * Note1: the macro can be used for global and local kfifo data type variables
+ * Note2: the macro creates two objects:
  *  A kfifo object with the given name and a buffer for the kfifo
  *  object named name##kfifo_buffer
  */
@@ -249,7 +248,7 @@ extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
 extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
 	void __user *to, unsigned int n);
 
-/**
+/*
  * __kfifo_add_out internal helper function for updating the out offset
  */
 static inline void __kfifo_add_out(struct kfifo *fifo,
@@ -259,7 +258,7 @@ static inline void __kfifo_add_out(struct kfifo *fifo,
 	fifo->out += off;
 }
 
-/**
+/*
  * __kfifo_add_in internal helper function for updating the in offset
  */
 static inline void __kfifo_add_in(struct kfifo *fifo,
@@ -269,7 +268,7 @@ static inline void __kfifo_add_in(struct kfifo *fifo,
 	fifo->in += off;
 }
 
-/**
+/*
  * __kfifo_off internal helper function for calculating the index of a
  * given offeset
  */
@@ -278,7 +277,7 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
 	return off & (fifo->size - 1);
 }
 
-/**
+/*
  * __kfifo_peek_n internal helper function for determinate the length of
  * the next record in the fifo
  */
@@ -299,7 +298,7 @@ static inline unsigned int __kfifo_peek_n(struct kfifo *fifo,
 #undef	__KFIFO_GET
 }
 
-/**
+/*
  * __kfifo_poke_n internal helper function for storing the length of
  * the next record into the fifo
  */
@@ -319,7 +318,7 @@ static inline void __kfifo_poke_n(struct kfifo *fifo,
 #undef	__KFIFO_PUT
 }
 
-/**
+/*
  * __kfifo_in_... internal functions for put date into the fifo
  * do not call it directly, use kfifo_in_rec() instead
  */
@@ -367,7 +366,7 @@ static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo,
 	return __kfifo_in_rec(fifo, from, n, recsize);
 }
 
-/**
+/*
  * __kfifo_out_... internal functions for get date from the fifo
  * do not call it directly, use kfifo_out_rec() instead
  */
@@ -425,7 +424,7 @@ static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo,
 	return __kfifo_out_rec(fifo, to, n, recsize, total);
 }
 
-/**
+/*
  * __kfifo_from_user_... internal functions for transfer from user space into
  * the fifo. do not call it directly, use kfifo_from_user_rec() instead
  */
@@ -474,7 +473,7 @@ static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo,
 	return __kfifo_from_user_rec(fifo, from, n, recsize);
 }
 
-/**
+/*
  * __kfifo_to_user_... internal functions for transfer fifo data into user space
  * do not call it directly, use kfifo_to_user_rec() instead
  */
@@ -533,7 +532,7 @@ static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo,
 	return __kfifo_to_user_rec(fifo, to, n, recsize, total);
 }
 
-/**
+/*
  * __kfifo_peek_... internal functions for peek into the next fifo record
  * do not call it directly, use kfifo_peek_rec() instead
  */
@@ -557,7 +556,7 @@ static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo,
 	return __kfifo_peek_n(fifo, recsize);
 }
 
-/**
+/*
  * __kfifo_skip_... internal functions for skip the next fifo record
  * do not call it directly, use kfifo_skip_rec() instead
  */
-- 
cgit v1.2.3


From 26579ab70aa0e0ea434e6e100279d2f67c094431 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:19 +0200
Subject: Driver core: device_attribute parameters can often be const*

Most device_attributes are const, and are begging to be
put in a ro section. However, the create and remove
file interfaces were failing to propagate the const promise
which the only functions they call offer.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/filesystems/sysfs.txt | 8 ++++----
 drivers/base/core.c                 | 6 ++++--
 include/linux/device.h              | 4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index b245d524d568..a4ac2b98c613 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -91,8 +91,8 @@ struct device_attribute {
 			 const char *buf, size_t count);
 };
 
-int device_create_file(struct device *, struct device_attribute *);
-void device_remove_file(struct device *, struct device_attribute *);
+int device_create_file(struct device *, const struct device_attribute *);
+void device_remove_file(struct device *, const struct device_attribute *);
 
 It also defines this helper for defining device attributes: 
 
@@ -316,8 +316,8 @@ DEVICE_ATTR(_name, _mode, _show, _store);
 
 Creation/Removal:
 
-int device_create_file(struct device *device, struct device_attribute * attr);
-void device_remove_file(struct device * dev, struct device_attribute * attr);
+int device_create_file(struct device *dev, const struct device_attribute * attr);
+void device_remove_file(struct device *dev, const struct device_attribute * attr);
 
 
 - bus drivers (include/linux/device.h)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index f1290cbd1350..2fd9e611f8a6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -446,7 +446,8 @@ struct kset *devices_kset;
  * @dev: device.
  * @attr: device attribute descriptor.
  */
-int device_create_file(struct device *dev, struct device_attribute *attr)
+int device_create_file(struct device *dev,
+		       const struct device_attribute *attr)
 {
 	int error = 0;
 	if (dev)
@@ -459,7 +460,8 @@ int device_create_file(struct device *dev, struct device_attribute *attr)
  * @dev: device.
  * @attr: device attribute descriptor.
  */
-void device_remove_file(struct device *dev, struct device_attribute *attr)
+void device_remove_file(struct device *dev,
+			const struct device_attribute *attr)
 {
 	if (dev)
 		sysfs_remove_file(&dev->kobj, &attr->attr);
diff --git a/include/linux/device.h b/include/linux/device.h
index 2a73d9bcbc9c..aa5b3e66a147 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -319,9 +319,9 @@ struct device_attribute {
 struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
 
 extern int __must_check device_create_file(struct device *device,
-					   struct device_attribute *entry);
+					   const struct device_attribute *entry);
 extern void device_remove_file(struct device *dev,
-			       struct device_attribute *attr);
+			       const struct device_attribute *attr);
 extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
-- 
cgit v1.2.3


From 66ecb92be9eb579df93add22d19843e7869f168e Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:20 +0200
Subject: Driver core: bin_attribute parameters can often be const*

Many struct bin_attribute descriptors are purely read-only
structures, and there's no need to change them. Therefore
make the promise not to, which will let those descriptors
be put in a ro section.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 6 ++++--
 fs/sysfs/bin.c         | 6 ++++--
 include/linux/device.h | 6 +++---
 include/linux/sysfs.h  | 9 +++++----
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2fd9e611f8a6..83afc8b8f27b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -472,7 +472,8 @@ void device_remove_file(struct device *dev,
  * @dev: device.
  * @attr: device binary attribute descriptor.
  */
-int device_create_bin_file(struct device *dev, struct bin_attribute *attr)
+int device_create_bin_file(struct device *dev,
+			   const struct bin_attribute *attr)
 {
 	int error = -EINVAL;
 	if (dev)
@@ -486,7 +487,8 @@ EXPORT_SYMBOL_GPL(device_create_bin_file);
  * @dev: device.
  * @attr: device binary attribute descriptor.
  */
-void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
+void device_remove_bin_file(struct device *dev,
+			    const struct bin_attribute *attr)
 {
 	if (dev)
 		sysfs_remove_bin_file(&dev->kobj, attr);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10ae..a0a500af24a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
  *	@attr:	attribute descriptor.
  */
 
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+int sysfs_create_bin_file(struct kobject *kobj,
+			  const struct bin_attribute *attr)
 {
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
  *	@attr:	attribute descriptor.
  */
 
-void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject *kobj,
+			   const struct bin_attribute *attr)
 {
 	sysfs_hash_and_remove(kobj->sd, attr->attr.name);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index aa5b3e66a147..10d74ce93a46 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -319,13 +319,13 @@ struct device_attribute {
 struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
 
 extern int __must_check device_create_file(struct device *device,
-					   const struct device_attribute *entry);
+					const struct device_attribute *entry);
 extern void device_remove_file(struct device *dev,
 			       const struct device_attribute *attr);
 extern int __must_check device_create_bin_file(struct device *dev,
-					       struct bin_attribute *attr);
+					const struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
-				   struct bin_attribute *attr);
+				   const struct bin_attribute *attr);
 extern int device_schedule_callback_owner(struct device *dev,
 		void (*func)(struct device *dev), struct module *owner);
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 9d68fed50f11..cfa83083a2d4 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -99,8 +99,9 @@ int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr,
 void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
 
 int __must_check sysfs_create_bin_file(struct kobject *kobj,
-				       struct bin_attribute *attr);
-void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
+				       const struct bin_attribute *attr);
+void sysfs_remove_bin_file(struct kobject *kobj,
+			   const struct bin_attribute *attr);
 
 int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target,
 				   const char *name);
@@ -175,13 +176,13 @@ static inline void sysfs_remove_file(struct kobject *kobj,
 }
 
 static inline int sysfs_create_bin_file(struct kobject *kobj,
-					struct bin_attribute *attr)
+					const struct bin_attribute *attr)
 {
 	return 0;
 }
 
 static inline void sysfs_remove_bin_file(struct kobject *kobj,
-					 struct bin_attribute *attr)
+					 const struct bin_attribute *attr)
 {
 }
 
-- 
cgit v1.2.3


From 099c2f21d8cf0724b85abb2c589d6276953781b7 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:21 +0200
Subject: Driver core: driver_attribute parameters can often be const*

Many struct driver_attribute descriptors are purely read-only
structures, and there's no need to change them. Therefore make
the promise not to, which will let those descriptors be put in
a ro section.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/driver-model/driver.txt | 4 ++--
 Documentation/filesystems/sysfs.txt   | 4 ++--
 drivers/base/driver.c                 | 4 ++--
 include/linux/device.h                | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt
index 60120fb3b961..d2cd6fb8ba9e 100644
--- a/Documentation/driver-model/driver.txt
+++ b/Documentation/driver-model/driver.txt
@@ -226,5 +226,5 @@ struct driver_attribute driver_attr_debug;
 This can then be used to add and remove the attribute from the
 driver's directory using:
 
-int driver_create_file(struct device_driver *, struct driver_attribute *);
-void driver_remove_file(struct device_driver *, struct driver_attribute *);
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index a4ac2b98c613..931c806642c5 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -358,7 +358,7 @@ DRIVER_ATTR(_name, _mode, _show, _store)
 
 Creation/Removal:
 
-int driver_create_file(struct device_driver *, struct driver_attribute *);
-void driver_remove_file(struct device_driver *, struct driver_attribute *);
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
 
 
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index f367885a7646..90c9fff09ead 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(driver_find_device);
  * @attr: driver attribute descriptor.
  */
 int driver_create_file(struct device_driver *drv,
-		       struct driver_attribute *attr)
+		       const struct driver_attribute *attr)
 {
 	int error;
 	if (drv)
@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_create_file);
  * @attr: driver attribute descriptor.
  */
 void driver_remove_file(struct device_driver *drv,
-			struct driver_attribute *attr)
+			const struct driver_attribute *attr)
 {
 	if (drv)
 		sysfs_remove_file(&drv->p->kobj, &attr->attr);
diff --git a/include/linux/device.h b/include/linux/device.h
index 10d74ce93a46..a62799f2ab00 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -166,9 +166,9 @@ struct driver_attribute driver_attr_##_name =		\
 	__ATTR(_name, _mode, _show, _store)
 
 extern int __must_check driver_create_file(struct device_driver *driver,
-					   struct driver_attribute *attr);
+					const struct driver_attribute *attr);
 extern void driver_remove_file(struct device_driver *driver,
-			       struct driver_attribute *attr);
+			       const struct driver_attribute *attr);
 
 extern int __must_check driver_add_kobj(struct device_driver *drv,
 					struct kobject *kobj,
-- 
cgit v1.2.3


From 29d249ed80c80b479df78e456e6809223c648505 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 18 Dec 2009 09:59:48 -0800
Subject: Staging: dst: remove from the tree

DST is dead, no one is using it and upstream
has abandoned it, so remove it from the tree because
it is not going anywhere.

Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/Kconfig           |   2 -
 drivers/staging/Makefile          |   1 -
 drivers/staging/dst/Kconfig       |  67 ---
 drivers/staging/dst/Makefile      |   3 -
 drivers/staging/dst/crypto.c      | 733 -----------------------------
 drivers/staging/dst/dcore.c       | 968 --------------------------------------
 drivers/staging/dst/export.c      | 660 --------------------------
 drivers/staging/dst/state.c       | 844 ---------------------------------
 drivers/staging/dst/thread_pool.c | 348 --------------
 drivers/staging/dst/trans.c       | 337 -------------
 include/linux/dst.h               | 587 -----------------------
 11 files changed, 4550 deletions(-)
 delete mode 100644 drivers/staging/dst/Kconfig
 delete mode 100644 drivers/staging/dst/Makefile
 delete mode 100644 drivers/staging/dst/crypto.c
 delete mode 100644 drivers/staging/dst/dcore.c
 delete mode 100644 drivers/staging/dst/export.c
 delete mode 100644 drivers/staging/dst/state.c
 delete mode 100644 drivers/staging/dst/thread_pool.c
 delete mode 100644 drivers/staging/dst/trans.c
 delete mode 100644 include/linux/dst.h

(limited to 'include/linux')

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index db0de940949e..94eb86319ff3 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -87,8 +87,6 @@ source "drivers/staging/frontier/Kconfig"
 
 source "drivers/staging/dream/Kconfig"
 
-source "drivers/staging/dst/Kconfig"
-
 source "drivers/staging/pohmelfs/Kconfig"
 
 source "drivers/staging/b3dfg/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 73c6a71155e0..b5e67b889f60 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_RTL8192E)		+= rtl8192e/
 obj-$(CONFIG_INPUT_MIMIO)	+= mimio/
 obj-$(CONFIG_TRANZPORT)		+= frontier/
 obj-$(CONFIG_DREAM)		+= dream/
-obj-$(CONFIG_DST)		+= dst/
 obj-$(CONFIG_POHMELFS)		+= pohmelfs/
 obj-$(CONFIG_B3DFG)		+= b3dfg/
 obj-$(CONFIG_IDE_PHISON)	+= phison/
diff --git a/drivers/staging/dst/Kconfig b/drivers/staging/dst/Kconfig
deleted file mode 100644
index 448d342ac2a2..000000000000
--- a/drivers/staging/dst/Kconfig
+++ /dev/null
@@ -1,67 +0,0 @@
-config DST
-	tristate "Distributed storage"
-	depends on NET && CRYPTO && SYSFS && BLK_DEV
-	select CONNECTOR
-	---help---
-	DST is a network block device storage, which can be used to organize
-	exported storage on the remote nodes into the local block device.
-
-	DST works on top of any network media and protocol; it is just a matter
-	of configuration utility to understand the correct addresses. The most
-	common example is TCP over IP, which allows to pass through firewalls and
-	create remote backup storage in a different datacenter. DST requires
-	single port to be enabled on the exporting node and outgoing connections
-	on the local node.
-
-	DST works with in-kernel client and server, which improves performance by
-	eliminating unneded data copies and by not depending on the version
-	of the external IO components. It requires userspace configuration utility
-	though.
-
-	DST uses transaction model, when each store has to be explicitly acked
-	from the remote node to be considered as successfully written. There
-	may be lots of in-flight transactions. When remote host does not ack
-	the transaction it will be resent predefined number of times with specified
-	timeouts between them. All those parameters are configurable. Transactions
-	are marked as failed after all resends complete unsuccessfully; having
-	long enough resend timeout and/or large number of resends allows not to
-	return error to the higher (FS usually) layer in case of short network
-	problems or remote node outages. In case of network RAID setup this means
-	that storage will not degrade until transactions are marked as failed, and
-	thus will not force checksum recalculation and data rebuild. In case of
-	connection failure DST will try to reconnect to the remote node automatically.
-	DST sends ping commands at idle time to detect if remote node is alive.
-
-	Because of transactional model it is possible to use zero-copy sending
-	without worry of data corruption (which in turn could be detected by the
-	strong checksums though).
-
-	DST may fully encrypt the data channel in case of untrusted channel and implement
-	strong checksum of the transferred data. It is possible to configure algorithms
-	and crypto keys; they should match on both sides of the network channel.
-	Crypto processing does not introduce noticeble performance overhead, since DST
-	uses configurable pool of threads to perform crypto processing.
-
-	DST utilizes memory pool model of all its transaction allocations (it is the
-	only additional allocation on the client) and server allocations (bio pools,
-	while pages are allocated from the slab).
-
-	At startup DST performs a simple negotiation with the export node to determine
-	access permissions and size of the exported storage. It can be extended if
-	new parameters should be autonegotiated.
-
-	DST carries block IO flags in the protocol, which allows to transparently implement
-	barriers and sync/flush operations. Those flags are used in the export node where
-	IO against the local storage is performed, which means that sync write will be sync
-	on the remote node too, which in turn improves data integrity and improved resistance
-	to errors and data corruption during power outages or storage damages.
-
-	Homepage: http://www.ioremap.net/projects/dst
-	Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/
-
-config DST_DEBUG
-	bool "DST debug"
-	depends on DST
-	---help---
-	This option will enable HEAVY debugging of the DST.
-	Turn it on ONLY if you have to debug some really obscure problem.
diff --git a/drivers/staging/dst/Makefile b/drivers/staging/dst/Makefile
deleted file mode 100644
index 3a8b0cf9643e..000000000000
--- a/drivers/staging/dst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_DST) += nst.o
-
-nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o
diff --git a/drivers/staging/dst/crypto.c b/drivers/staging/dst/crypto.c
deleted file mode 100644
index 351295c97a4b..000000000000
--- a/drivers/staging/dst/crypto.c
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/crypto.h>
-#include <linux/dst.h>
-#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-
-/*
- * Tricky bastard, but IV can be more complex with time...
- */
-static inline u64 dst_gen_iv(struct dst_trans *t)
-{
-	return t->gen;
-}
-
-/*
- * Crypto machinery: hash/cipher support for the given crypto controls.
- */
-static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key)
-{
-	int err;
-	struct crypto_hash *hash;
-
-	hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hash)) {
-		err = PTR_ERR(hash);
-		dprintk("%s: failed to allocate hash '%s', err: %d.\n",
-				__func__, ctl->hash_algo, err);
-		goto err_out_exit;
-	}
-
-	ctl->crypto_attached_size = crypto_hash_digestsize(hash);
-
-	if (!ctl->hash_keysize)
-		return hash;
-
-	err = crypto_hash_setkey(hash, key, ctl->hash_keysize);
-	if (err) {
-		dprintk("%s: failed to set key for hash '%s', err: %d.\n",
-				__func__, ctl->hash_algo, err);
-		goto err_out_free;
-	}
-
-	return hash;
-
-err_out_free:
-	crypto_free_hash(hash);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl,
-		u8 *key)
-{
-	int err = -EINVAL;
-	struct crypto_ablkcipher *cipher;
-
-	if (!ctl->cipher_keysize)
-		goto err_out_exit;
-
-	cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0);
-	if (IS_ERR(cipher)) {
-		err = PTR_ERR(cipher);
-		dprintk("%s: failed to allocate cipher '%s', err: %d.\n",
-				__func__, ctl->cipher_algo, err);
-		goto err_out_exit;
-	}
-
-	crypto_ablkcipher_clear_flags(cipher, ~0);
-
-	err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize);
-	if (err) {
-		dprintk("%s: failed to set key for cipher '%s', err: %d.\n",
-				__func__, ctl->cipher_algo, err);
-		goto err_out_free;
-	}
-
-	return cipher;
-
-err_out_free:
-	crypto_free_ablkcipher(cipher);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Crypto engine has a pool of pages to encrypt data into before sending
- * it over the network. This pool is freed/allocated here.
- */
-static void dst_crypto_pages_free(struct dst_crypto_engine *e)
-{
-	unsigned int i;
-
-	for (i = 0; i < e->page_num; ++i)
-		__free_page(e->pages[i]);
-	kfree(e->pages);
-}
-
-static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num)
-{
-	int i;
-
-	e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL);
-	if (!e->pages)
-		return -ENOMEM;
-
-	for (i = 0; i < num; ++i) {
-		e->pages[i] = alloc_page(GFP_KERNEL);
-		if (!e->pages[i])
-			goto err_out_free_pages;
-	}
-
-	e->page_num = num;
-	return 0;
-
-err_out_free_pages:
-	while (--i >= 0)
-		__free_page(e->pages[i]);
-
-	kfree(e->pages);
-	return -ENOMEM;
-}
-
-/*
- * Initialize crypto engine for given node.
- * Setup cipher/hash, keys, pool of threads and private data.
- */
-static int dst_crypto_engine_init(struct dst_crypto_engine *e,
-		struct dst_node *n)
-{
-	int err;
-	struct dst_crypto_ctl *ctl = &n->crypto;
-
-	err = dst_crypto_pages_alloc(e, n->max_pages);
-	if (err)
-		goto err_out_exit;
-
-	e->size = PAGE_SIZE;
-	e->data = kmalloc(e->size, GFP_KERNEL);
-	if (!e->data) {
-		err = -ENOMEM;
-		goto err_out_free_pages;
-	}
-
-	if (ctl->hash_algo[0]) {
-		e->hash = dst_init_hash(ctl, n->hash_key);
-		if (IS_ERR(e->hash)) {
-			err = PTR_ERR(e->hash);
-			e->hash = NULL;
-			goto err_out_free;
-		}
-	}
-
-	if (ctl->cipher_algo[0]) {
-		e->cipher = dst_init_cipher(ctl, n->cipher_key);
-		if (IS_ERR(e->cipher)) {
-			err = PTR_ERR(e->cipher);
-			e->cipher = NULL;
-			goto err_out_free_hash;
-		}
-	}
-
-	return 0;
-
-err_out_free_hash:
-	crypto_free_hash(e->hash);
-err_out_free:
-	kfree(e->data);
-err_out_free_pages:
-	dst_crypto_pages_free(e);
-err_out_exit:
-	return err;
-}
-
-static void dst_crypto_engine_exit(struct dst_crypto_engine *e)
-{
-	if (e->hash)
-		crypto_free_hash(e->hash);
-	if (e->cipher)
-		crypto_free_ablkcipher(e->cipher);
-	dst_crypto_pages_free(e);
-	kfree(e->data);
-}
-
-/*
- * Waiting for cipher processing to be completed.
- */
-struct dst_crypto_completion {
-	struct completion		complete;
-	int				error;
-};
-
-static void dst_crypto_complete(struct crypto_async_request *req, int err)
-{
-	struct dst_crypto_completion *c = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	dprintk("%s: req: %p, err: %d.\n", __func__, req, err);
-	c->error = err;
-	complete(&c->complete);
-}
-
-static int dst_crypto_process(struct ablkcipher_request *req,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src,
-		void *iv, int enc, unsigned long timeout)
-{
-	struct dst_crypto_completion c;
-	int err;
-
-	init_completion(&c.complete);
-	c.error = -EINPROGRESS;
-
-	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					dst_crypto_complete, &c);
-
-	ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv);
-
-	if (enc)
-		err = crypto_ablkcipher_encrypt(req);
-	else
-		err = crypto_ablkcipher_decrypt(req);
-
-	switch (err) {
-	case -EINPROGRESS:
-	case -EBUSY:
-		err = wait_for_completion_interruptible_timeout(&c.complete,
-				timeout);
-		if (!err)
-			err = -ETIMEDOUT;
-		else
-			err = c.error;
-		break;
-	default:
-		break;
-	}
-
-	return err;
-}
-
-/*
- * DST uses generic iteration approach for data crypto processing.
- * Single block IO request is switched into array of scatterlists,
- * which are submitted to the crypto processing iterator.
- *
- * Input and output iterator initialization are different, since
- * in output case we can not encrypt data in-place and need a
- * temporary storage, which is then being sent to the remote peer.
- */
-static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e,
-		int (*iterator) (struct dst_crypto_engine *e,
-				  struct scatterlist *dst,
-				  struct scatterlist *src))
-{
-	struct bio_vec *bv;
-	int err, i;
-
-	sg_init_table(e->src, bio->bi_vcnt);
-	sg_init_table(e->dst, bio->bi_vcnt);
-
-	bio_for_each_segment(bv, bio, i) {
-		sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-		sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset);
-
-		err = iterator(e, &e->dst[i], &e->src[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e,
-		int (*iterator) (struct dst_crypto_engine *e,
-				  struct scatterlist *dst,
-				  struct scatterlist *src))
-{
-	struct bio_vec *bv;
-	int err, i;
-
-	sg_init_table(e->src, bio->bi_vcnt);
-	sg_init_table(e->dst, bio->bi_vcnt);
-
-	bio_for_each_segment(bv, bio, i) {
-		sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-		sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-
-		err = iterator(e, &e->dst[i], &e->src[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dst_crypt_iterator(struct dst_crypto_engine *e,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
-	struct ablkcipher_request *req = e->data;
-	u8 iv[32];
-
-	memset(iv, 0, sizeof(iv));
-
-	memcpy(iv, &e->iv, sizeof(e->iv));
-
-	return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout);
-}
-
-static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio)
-{
-	struct ablkcipher_request *req = e->data;
-
-	memset(req, 0, sizeof(struct ablkcipher_request));
-	ablkcipher_request_set_tfm(req, e->cipher);
-
-	if (e->enc)
-		return dst_trans_iter_out(bio, e, dst_crypt_iterator);
-	else
-		return dst_trans_iter_in(bio, e, dst_crypt_iterator);
-}
-
-static int dst_hash_iterator(struct dst_crypto_engine *e,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
-	return crypto_hash_update(e->data, sg_src, sg_src->length);
-}
-
-static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst)
-{
-	struct hash_desc *desc = e->data;
-	int err;
-
-	desc->tfm = e->hash;
-	desc->flags = 0;
-
-	err = crypto_hash_init(desc);
-	if (err)
-		return err;
-
-	err = dst_trans_iter_in(bio, e, dst_hash_iterator);
-	if (err)
-		return err;
-
-	err = crypto_hash_final(desc, dst);
-	if (err)
-		return err;
-
-	return 0;
-}
-
-/*
- * Initialize/cleanup a crypto thread. The only thing it should
- * do is to allocate a pool of pages as temporary storage.
- * And to setup cipher and/or hash.
- */
-static void *dst_crypto_thread_init(void *data)
-{
-	struct dst_node *n = data;
-	struct dst_crypto_engine *e;
-	int err = -ENOMEM;
-
-	e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL);
-	if (!e)
-		goto err_out_exit;
-	e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist),
-			GFP_KERNEL);
-	if (!e->src)
-		goto err_out_free;
-
-	e->dst = e->src + n->max_pages;
-
-	err = dst_crypto_engine_init(e, n);
-	if (err)
-		goto err_out_free_all;
-
-	return e;
-
-err_out_free_all:
-	kfree(e->src);
-err_out_free:
-	kfree(e);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-static void dst_crypto_thread_cleanup(void *private)
-{
-	struct dst_crypto_engine *e = private;
-
-	dst_crypto_engine_exit(e);
-	kfree(e->src);
-	kfree(e);
-}
-
-/*
- * Initialize crypto engine for given node: store keys, create pool
- * of threads, initialize each one.
- *
- * Each thread has unique ID, but 0 and 1 are reserved for receiving and
- * accepting threads (if export node), so IDs could start from 2, but starting
- * them from 10 allows easily understand what this thread is for.
- */
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl)
-{
-	void *key = (ctl + 1);
-	int err = -ENOMEM, i;
-	char name[32];
-
-	if (ctl->hash_keysize) {
-		n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL);
-		if (!n->hash_key)
-			goto err_out_exit;
-		memcpy(n->hash_key, key, ctl->hash_keysize);
-	}
-
-	if (ctl->cipher_keysize) {
-		n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL);
-		if (!n->cipher_key)
-			goto err_out_free_hash;
-		memcpy(n->cipher_key, key, ctl->cipher_keysize);
-	}
-	memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl));
-
-	for (i = 0; i < ctl->thread_num; ++i) {
-		snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i);
-		/* Unique ids... */
-		err = thread_pool_add_worker(n->pool, name, i + 10,
-			dst_crypto_thread_init, dst_crypto_thread_cleanup, n);
-		if (err)
-			goto err_out_free_threads;
-	}
-
-	return 0;
-
-err_out_free_threads:
-	while (--i >= 0)
-		thread_pool_del_worker_id(n->pool, i+10);
-
-	if (ctl->cipher_keysize)
-		kfree(n->cipher_key);
-	ctl->cipher_keysize = 0;
-err_out_free_hash:
-	if (ctl->hash_keysize)
-		kfree(n->hash_key);
-	ctl->hash_keysize = 0;
-err_out_exit:
-	return err;
-}
-
-void dst_node_crypto_exit(struct dst_node *n)
-{
-	struct dst_crypto_ctl *ctl = &n->crypto;
-
-	if (ctl->cipher_algo[0] || ctl->hash_algo[0]) {
-		kfree(n->hash_key);
-		kfree(n->cipher_key);
-	}
-}
-
-/*
- * Thrad pool setup callback. Just stores a transaction in private data.
- */
-static int dst_trans_crypto_setup(void *crypto_engine, void *trans)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-
-	e->private = trans;
-	return 0;
-}
-
-#if 0
-static void dst_dump_bio(struct bio *bio)
-{
-	u8 *p;
-	struct bio_vec *bv;
-	int i;
-
-	bio_for_each_segment(bv, bio, i) {
-		dprintk("%s: %llu/%u: size: %u, offset: %u, data: ",
-				__func__, bio->bi_sector, bio->bi_size,
-				bv->bv_len, bv->bv_offset);
-
-		p = kmap(bv->bv_page) + bv->bv_offset;
-		for (i = 0; i < bv->bv_len; ++i)
-			printk(KERN_DEBUG "%02x ", p[i]);
-		kunmap(bv->bv_page);
-		printk("\n");
-	}
-}
-#endif
-
-/*
- * Encrypt/hash data and send it to the network.
- */
-static int dst_crypto_process_sending(struct dst_crypto_engine *e,
-		struct bio *bio, u8 *hash)
-{
-	int err;
-
-	if (e->cipher) {
-		err = dst_crypt(e, bio);
-		if (err)
-			goto err_out_exit;
-	}
-
-	if (e->hash) {
-		err = dst_hash(e, bio, hash);
-		if (err)
-			goto err_out_exit;
-
-#ifdef CONFIG_DST_DEBUG
-		{
-			unsigned int i;
-
-			/* dst_dump_bio(bio); */
-
-			printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ",
-				__func__, (u64)bio->bi_sector,
-				bio->bi_size, bio_data_dir(bio));
-			for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
-					printk("%02x ", hash[i]);
-			printk("\n");
-		}
-#endif
-	}
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Check if received data is valid. Decipher if it is.
- */
-static int dst_crypto_process_receiving(struct dst_crypto_engine *e,
-		struct bio *bio, u8 *hash, u8 *recv_hash)
-{
-	int err;
-
-	if (e->hash) {
-		int mismatch;
-
-		err = dst_hash(e, bio, hash);
-		if (err)
-			goto err_out_exit;
-
-		mismatch = !!memcmp(recv_hash, hash,
-				crypto_hash_digestsize(e->hash));
-#ifdef CONFIG_DST_DEBUG
-		/* dst_dump_bio(bio); */
-
-		printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d",
-			__func__, (u64)bio->bi_sector, bio->bi_size,
-			bio_data_dir(bio), mismatch);
-		if (mismatch) {
-			unsigned int i;
-
-			printk(", recv/calc: ");
-			for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
-				printk("%02x/%02x ", recv_hash[i], hash[i]);
-
-		}
-		printk("\n");
-#endif
-		err = -1;
-		if (mismatch)
-			goto err_out_exit;
-	}
-
-	if (e->cipher) {
-		err = dst_crypt(e, bio);
-		if (err)
-			goto err_out_exit;
-	}
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Thread pool callback to encrypt data and send it to the netowork.
- */
-static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-	struct dst_trans *t = schedule_data;
-	struct bio *bio = t->bio;
-	int err;
-
-	dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n",
-			__func__, t, t->gen, e->cipher, e->hash);
-
-	e->enc = t->enc;
-	e->iv = dst_gen_iv(t);
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_crypto_process_sending(e, bio, t->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		if (e->hash) {
-			t->cmd.csize = crypto_hash_digestsize(e->hash);
-			t->cmd.size += t->cmd.csize;
-		}
-
-		return dst_trans_send(t);
-	} else {
-		u8 *hash = e->data + e->size/2;
-
-		err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		dst_trans_remove(t);
-		dst_trans_put(t);
-	}
-
-	return 0;
-
-err_out_exit:
-	t->error = err;
-	dst_trans_put(t);
-	return err;
-}
-
-/*
- * Schedule crypto processing for given transaction.
- */
-int dst_trans_crypto(struct dst_trans *t)
-{
-	struct dst_node *n = t->n;
-	int err;
-
-	err = thread_pool_schedule(n->pool,
-		dst_trans_crypto_setup, dst_trans_crypto_action,
-		t, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	dst_trans_put(t);
-	return err;
-}
-
-/*
- * Crypto machinery for the export node.
- */
-static int dst_export_crypto_setup(void *crypto_engine, void *bio)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-
-	e->private = bio;
-	return 0;
-}
-
-static int dst_export_crypto_action(void *crypto_engine, void *schedule_data)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-	struct bio *bio = schedule_data;
-	struct dst_export_priv *p = bio->bi_private;
-	int err;
-
-	dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n",
-			__func__, e, e->data, (u64)bio->bi_sector,
-			bio->bi_size, bio_data_dir(bio));
-
-	e->enc = (bio_data_dir(bio) == READ);
-	e->iv = p->cmd.id;
-
-	if (bio_data_dir(bio) == WRITE) {
-		u8 *hash = e->data + e->size/2;
-
-		err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		generic_make_request(bio);
-	} else {
-		err = dst_crypto_process_sending(e, bio, p->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		if (e->hash) {
-			p->cmd.csize = crypto_hash_digestsize(e->hash);
-			p->cmd.size += p->cmd.csize;
-		}
-
-		err = dst_export_send_bio(bio);
-	}
-	return 0;
-
-err_out_exit:
-	bio_put(bio);
-	return err;
-}
-
-int dst_export_crypto(struct dst_node *n, struct bio *bio)
-{
-	int err;
-
-	err = thread_pool_schedule(n->pool,
-		dst_export_crypto_setup, dst_export_crypto_action,
-		bio, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	bio_put(bio);
-	return err;
-}
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
deleted file mode 100644
index c83ca7e3d048..000000000000
--- a/drivers/staging/dst/dcore.c
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/jhash.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <linux/in.h>
-#include <linux/in6.h>
-
-#include <net/sock.h>
-
-static int dst_major;
-
-static DEFINE_MUTEX(dst_hash_lock);
-static struct list_head *dst_hashtable;
-static unsigned int dst_hashtable_size = 128;
-module_param(dst_hashtable_size, uint, 0644);
-
-static char dst_name[] = "Dementianting goldfish";
-
-static DEFINE_IDR(dst_index_idr);
-static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL };
-
-/*
- * DST sysfs tree for device called 'storage':
- *
- * /sys/bus/dst/devices/storage/
- * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025
- * /sys/bus/dst/devices/storage/size : 800
- * /sys/bus/dst/devices/storage/name : storage
- */
-
-static int dst_dev_match(struct device *dev, struct device_driver *drv)
-{
-	return 1;
-}
-
-static struct bus_type dst_dev_bus_type = {
-	.name 		= "dst",
-	.match 		= &dst_dev_match,
-};
-
-static void dst_node_release(struct device *dev)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	kfree(info);
-}
-
-static struct device dst_node_dev = {
-	.bus 		= &dst_dev_bus_type,
-	.release 	= &dst_node_release
-};
-
-/*
- * Setting size of the node after it was changed.
- */
-static void dst_node_set_size(struct dst_node *n)
-{
-	struct block_device *bdev;
-
-	set_capacity(n->disk, n->size >> 9);
-
-	bdev = bdget_disk(n->disk, 0);
-	if (bdev) {
-		mutex_lock(&bdev->bd_inode->i_mutex);
-		i_size_write(bdev->bd_inode, n->size);
-		mutex_unlock(&bdev->bd_inode->i_mutex);
-		bdput(bdev);
-	}
-}
-
-/*
- * Distributed storage request processing function.
- */
-static int dst_request(struct request_queue *q, struct bio *bio)
-{
-	struct dst_node *n = q->queuedata;
-	int err = -EIO;
-
-	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
-		/*
-		 * This is a dirty^Wnice hack, but if we complete this
-		 * operation with -EOPNOTSUPP like intended, XFS
-		 * will stuck and freeze the machine. This may be
-		 * not particulary XFS problem though, but it is the
-		 * only FS which sends empty barrier at umount time
-		 * I worked with.
-		 *
-		 * Empty barriers are not allowed anyway, see 51fd77bd9f512
-		 * for example, although later it was changed to
-		 * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
-		 * work in this case.
-		 */
-		/* err = -EOPNOTSUPP; */
-		err = 0;
-		goto end_io;
-	}
-
-	bio_get(bio);
-
-	return dst_process_bio(n, bio);
-
-end_io:
-	bio_endio(bio, err);
-	return err;
-}
-
-/*
- * Open/close callbacks for appropriate block device.
- */
-static int dst_bdev_open(struct block_device *bdev, fmode_t mode)
-{
-	struct dst_node *n = bdev->bd_disk->private_data;
-
-	dst_node_get(n);
-	return 0;
-}
-
-static int dst_bdev_release(struct gendisk *disk, fmode_t mode)
-{
-	struct dst_node *n = disk->private_data;
-
-	dst_node_put(n);
-	return 0;
-}
-
-static struct block_device_operations dst_blk_ops = {
-	.open		= dst_bdev_open,
-	.release	= dst_bdev_release,
-	.owner		= THIS_MODULE,
-};
-
-/*
- * Block layer binding - disk is created when array is fully configured
- * by userspace request.
- */
-static int dst_node_create_disk(struct dst_node *n)
-{
-	int err = -ENOMEM;
-	u32 index = 0;
-
-	n->queue = blk_init_queue(NULL, NULL);
-	if (!n->queue)
-		goto err_out_exit;
-
-	n->queue->queuedata = n;
-	blk_queue_make_request(n->queue, dst_request);
-	blk_queue_max_phys_segments(n->queue, n->max_pages);
-	blk_queue_max_hw_segments(n->queue, n->max_pages);
-
-	err = -ENOMEM;
-	n->disk = alloc_disk(1);
-	if (!n->disk)
-		goto err_out_free_queue;
-
-	if (!(n->state->permissions & DST_PERM_WRITE)) {
-		printk(KERN_INFO "DST node %s attached read-only.\n", n->name);
-		set_disk_ro(n->disk, 1);
-	}
-
-	if (!idr_pre_get(&dst_index_idr, GFP_KERNEL))
-		goto err_out_put;
-
-	mutex_lock(&dst_hash_lock);
-	err = idr_get_new(&dst_index_idr, NULL, &index);
-	mutex_unlock(&dst_hash_lock);
-	if (err)
-		goto err_out_put;
-
-	n->disk->major = dst_major;
-	n->disk->first_minor = index;
-	n->disk->fops = &dst_blk_ops;
-	n->disk->queue = n->queue;
-	n->disk->private_data = n;
-	snprintf(n->disk->disk_name, sizeof(n->disk->disk_name),
-			"dst-%s", n->name);
-
-	return 0;
-
-err_out_put:
-	put_disk(n->disk);
-err_out_free_queue:
-	blk_cleanup_queue(n->queue);
-err_out_exit:
-	return err;
-}
-
-/*
- * Sysfs machinery: show device's size.
- */
-static ssize_t dst_show_size(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	return sprintf(buf, "%llu\n", info->size);
-}
-
-/*
- * Show local exported device.
- */
-static ssize_t dst_show_local(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	return sprintf(buf, "%s\n", info->local);
-}
-
-/*
- * Shows type of the remote node - device major/minor number
- * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes.
- */
-static ssize_t dst_show_type(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-	int family = info->net.addr.sa_family;
-
-	if (family == AF_INET) {
-		struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr;
-		return sprintf(buf, "%u.%u.%u.%u:%d\n",
-			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
-	} else if (family == AF_INET6) {
-		struct sockaddr_in6 *sin = (struct sockaddr_in6 *)
-				&info->net.addr;
-		return sprintf(buf,
-			"%pi6:%d\n",
-			&sin->sin6_addr, ntohs(sin->sin6_port));
-	} else {
-		int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */
-		int size, addrlen = info->net.addr.sa_data_len;
-		unsigned char *a = (unsigned char *)&info->net.addr.sa_data;
-		char *buf_orig = buf;
-
-		size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ",
-				family, addrlen);
-		sz -= size;
-		buf += size;
-
-		for (i = 0; i < addrlen; ++i) {
-			if (sz < 3)
-				break;
-
-			size = snprintf(buf, sz, "%02x ", a[i]);
-			sz -= size;
-			buf += size;
-		}
-		buf += sprintf(buf, "\n");
-
-		return buf - buf_orig;
-	}
-	return 0;
-}
-
-static struct device_attribute dst_node_attrs[] = {
-	__ATTR(size, 0444, dst_show_size, NULL),
-	__ATTR(type, 0444, dst_show_type, NULL),
-	__ATTR(local, 0444, dst_show_local, NULL),
-};
-
-static int dst_create_node_attributes(struct dst_node *n)
-{
-	int err, i;
-
-	for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) {
-		err = device_create_file(&n->info->device,
-				&dst_node_attrs[i]);
-		if (err)
-			goto err_out_remove_all;
-	}
-	return 0;
-
-err_out_remove_all:
-	while (--i >= 0)
-		device_remove_file(&n->info->device,
-				&dst_node_attrs[i]);
-
-	return err;
-}
-
-static void dst_remove_node_attributes(struct dst_node *n)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i)
-		device_remove_file(&n->info->device,
-				&dst_node_attrs[i]);
-}
-
-/*
- * Sysfs cleanup and initialization.
- * Shows number of useful parameters.
- */
-static void dst_node_sysfs_exit(struct dst_node *n)
-{
-	if (n->info) {
-		dst_remove_node_attributes(n);
-		device_unregister(&n->info->device);
-		n->info = NULL;
-	}
-}
-
-static int dst_node_sysfs_init(struct dst_node *n)
-{
-	int err;
-
-	n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL);
-	if (!n->info)
-		return -ENOMEM;
-
-	memcpy(&n->info->device, &dst_node_dev, sizeof(struct device));
-	n->info->size = n->size;
-
-	dev_set_name(&n->info->device, "dst-%s", n->name);
-	err = device_register(&n->info->device);
-	if (err) {
-		dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n",
-				n->name, err);
-		goto err_out_exit;
-	}
-
-	dst_create_node_attributes(n);
-
-	return 0;
-
-err_out_exit:
-	kfree(n->info);
-	n->info = NULL;
-	return err;
-}
-
-/*
- * DST node hash tables machinery.
- */
-static inline unsigned int dst_hash(char *str, unsigned int size)
-{
-	return jhash(str, size, 0) % dst_hashtable_size;
-}
-
-static void dst_node_remove(struct dst_node *n)
-{
-	mutex_lock(&dst_hash_lock);
-	list_del_init(&n->node_entry);
-	mutex_unlock(&dst_hash_lock);
-}
-
-static void dst_node_add(struct dst_node *n)
-{
-	unsigned hash = dst_hash(n->name, sizeof(n->name));
-
-	mutex_lock(&dst_hash_lock);
-	list_add_tail(&n->node_entry, &dst_hashtable[hash]);
-	mutex_unlock(&dst_hash_lock);
-}
-
-/*
- * Cleaning node when it is about to be freed.
- * There are still users of the socket though,
- * so connection cleanup should be protected.
- */
-static void dst_node_cleanup(struct dst_node *n)
-{
-	struct dst_state *st = n->state;
-
-	if (!st)
-		return;
-
-	if (n->queue) {
-		blk_cleanup_queue(n->queue);
-
-		mutex_lock(&dst_hash_lock);
-		idr_remove(&dst_index_idr, n->disk->first_minor);
-		mutex_unlock(&dst_hash_lock);
-
-		put_disk(n->disk);
-	}
-
-	if (n->bdev) {
-		sync_blockdev(n->bdev);
-		close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
-	}
-
-	dst_state_lock(st);
-	st->need_exit = 1;
-	dst_state_exit_connected(st);
-	dst_state_unlock(st);
-
-	wake_up(&st->thread_wait);
-
-	dst_state_put(st);
-	n->state = NULL;
-}
-
-/*
- * Free security attributes attached to given node.
- */
-static void dst_security_exit(struct dst_node *n)
-{
-	struct dst_secure *s, *tmp;
-
-	list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) {
-		list_del(&s->sec_entry);
-		kfree(s);
-	}
-}
-
-/*
- * Free node when there are no more users.
- * Actually node has to be freed on behalf od userspace process,
- * since there are number of threads, which are embedded in the
- * node, so they can not exit and free node from there, that is
- * why there is a wakeup if reference counter is not equal to zero.
- */
-void dst_node_put(struct dst_node *n)
-{
-	if (unlikely(!n))
-		return;
-
-	dprintk("%s: n: %p, refcnt: %d.\n",
-			__func__, n, atomic_read(&n->refcnt));
-
-	if (atomic_dec_and_test(&n->refcnt)) {
-		dst_node_remove(n);
-		n->trans_scan_timeout = 0;
-		dst_node_cleanup(n);
-		thread_pool_destroy(n->pool);
-		dst_node_sysfs_exit(n);
-		dst_node_crypto_exit(n);
-		dst_security_exit(n);
-		dst_node_trans_exit(n);
-
-		kfree(n);
-
-		dprintk("%s: freed n: %p.\n", __func__, n);
-	} else {
-		wake_up(&n->wait);
-	}
-}
-
-/*
- * Setting up export device: lookup by the name, get its size
- * and setup listening socket, which will accept clients, which
- * will submit IO for given storage.
- */
-static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl,
-		struct dst_export_ctl *le)
-{
-	int err;
-
-	snprintf(n->info->local, sizeof(n->info->local), "%s", le->device);
-
-	n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL);
-	if (IS_ERR(n->bdev))
-		return PTR_ERR(n->bdev);
-
-	if (n->size != 0)
-		n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size);
-	else
-		n->size = n->bdev->bd_inode->i_size;
-
-	n->info->size = n->size;
-	err = dst_node_init_listened(n, le);
-	if (err)
-		goto err_out_cleanup;
-
-	return 0;
-
-err_out_cleanup:
-	close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
-	n->bdev = NULL;
-
-	return err;
-}
-
-/* Empty thread pool callbacks for the network processing threads. */
-static inline void *dst_thread_network_init(void *data)
-{
-	dprintk("%s: data: %p.\n", __func__, data);
-	return data;
-}
-
-static inline void dst_thread_network_cleanup(void *data)
-{
-	dprintk("%s: data: %p.\n", __func__, data);
-}
-
-/*
- * Allocate DST node and initialize some of its parameters.
- */
-static struct dst_node *dst_alloc_node(struct dst_ctl *ctl,
-		int (*start)(struct dst_node *),
-		int num)
-{
-	struct dst_node *n;
-	int err;
-
-	n = kzalloc(sizeof(struct dst_node), GFP_KERNEL);
-	if (!n)
-		return NULL;
-
-	INIT_LIST_HEAD(&n->node_entry);
-
-	INIT_LIST_HEAD(&n->security_list);
-	mutex_init(&n->security_lock);
-
-	init_waitqueue_head(&n->wait);
-
-	n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout);
-	if (!n->trans_scan_timeout)
-		n->trans_scan_timeout = HZ;
-
-	n->trans_max_retries = ctl->trans_max_retries;
-	if (!n->trans_max_retries)
-		n->trans_max_retries = 10;
-
-	/*
-	 * Pretty much arbitrary default numbers.
-	 * 32 matches maximum number of pages in bio originated from ext3 (31).
-	 */
-	n->max_pages = ctl->max_pages;
-	if (!n->max_pages)
-		n->max_pages = 32;
-
-	if (n->max_pages > 1024)
-		n->max_pages = 1024;
-
-	n->start = start;
-	n->size = ctl->size;
-
-	atomic_set(&n->refcnt, 1);
-	atomic_long_set(&n->gen, 0);
-	snprintf(n->name, sizeof(n->name), "%s", ctl->name);
-
-	err = dst_node_sysfs_init(n);
-	if (err)
-		goto err_out_free;
-
-	n->pool = thread_pool_create(num, n->name, dst_thread_network_init,
-			dst_thread_network_cleanup, n);
-	if (IS_ERR(n->pool)) {
-		err = PTR_ERR(n->pool);
-		goto err_out_sysfs_exit;
-	}
-
-	dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name);
-
-	return n;
-
-err_out_sysfs_exit:
-	dst_node_sysfs_exit(n);
-err_out_free:
-	kfree(n);
-	return NULL;
-}
-
-/*
- * Starting a node, connected to the remote server:
- * register block device and initialize transaction mechanism.
- * In revers order though.
- *
- * It will autonegotiate some parameters with the remote node
- * and update local if needed.
- *
- * Transaction initialization should be the last thing before
- * starting the node, since transaction should include not only
- * block IO, but also crypto related data (if any), which are
- * initialized separately.
- */
-static int dst_start_remote(struct dst_node *n)
-{
-	int err;
-
-	err = dst_node_trans_init(n, sizeof(struct dst_trans));
-	if (err)
-		return err;
-
-	err = dst_node_create_disk(n);
-	if (err)
-		return err;
-
-	dst_node_set_size(n);
-	add_disk(n->disk);
-
-	dprintk("DST: started remote node '%s', minor: %d.\n",
-			n->name, n->disk->first_minor);
-
-	return 0;
-}
-
-/*
- * Adding remote node and initialize connection.
- */
-static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-	struct dst_network_ctl *rctl = data;
-
-	if (n)
-		return -EEXIST;
-
-	if (size != sizeof(struct dst_network_ctl))
-		return -EINVAL;
-
-	n = dst_alloc_node(ctl, dst_start_remote, 1);
-	if (!n)
-		return -ENOMEM;
-
-	memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl));
-	err = dst_node_init_connected(n, rctl);
-	if (err)
-		goto err_out_free;
-
-	dst_node_add(n);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	return err;
-}
-
-/*
- * Adding export node: initializing block device and listening socket.
- */
-static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-	struct dst_export_ctl *le = data;
-
-	if (n)
-		return -EEXIST;
-
-	if (size != sizeof(struct dst_export_ctl))
-		return -EINVAL;
-
-	n = dst_alloc_node(ctl, dst_start_export, 2);
-	if (!n)
-		return -EINVAL;
-
-	err = dst_setup_export(n, ctl, le);
-	if (err)
-		goto err_out_free;
-
-	dst_node_add(n);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	return err;
-}
-
-static int dst_node_remove_unload(struct dst_node *n)
-{
-	printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n",
-			n->name, n->size);
-
-	if (n->disk)
-		del_gendisk(n->disk);
-
-	dst_node_remove(n);
-	dst_node_sysfs_exit(n);
-
-	/*
-	 * This is not a hack. Really.
-	 * Node's reference counter allows to implement fine grained
-	 * node freeing, but since all transactions (which hold node's
-	 * reference counter) are processed in the dedicated thread,
-	 * it is possible that reference will hit zero in that thread,
-	 * so we will not be able to exit thread and cleanup the node.
-	 *
-	 * So, we remove disk, so no new activity is possible, and
-	 * wait until all pending transaction are completed (either
-	 * in receiving thread or by timeout in workqueue), in this
-	 * case reference counter will be less or equal to 2 (once set in
-	 * dst_alloc_node() and then in connector message parser;
-	 * or when we force module unloading, and connector message
-	 * parser does not hold a reference, in this case reference
-	 * counter will be equal to 1),
-	 * and subsequent dst_node_put() calls will free the node.
-	 */
-	dprintk("%s: going to sleep with %d refcnt.\n",
-			__func__, atomic_read(&n->refcnt));
-	wait_event(n->wait, atomic_read(&n->refcnt) <= 2);
-
-	dst_node_put(n);
-	return 0;
-}
-
-/*
- * Remove node from the hash table.
- */
-static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	if (!n)
-		return -ENODEV;
-
-	return dst_node_remove_unload(n);
-}
-
-/*
- * Initialize crypto processing for given node.
- */
-static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	struct dst_crypto_ctl *crypto = data;
-
-	if (!n)
-		return -ENODEV;
-
-	if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize +
-			crypto->cipher_keysize)
-		return -EINVAL;
-
-	if (n->trans_cache)
-		return -EEXIST;
-
-	return dst_node_crypto_init(n, crypto);
-}
-
-/*
- * Security attributes for given node.
- */
-static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	struct dst_secure *s;
-
-	if (!n)
-		return -ENODEV;
-
-	if (size != sizeof(struct dst_secure_user))
-		return -EINVAL;
-
-	s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
-
-	memcpy(&s->sec, data, size);
-
-	mutex_lock(&n->security_lock);
-	list_add_tail(&s->sec_entry, &n->security_list);
-	mutex_unlock(&n->security_lock);
-
-	return 0;
-}
-
-/*
- * Kill'em all!
- */
-static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-
-	if (!n)
-		return -ENODEV;
-
-	if (n->trans_cache)
-		return 0;
-
-	err = n->start(n);
-	if (err)
-		return err;
-
-	printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size);
-	return 0;
-}
-
-typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size);
-
-/*
- * List of userspace commands.
- */
-static dst_command_func dst_commands[] = {
-	[DST_ADD_REMOTE] = &dst_add_remote,
-	[DST_ADD_EXPORT] = &dst_add_export,
-	[DST_DEL_NODE] = &dst_del_node,
-	[DST_CRYPTO] = &dst_crypto_init,
-	[DST_SECURITY] = &dst_security_init,
-	[DST_START] = &dst_start_node,
-};
-
-/*
- * Configuration parser.
- */
-static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
-	struct dst_ctl *ctl;
-	int err;
-	struct dst_ctl_ack ack;
-	struct dst_node *n = NULL, *tmp;
-	unsigned int hash;
-
-	if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
-	}
-
-	if (msg->len < sizeof(struct dst_ctl)) {
-		err = -EBADMSG;
-		goto out;
-	}
-
-	ctl = (struct dst_ctl *)msg->data;
-
-	if (ctl->cmd >= DST_CMD_MAX) {
-		err = -EINVAL;
-		goto out;
-	}
-	hash = dst_hash(ctl->name, sizeof(ctl->name));
-
-	mutex_lock(&dst_hash_lock);
-	list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) {
-		if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) {
-			n = tmp;
-			dst_node_get(n);
-			break;
-		}
-	}
-	mutex_unlock(&dst_hash_lock);
-
-	err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl),
-			msg->len - sizeof(struct dst_ctl));
-
-	dst_node_put(n);
-out:
-	memcpy(&ack.msg, msg, sizeof(struct cn_msg));
-
-	ack.msg.ack = msg->ack + 1;
-	ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg);
-
-	ack.error = err;
-
-	cn_netlink_send(&ack.msg, 0, GFP_KERNEL);
-}
-
-/*
- * Global initialization: sysfs, hash table, block device registration,
- * connector and various caches.
- */
-static int __init dst_sysfs_init(void)
-{
-	return bus_register(&dst_dev_bus_type);
-}
-
-static void dst_sysfs_exit(void)
-{
-	bus_unregister(&dst_dev_bus_type);
-}
-
-static int __init dst_hashtable_init(void)
-{
-	unsigned int i;
-
-	dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head),
-			GFP_KERNEL);
-	if (!dst_hashtable)
-		return -ENOMEM;
-
-	for (i = 0; i < dst_hashtable_size; ++i)
-		INIT_LIST_HEAD(&dst_hashtable[i]);
-
-	return 0;
-}
-
-static void dst_hashtable_exit(void)
-{
-	unsigned int i;
-	struct dst_node *n, *tmp;
-
-	for (i = 0; i < dst_hashtable_size; ++i) {
-		list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) {
-			dst_node_remove_unload(n);
-		}
-	}
-
-	kfree(dst_hashtable);
-}
-
-static int __init dst_sys_init(void)
-{
-	int err = -ENOMEM;
-
-	err = dst_hashtable_init();
-	if (err)
-		goto err_out_exit;
-
-	err = dst_export_init();
-	if (err)
-		goto err_out_hashtable_exit;
-
-	err = register_blkdev(dst_major, DST_NAME);
-	if (err < 0)
-		goto err_out_export_exit;
-	if (err)
-		dst_major = err;
-
-	err = dst_sysfs_init();
-	if (err)
-		goto err_out_unregister;
-
-	err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback);
-	if (err)
-		goto err_out_sysfs_exit;
-
-	printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name);
-
-	return 0;
-
-err_out_sysfs_exit:
-	dst_sysfs_exit();
-err_out_unregister:
-	unregister_blkdev(dst_major, DST_NAME);
-err_out_export_exit:
-	dst_export_exit();
-err_out_hashtable_exit:
-	dst_hashtable_exit();
-err_out_exit:
-	return err;
-}
-
-static void __exit dst_sys_exit(void)
-{
-	cn_del_callback(&cn_dst_id);
-	unregister_blkdev(dst_major, DST_NAME);
-	dst_hashtable_exit();
-	dst_sysfs_exit();
-	dst_export_exit();
-}
-
-module_init(dst_sys_init);
-module_exit(dst_sys_exit);
-
-MODULE_DESCRIPTION("Distributed storage");
-MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/dst/export.c b/drivers/staging/dst/export.c
deleted file mode 100644
index c324230e8b60..000000000000
--- a/drivers/staging/dst/export.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <net/sock.h>
-
-/*
- * Export bioset is used for server block IO requests.
- */
-static struct bio_set *dst_bio_set;
-
-int __init dst_export_init(void)
-{
-	int err = -ENOMEM;
-
-	dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv));
-	if (!dst_bio_set)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-void dst_export_exit(void)
-{
-	bioset_free(dst_bio_set);
-}
-
-/*
- * When client connects and autonegotiates with the server node,
- * its permissions are checked in a security attributes and sent
- * back.
- */
-static unsigned int dst_check_permissions(struct dst_state *main,
-		struct dst_state *st)
-{
-	struct dst_node *n = main->node;
-	struct dst_secure *sentry;
-	struct dst_secure_user *s;
-	struct saddr *sa = &st->ctl.addr;
-	unsigned int perm = 0;
-
-	mutex_lock(&n->security_lock);
-	list_for_each_entry(sentry, &n->security_list, sec_entry) {
-		s = &sentry->sec;
-
-		if (s->addr.sa_family != sa->sa_family)
-			continue;
-
-		if (s->addr.sa_data_len != sa->sa_data_len)
-			continue;
-
-		/*
-		 * This '2' below is a port field. This may be very wrong to do
-		 * in atalk for example though. If there will be any need
-		 * to extent protocol to something else, I can create
-		 * per-family helpers and use them instead of this memcmp.
-		 */
-		if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2,
-					sa->sa_data_len - 2))
-			continue;
-
-		perm = s->permissions;
-	}
-	mutex_unlock(&n->security_lock);
-
-	return perm;
-}
-
-/*
- * Accept new client: allocate appropriate network state and check permissions.
- */
-static struct dst_state *dst_accept_client(struct dst_state *st)
-{
-	unsigned int revents = 0;
-	unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
-	unsigned int mask = err_mask | POLLIN;
-	struct dst_node *n = st->node;
-	int err = 0;
-	struct socket *sock = NULL;
-	struct dst_state *new;
-
-	while (!err && !sock) {
-		revents = dst_state_poll(st);
-
-		if (!(revents & mask)) {
-			DEFINE_WAIT(wait);
-
-			for (;;) {
-				prepare_to_wait(&st->thread_wait,
-						&wait, TASK_INTERRUPTIBLE);
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				revents = dst_state_poll(st);
-
-				if (revents & mask)
-					break;
-
-				if (signal_pending(current))
-					break;
-
-				/*
-				 * Magic HZ? Polling check above is not safe in
-				 * all cases (like socket reset in BH context),
-				 * so it is simpler just to postpone it to the
-				 * process context instead of implementing
-				 * special locking there.
-				 */
-				schedule_timeout(HZ);
-			}
-			finish_wait(&st->thread_wait, &wait);
-		}
-
-		err = -ECONNRESET;
-		dst_state_lock(st);
-
-		dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
-			__func__, st, revents, revents & err_mask,
-			revents & POLLIN);
-
-		if (revents & err_mask) {
-			dprintk("%s: revents: %x, socket: %p, err: %d.\n",
-					__func__, revents, st->socket, err);
-			err = -ECONNRESET;
-		}
-
-		if (!n->trans_scan_timeout || st->need_exit)
-			err = -ENODEV;
-
-		if (st->socket && (revents & POLLIN))
-			err = kernel_accept(st->socket, &sock, 0);
-
-		dst_state_unlock(st);
-	}
-
-	if (err)
-		goto err_out_exit;
-
-	new = dst_state_alloc(st->node);
-	if (IS_ERR(new)) {
-		err = -ENOMEM;
-		goto err_out_release;
-	}
-	new->socket = sock;
-
-	new->ctl.addr.sa_data_len = sizeof(struct sockaddr);
-	err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr,
-			(int *)&new->ctl.addr.sa_data_len);
-	if (err)
-		goto err_out_put;
-
-	new->permissions = dst_check_permissions(st, new);
-	if (new->permissions == 0) {
-		err = -EPERM;
-		dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
-				"Client is not allowed to connect");
-		goto err_out_put;
-	}
-
-	err = dst_poll_init(new);
-	if (err)
-		goto err_out_put;
-
-	dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
-			"Connected client");
-
-	return new;
-
-err_out_put:
-	dst_state_put(new);
-err_out_release:
-	sock_release(sock);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Each server's block request sometime finishes.
- * Usually it happens in hard irq context of the appropriate controller,
- * so to play good with all cases we just queue BIO into the queue
- * and wake up processing thread, which gets completed request and
- * send (encrypting if needed) it back to the client (if it was a read
- * request), or sends back reply that writing successfully completed.
- */
-static int dst_export_process_request_queue(struct dst_state *st)
-{
-	unsigned long flags;
-	struct dst_export_priv *p = NULL;
-	struct bio *bio;
-	int err = 0;
-
-	while (!list_empty(&st->request_list)) {
-		spin_lock_irqsave(&st->request_lock, flags);
-		if (!list_empty(&st->request_list)) {
-			p = list_first_entry(&st->request_list,
-				struct dst_export_priv, request_entry);
-			list_del(&p->request_entry);
-		}
-		spin_unlock_irqrestore(&st->request_lock, flags);
-
-		if (!p)
-			break;
-
-		bio = p->bio;
-
-		if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ))
-			err = dst_export_crypto(st->node, bio);
-		else
-			err = dst_export_send_bio(bio);
-
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-/*
- * Cleanup export state.
- * It has to wait until all requests are finished,
- * and then free them all.
- */
-static void dst_state_cleanup_export(struct dst_state *st)
-{
-	struct dst_export_priv *p;
-	unsigned long flags;
-
-	/*
-	 * This loop waits for all pending bios to be completed and freed.
-	 */
-	while (atomic_read(&st->refcnt) > 1) {
-		dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
-				__func__, st, atomic_read(&st->refcnt),
-				list_empty(&st->request_list));
-		wait_event_timeout(st->thread_wait,
-				(atomic_read(&st->refcnt) == 1) ||
-				!list_empty(&st->request_list),
-				HZ/2);
-
-		while (!list_empty(&st->request_list)) {
-			p = NULL;
-			spin_lock_irqsave(&st->request_lock, flags);
-			if (!list_empty(&st->request_list)) {
-				p = list_first_entry(&st->request_list,
-					struct dst_export_priv, request_entry);
-				list_del(&p->request_entry);
-			}
-			spin_unlock_irqrestore(&st->request_lock, flags);
-
-			if (p)
-				bio_put(p->bio);
-
-			dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: "
-				"%p.\n", __func__, st, atomic_read(&st->refcnt),
-				list_empty(&st->request_list), p);
-		}
-	}
-
-	dst_state_put(st);
-}
-
-/*
- * Client accepting thread.
- * Not only accepts new connection, but also schedules receiving thread
- * and performs request completion described above.
- */
-static int dst_accept(void *init_data, void *schedule_data)
-{
-	struct dst_state *main_st = schedule_data;
-	struct dst_node *n = init_data;
-	struct dst_state *st;
-	int err;
-
-	while (n->trans_scan_timeout && !main_st->need_exit) {
-		dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n);
-		st = dst_accept_client(main_st);
-		if (IS_ERR(st))
-			continue;
-
-		err = dst_state_schedule_receiver(st);
-		if (!err) {
-			while (n->trans_scan_timeout) {
-				err = wait_event_interruptible_timeout(st->thread_wait,
-					!list_empty(&st->request_list) ||
-					!n->trans_scan_timeout ||
-					st->need_exit,
-					HZ);
-
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				if (list_empty(&st->request_list))
-					continue;
-
-				err = dst_export_process_request_queue(st);
-				if (err)
-					break;
-			}
-
-			st->need_exit = 1;
-			wake_up(&st->thread_wait);
-		}
-
-		dst_state_cleanup_export(st);
-	}
-
-	dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st);
-
-	dst_state_lock(main_st);
-	dst_poll_exit(main_st);
-	dst_state_socket_release(main_st);
-	dst_state_unlock(main_st);
-	dst_state_put(main_st);
-	dprintk("%s: freed listening socket st: %p.\n", __func__, main_st);
-
-	return 0;
-}
-
-int dst_start_export(struct dst_node *n)
-{
-	if (list_empty(&n->security_list)) {
-		printk(KERN_ERR "You are trying to export node '%s' "
-				"without security attributes.\nNo clients will "
-				"be allowed to connect. Exiting.\n", n->name);
-		return -EINVAL;
-	}
-	return dst_node_trans_init(n, sizeof(struct dst_export_priv));
-}
-
-/*
- * Initialize listening state and schedule accepting thread.
- */
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-	struct dst_network_ctl *ctl = &le->ctl;
-
-	memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl));
-
-	st = dst_state_alloc(n);
-	if (IS_ERR(st)) {
-		err = PTR_ERR(st);
-		goto err_out_exit;
-	}
-	memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl));
-
-	err = dst_state_socket_create(st);
-	if (err)
-		goto err_out_put;
-
-	st->socket->sk->sk_reuse = 1;
-
-	err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr,
-			ctl->addr.sa_data_len);
-	if (err)
-		goto err_out_socket_release;
-
-	err = kernel_listen(st->socket, 1024);
-	if (err)
-		goto err_out_socket_release;
-	n->state = st;
-
-	err = dst_poll_init(st);
-	if (err)
-		goto err_out_socket_release;
-
-	dst_state_get(st);
-
-	err = thread_pool_schedule(n->pool, dst_thread_setup,
-			dst_accept, st, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_poll_exit;
-
-	return 0;
-
-err_out_poll_exit:
-	dst_poll_exit(st);
-err_out_socket_release:
-	dst_state_socket_release(st);
-err_out_put:
-	dst_state_put(st);
-err_out_exit:
-	n->state = NULL;
-	return err;
-}
-
-/*
- * Free bio and related private data.
- * Also drop a reference counter for appropriate state,
- * which waits when there are no more block IOs in-flight.
- */
-static void dst_bio_destructor(struct bio *bio)
-{
-	struct bio_vec *bv;
-	struct dst_export_priv *priv = bio->bi_private;
-	int i;
-
-	bio_for_each_segment(bv, bio, i) {
-		if (!bv->bv_page)
-			break;
-
-		__free_page(bv->bv_page);
-	}
-
-	if (priv)
-		dst_state_put(priv->state);
-	bio_free(bio, dst_bio_set);
-}
-
-/*
- * Block IO completion. Queue request to be sent back to
- * the client (or just confirmation).
- */
-static void dst_bio_end_io(struct bio *bio, int err)
-{
-	struct dst_export_priv *p = bio->bi_private;
-	struct dst_state *st = p->state;
-	unsigned long flags;
-
-	spin_lock_irqsave(&st->request_lock, flags);
-	list_add_tail(&p->request_entry, &st->request_list);
-	spin_unlock_irqrestore(&st->request_lock, flags);
-
-	wake_up(&st->thread_wait);
-}
-
-/*
- * Allocate read request for the server.
- */
-static int dst_export_read_request(struct bio *bio, unsigned int total_size)
-{
-	unsigned int size;
-	struct page *page;
-	int err;
-
-	while (total_size) {
-		err = -ENOMEM;
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			goto err_out_exit;
-
-		size = min_t(unsigned int, PAGE_SIZE, total_size);
-
-		err = bio_add_page(bio, page, size, 0);
-		dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
-				__func__, (u64)bio->bi_sector, bio->bi_size,
-				size, err);
-		if (err <= 0)
-			goto err_out_free_page;
-
-		total_size -= size;
-	}
-
-	return 0;
-
-err_out_free_page:
-	__free_page(page);
-err_out_exit:
-	return err;
-}
-
-/*
- * Allocate write request for the server.
- * Should not only get pages, but also read data from the network.
- */
-static int dst_export_write_request(struct dst_state *st,
-		struct bio *bio, unsigned int total_size)
-{
-	unsigned int size;
-	struct page *page;
-	void *data;
-	int err;
-
-	while (total_size) {
-		err = -ENOMEM;
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			goto err_out_exit;
-
-		data = kmap(page);
-		if (!data)
-			goto err_out_free_page;
-
-		size = min_t(unsigned int, PAGE_SIZE, total_size);
-
-		err = dst_data_recv(st, data, size);
-		if (err)
-			goto err_out_unmap_page;
-
-		err = bio_add_page(bio, page, size, 0);
-		if (err <= 0)
-			goto err_out_unmap_page;
-
-		kunmap(page);
-
-		total_size -= size;
-	}
-
-	return 0;
-
-err_out_unmap_page:
-	kunmap(page);
-err_out_free_page:
-	__free_page(page);
-err_out_exit:
-	return err;
-}
-
-/*
- * Groovy, we've gotten an IO request from the client.
- * Allocate BIO from the bioset, private data from the mempool
- * and lots of pages for IO.
- */
-int dst_process_io(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	struct bio *bio;
-	struct dst_export_priv *priv;
-	int err = -ENOMEM;
-
-	if (unlikely(!n->bdev)) {
-		err = -EINVAL;
-		goto err_out_exit;
-	}
-
-	bio = bio_alloc_bioset(GFP_KERNEL,
-			PAGE_ALIGN(cmd->size) >> PAGE_SHIFT,
-			dst_bio_set);
-	if (!bio)
-		goto err_out_exit;
-
-	priv = (struct dst_export_priv *)(((void *)bio) -
-			sizeof (struct dst_export_priv));
-
-	priv->state = dst_state_get(st);
-	priv->bio = bio;
-
-	bio->bi_private = priv;
-	bio->bi_end_io = dst_bio_end_io;
-	bio->bi_destructor = dst_bio_destructor;
-	bio->bi_bdev = n->bdev;
-
-	/*
-	 * Server side is only interested in two low bits:
-	 * uptodate (set by itself actually) and rw block
-	 */
-	bio->bi_flags |= cmd->flags & 3;
-
-	bio->bi_rw = cmd->rw;
-	bio->bi_size = 0;
-	bio->bi_sector = cmd->sector;
-
-	dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id);
-
-	priv->cmd.flags = 0;
-	priv->cmd.size = cmd->size;
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_recv_cdata(st, priv->cmd.hash);
-		if (err)
-			goto err_out_free;
-
-		err = dst_export_write_request(st, bio, cmd->size);
-		if (err)
-			goto err_out_free;
-
-		if (dst_need_crypto(n))
-			return dst_export_crypto(n, bio);
-	} else {
-		err = dst_export_read_request(bio, cmd->size);
-		if (err)
-			goto err_out_free;
-	}
-
-	dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
-			__func__, (u64)bio->bi_sector, bio->bi_size,
-			bio->bi_rw, bio_data_dir(bio),
-			bio->bi_flags, bio->bi_phys_segments);
-
-	generic_make_request(bio);
-
-	return 0;
-
-err_out_free:
-	bio_put(bio);
-err_out_exit:
-	return err;
-}
-
-/*
- * Ok, block IO is ready, let's send it back to the client...
- */
-int dst_export_send_bio(struct bio *bio)
-{
-	struct dst_export_priv *p = bio->bi_private;
-	struct dst_state *st = p->state;
-	struct dst_cmd *cmd = &p->cmd;
-	int err;
-
-	dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
-			__func__, cmd->id, (u64)bio->bi_sector, bio->bi_size,
-			cmd->csize, bio->bi_flags, bio->bi_rw);
-
-	dst_convert_cmd(cmd);
-
-	dst_state_lock(st);
-	if (!st->socket) {
-		err = -ECONNRESET;
-		goto err_out_unlock;
-	}
-
-	if (bio_data_dir(bio) == WRITE) {
-		/* ... or just confirmation that writing has completed. */
-		cmd->size = cmd->csize = 0;
-		err = dst_data_send_header(st->socket, cmd,
-				sizeof(struct dst_cmd), 0);
-		if (err)
-			goto err_out_unlock;
-	} else {
-		err = dst_send_bio(st, cmd, bio);
-		if (err)
-			goto err_out_unlock;
-	}
-
-	dst_state_unlock(st);
-
-	bio_put(bio);
-	return 0;
-
-err_out_unlock:
-	dst_state_unlock(st);
-
-	bio_put(bio);
-	return err;
-}
diff --git a/drivers/staging/dst/state.c b/drivers/staging/dst/state.c
deleted file mode 100644
index 02a05e6c48c3..000000000000
--- a/drivers/staging/dst/state.c
+++ /dev/null
@@ -1,844 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/buffer_head.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-
-/*
- * Polling machinery.
- */
-
-struct dst_poll_helper {
-	poll_table		pt;
-	struct dst_state	*st;
-};
-
-static int dst_queue_wake(wait_queue_t *wait, unsigned mode,
-		int sync, void *key)
-{
-	struct dst_state *st = container_of(wait, struct dst_state, wait);
-
-	wake_up(&st->thread_wait);
-	return 1;
-}
-
-static void dst_queue_func(struct file *file, wait_queue_head_t *whead,
-				 poll_table *pt)
-{
-	struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st;
-
-	st->whead = whead;
-	init_waitqueue_func_entry(&st->wait, dst_queue_wake);
-	add_wait_queue(whead, &st->wait);
-}
-
-void dst_poll_exit(struct dst_state *st)
-{
-	if (st->whead) {
-		remove_wait_queue(st->whead, &st->wait);
-		st->whead = NULL;
-	}
-}
-
-int dst_poll_init(struct dst_state *st)
-{
-	struct dst_poll_helper ph;
-
-	ph.st = st;
-	init_poll_funcptr(&ph.pt, &dst_queue_func);
-
-	st->socket->ops->poll(NULL, st->socket, &ph.pt);
-	return 0;
-}
-
-/*
- * Header receiving function - may block.
- */
-static int dst_data_recv_header(struct socket *sock,
-		void *data, unsigned int size, int block)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	iov.iov_base = data;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT;
-
-	err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len,
-			msg.msg_flags);
-	if (err != size)
-		return -1;
-
-	return 0;
-}
-
-/*
- * Header sending function - may block.
- */
-int dst_data_send_header(struct socket *sock,
-		void *data, unsigned int size, int more)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	iov.iov_base = data;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0);
-
-	err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
-	if (err != size) {
-		dprintk("%s: size: %u, more: %d, err: %d.\n",
-				__func__, size, more, err);
-		return -1;
-	}
-
-	return 0;
-}
-
-/*
- * Block autoconfiguration: request size of the storage and permissions.
- */
-static int dst_request_remote_config(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	int err = -EINVAL;
-	struct dst_cmd *cmd = st->data;
-
-	memset(cmd, 0, sizeof(struct dst_cmd));
-	cmd->cmd = DST_CFG;
-
-	dst_convert_cmd(cmd);
-
-	err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
-	if (err)
-		goto out;
-
-	err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1);
-	if (err)
-		goto out;
-
-	dst_convert_cmd(cmd);
-
-	if (cmd->cmd != DST_CFG) {
-		err = -EINVAL;
-		dprintk("%s: checking result: cmd: %d, size reported: %llu.\n",
-			__func__, cmd->cmd, cmd->sector);
-		goto out;
-	}
-
-	if (n->size != 0)
-		n->size = min_t(loff_t, n->size, cmd->sector);
-	else
-		n->size = cmd->sector;
-
-	n->info->size = n->size;
-	st->permissions = cmd->rw;
-
-out:
-	dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n",
-			__func__, n, err, n->size, st->permissions);
-	return err;
-}
-
-/*
- * Socket machinery.
- */
-
-#define DST_DEFAULT_TIMEO	20000
-
-int dst_state_socket_create(struct dst_state *st)
-{
-	int err;
-	struct socket *sock;
-	struct dst_network_ctl *ctl = &st->ctl;
-
-	err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock);
-	if (err < 0)
-		return err;
-
-	sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo =
-		msecs_to_jiffies(DST_DEFAULT_TIMEO);
-	sock->sk->sk_allocation = GFP_NOIO;
-
-	st->socket = st->read_socket = sock;
-	return 0;
-}
-
-void dst_state_socket_release(struct dst_state *st)
-{
-	dprintk("%s: st: %p, socket: %p, n: %p.\n",
-			__func__, st, st->socket, st->node);
-	if (st->socket) {
-		sock_release(st->socket);
-		st->socket = NULL;
-		st->read_socket = NULL;
-	}
-}
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str)
-{
-	if (sk->ops->family == AF_INET) {
-		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
-		printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str,
-			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
-	} else if (sk->ops->family == AF_INET6) {
-		struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa;
-		printk(KERN_INFO "%s %pi6:%d",
-			str, &sin->sin6_addr, ntohs(sin->sin6_port));
-	}
-}
-
-void dst_state_exit_connected(struct dst_state *st)
-{
-	if (st->socket) {
-		dst_poll_exit(st);
-		st->socket->ops->shutdown(st->socket, 2);
-
-		dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr,
-				"Disconnected peer");
-		dst_state_socket_release(st);
-	}
-}
-
-static int dst_state_init_connected(struct dst_state *st)
-{
-	int err;
-	struct dst_network_ctl *ctl = &st->ctl;
-
-	err = dst_state_socket_create(st);
-	if (err)
-		goto err_out_exit;
-
-	err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr,
-			st->ctl.addr.sa_data_len, 0);
-	if (err)
-		goto err_out_release;
-
-	err = dst_poll_init(st);
-	if (err)
-		goto err_out_release;
-
-	dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr,
-			"Connected to peer");
-
-	return 0;
-
-err_out_release:
-	dst_state_socket_release(st);
-err_out_exit:
-	return err;
-}
-
-/*
- * State reset is used to reconnect to the remote peer.
- * May fail, but who cares, we will try again later.
- */
-static inline void dst_state_reset_nolock(struct dst_state *st)
-{
-	dst_state_exit_connected(st);
-	dst_state_init_connected(st);
-}
-
-static inline void dst_state_reset(struct dst_state *st)
-{
-	dst_state_lock(st);
-	dst_state_reset_nolock(st);
-	dst_state_unlock(st);
-}
-
-/*
- * Basic network sending/receiving functions.
- * Blocked mode is used.
- */
-static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	BUG_ON(!size);
-
-	iov.iov_base = buf;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = MSG_DONTWAIT;
-
-	err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len,
-			msg.msg_flags);
-	if (err <= 0) {
-		dprintk("%s: failed to recv data: size: %llu, err: %d.\n",
-				__func__, size, err);
-		if (err == 0)
-			err = -ECONNRESET;
-
-		dst_state_exit_connected(st);
-	}
-
-	return err;
-}
-
-/*
- * Ping command to early detect failed nodes.
- */
-static int dst_send_ping(struct dst_state *st)
-{
-	struct dst_cmd *cmd = st->data;
-	int err = -ECONNRESET;
-
-	dst_state_lock(st);
-	if (st->socket) {
-		memset(cmd, 0, sizeof(struct dst_cmd));
-
-		cmd->cmd = __cpu_to_be32(DST_PING);
-
-		err = dst_data_send_header(st->socket, cmd,
-				sizeof(struct dst_cmd), 0);
-	}
-	dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__,
-			st, st->socket, err);
-	dst_state_unlock(st);
-
-	return err;
-}
-
-/*
- * Receiving function, which should either return error or read
- * whole block request. If there was no traffic for a one second,
- * send a ping, since remote node may die.
- */
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size)
-{
-	unsigned int revents = 0;
-	unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
-	unsigned int mask = err_mask | POLLIN;
-	struct dst_node *n = st->node;
-	int err = 0;
-
-	while (size && !err) {
-		revents = dst_state_poll(st);
-
-		if (!(revents & mask)) {
-			DEFINE_WAIT(wait);
-
-			for (;;) {
-				prepare_to_wait(&st->thread_wait, &wait,
-						TASK_INTERRUPTIBLE);
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				revents = dst_state_poll(st);
-
-				if (revents & mask)
-					break;
-
-				if (signal_pending(current))
-					break;
-
-				if (!schedule_timeout(HZ)) {
-					err = dst_send_ping(st);
-					if (err)
-						return err;
-				}
-
-				continue;
-			}
-			finish_wait(&st->thread_wait, &wait);
-		}
-
-		err = -ECONNRESET;
-		dst_state_lock(st);
-
-		if (st->socket && (st->read_socket == st->socket) &&
-				(revents & POLLIN)) {
-			err = dst_data_recv_raw(st, data, size);
-			if (err > 0) {
-				data += err;
-				size -= err;
-				err = 0;
-			}
-		}
-
-		if (revents & err_mask || !st->socket) {
-			dprintk("%s: revents: %x, socket: %p, size: %u, "
-					"err: %d.\n", __func__, revents,
-					st->socket, size, err);
-			err = -ECONNRESET;
-		}
-
-		dst_state_unlock(st);
-
-		if (!n->trans_scan_timeout)
-			err = -ENODEV;
-	}
-
-	return err;
-}
-
-/*
- * Send block autoconf reply.
- */
-static int dst_process_cfg(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	int err;
-
-	cmd->sector = n->size;
-	cmd->rw = st->permissions;
-
-	dst_convert_cmd(cmd);
-
-	dst_state_lock(st);
-	err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
-	dst_state_unlock(st);
-
-	return err;
-}
-
-/*
- * Receive block IO from the network.
- */
-static int dst_recv_bio(struct dst_state *st, struct bio *bio,
-		unsigned int total_size)
-{
-	struct bio_vec *bv;
-	int i, err;
-	void *data;
-	unsigned int sz;
-
-	bio_for_each_segment(bv, bio, i) {
-		sz = min(total_size, bv->bv_len);
-
-		dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, "
-				"off: %u.\n", __func__, (u64)bio->bi_sector,
-				bio->bi_size, total_size, bv->bv_len, sz,
-				bv->bv_offset);
-
-		data = kmap(bv->bv_page) + bv->bv_offset;
-		err = dst_data_recv(st, data, sz);
-		kunmap(bv->bv_page);
-
-		bv->bv_len = sz;
-
-		if (err)
-			return err;
-
-		total_size -= sz;
-		if (total_size == 0)
-			break;
-	}
-
-	return 0;
-}
-
-/*
- * Our block IO has just completed and arrived: get it.
- */
-static int dst_process_io_response(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	struct dst_trans *t;
-	int err = 0;
-	struct bio *bio;
-
-	mutex_lock(&n->trans_lock);
-	t = dst_trans_search(n, cmd->id);
-	mutex_unlock(&n->trans_lock);
-
-	if (!t)
-		goto err_out_exit;
-
-	bio = t->bio;
-
-	dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n",
-		__func__, (u64)bio->bi_sector, bio->bi_size, cmd->size,
-		cmd->csize, bio_data_dir(bio));
-
-	if (bio_data_dir(bio) == READ) {
-		if (bio->bi_size != cmd->size - cmd->csize)
-			goto err_out_exit;
-
-		if (dst_need_crypto(n)) {
-			err = dst_recv_cdata(st, t->cmd.hash);
-			if (err)
-				goto err_out_exit;
-		}
-
-		err = dst_recv_bio(st, t->bio, bio->bi_size);
-		if (err)
-			goto err_out_exit;
-
-		if (dst_need_crypto(n))
-			return dst_trans_crypto(t);
-	} else {
-		err = -EBADMSG;
-		if (cmd->size || cmd->csize)
-			goto err_out_exit;
-	}
-
-	dst_trans_remove(t);
-	dst_trans_put(t);
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Receive crypto data.
- */
-int dst_recv_cdata(struct dst_state *st, void *cdata)
-{
-	struct dst_cmd *cmd = st->data;
-	struct dst_node *n = st->node;
-	struct dst_crypto_ctl *c = &n->crypto;
-	int err;
-
-	if (cmd->csize != c->crypto_attached_size) {
-		dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, "
-				"csize: %u != digest size %u.\n",
-				__func__, cmd->cmd, cmd->sector, cmd->size,
-				cmd->csize, c->crypto_attached_size);
-		err = -EINVAL;
-		goto err_out_exit;
-	}
-
-	err = dst_data_recv(st, cdata, cmd->csize);
-	if (err)
-		goto err_out_exit;
-
-	cmd->size -= cmd->csize;
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Receive the command and start its processing.
- */
-static int dst_recv_processing(struct dst_state *st)
-{
-	int err = -EINTR;
-	struct dst_cmd *cmd = st->data;
-
-	/*
-	 * If socket will be reset after this statement, then
-	 * dst_data_recv() will just fail and loop will
-	 * start again, so it can be done without any locks.
-	 *
-	 * st->read_socket is needed to prevents state machine
-	 * breaking between this data reading and subsequent one
-	 * in protocol specific functions during connection reset.
-	 * In case of reset we have to read next command and do
-	 * not expect data for old command to magically appear in
-	 * new connection.
-	 */
-	st->read_socket = st->socket;
-	err = dst_data_recv(st, cmd, sizeof(struct dst_cmd));
-	if (err)
-		goto out_exit;
-
-	dst_convert_cmd(cmd);
-
-	dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, "
-			"sector: %llu, flags: %llx, rw: %llx.\n",
-			__func__, cmd->cmd, cmd->size,
-			cmd->csize, cmd->id, cmd->sector,
-			cmd->flags, cmd->rw);
-
-	/*
-	 * This should catch protocol breakage and random garbage
-	 * instead of commands.
-	 */
-	if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) {
-		err = -EBADMSG;
-		goto out_exit;
-	}
-
-	err = -EPROTO;
-	switch (cmd->cmd) {
-	case DST_IO_RESPONSE:
-		err = dst_process_io_response(st);
-		break;
-	case DST_IO:
-		err = dst_process_io(st);
-		break;
-	case DST_CFG:
-		err = dst_process_cfg(st);
-		break;
-	case DST_PING:
-		err = 0;
-		break;
-	default:
-		break;
-	}
-
-out_exit:
-	return err;
-}
-
-/*
- * Receiving thread. For the client node we should try to reconnect,
- * for accepted client we just drop the state and expect it to reconnect.
- */
-static int dst_recv(void *init_data, void *schedule_data)
-{
-	struct dst_state *st = schedule_data;
-	struct dst_node *n = init_data;
-	int err = 0;
-
-	dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n",
-			__func__, st, n, n->trans_scan_timeout, st->need_exit);
-
-	while (n->trans_scan_timeout && !st->need_exit) {
-		err = dst_recv_processing(st);
-		if (err < 0) {
-			if (!st->ctl.type)
-				break;
-
-			if (!n->trans_scan_timeout || st->need_exit)
-				break;
-
-			dst_state_reset(st);
-			msleep(1000);
-		}
-	}
-
-	st->need_exit = 1;
-	wake_up(&st->thread_wait);
-
-	dprintk("%s: freeing receiving socket st: %p.\n", __func__, st);
-	dst_state_lock(st);
-	dst_state_exit_connected(st);
-	dst_state_unlock(st);
-	dst_state_put(st);
-
-	dprintk("%s: freed receiving socket st: %p.\n", __func__, st);
-
-	return err;
-}
-
-/*
- * Network state dies here and borns couple of lines below.
- * This object is the main network state processing engine:
- * sending, receiving, reconnections, all network related
- * tasks are handled on behalf of the state.
- */
-static void dst_state_free(struct dst_state *st)
-{
-	dprintk("%s: st: %p.\n", __func__, st);
-	if (st->cleanup)
-		st->cleanup(st);
-	kfree(st->data);
-	kfree(st);
-}
-
-struct dst_state *dst_state_alloc(struct dst_node *n)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-
-	st = kzalloc(sizeof(struct dst_state), GFP_KERNEL);
-	if (!st)
-		goto err_out_exit;
-
-	st->node = n;
-	st->need_exit = 0;
-
-	st->size = PAGE_SIZE;
-	st->data = kmalloc(st->size, GFP_KERNEL);
-	if (!st->data)
-		goto err_out_free;
-
-	spin_lock_init(&st->request_lock);
-	INIT_LIST_HEAD(&st->request_list);
-
-	mutex_init(&st->state_lock);
-	init_waitqueue_head(&st->thread_wait);
-
-	/*
-	 * One for processing thread, another one for node itself.
-	 */
-	atomic_set(&st->refcnt, 2);
-
-	dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node);
-
-	return st;
-
-err_out_free:
-	kfree(st);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-int dst_state_schedule_receiver(struct dst_state *st)
-{
-	return thread_pool_schedule_private(st->node->pool, dst_thread_setup,
-			dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node);
-}
-
-/*
- * Initialize client's connection to the remote peer: allocate state,
- * connect and perform block IO autoconfiguration.
- */
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-
-	st = dst_state_alloc(n);
-	if (IS_ERR(st)) {
-		err = PTR_ERR(st);
-		goto err_out_exit;
-	}
-	memcpy(&st->ctl, r, sizeof(struct dst_network_ctl));
-
-	err = dst_state_init_connected(st);
-	if (err)
-		goto err_out_free_data;
-
-	err = dst_request_remote_config(st);
-	if (err)
-		goto err_out_exit_connected;
-	n->state = st;
-
-	err = dst_state_schedule_receiver(st);
-	if (err)
-		goto err_out_exit_connected;
-
-	return 0;
-
-err_out_exit_connected:
-	dst_state_exit_connected(st);
-err_out_free_data:
-	dst_state_free(st);
-err_out_exit:
-	n->state = NULL;
-	return err;
-}
-
-void dst_state_put(struct dst_state *st)
-{
-	dprintk("%s: st: %p, refcnt: %d.\n",
-			__func__, st, atomic_read(&st->refcnt));
-	if (atomic_dec_and_test(&st->refcnt))
-		dst_state_free(st);
-}
-
-/*
- * Send block IO to the network one by one using zero-copy ->sendpage().
- */
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio)
-{
-	struct bio_vec *bv;
-	struct dst_crypto_ctl *c = &st->node->crypto;
-	int err, i = 0;
-	int flags = MSG_WAITALL;
-
-	err = dst_data_send_header(st->socket, cmd,
-		sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt);
-	if (err)
-		goto err_out_exit;
-
-	bio_for_each_segment(bv, bio, i) {
-		if (i < bio->bi_vcnt - 1)
-			flags |= MSG_MORE;
-
-		err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset,
-				bv->bv_len, flags);
-		if (err <= 0)
-			goto err_out_exit;
-	}
-
-	return 0;
-
-err_out_exit:
-	dprintk("%s: %d/%d, flags: %x, err: %d.\n",
-			__func__, i, bio->bi_vcnt, flags, err);
-	return err;
-}
-
-/*
- * Send transaction to the remote peer.
- */
-int dst_trans_send(struct dst_trans *t)
-{
-	int err;
-	struct dst_state *st = t->n->state;
-	struct bio *bio = t->bio;
-
-	dst_convert_cmd(&t->cmd);
-
-	dst_state_lock(st);
-	if (!st->socket) {
-		err = dst_state_init_connected(st);
-		if (err)
-			goto err_out_unlock;
-	}
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_send_bio(st, &t->cmd, t->bio);
-	} else {
-		err = dst_data_send_header(st->socket, &t->cmd,
-				sizeof(struct dst_cmd), 0);
-	}
-	if (err)
-		goto err_out_reset;
-
-	dst_state_unlock(st);
-	return 0;
-
-err_out_reset:
-	dst_state_reset_nolock(st);
-err_out_unlock:
-	dst_state_unlock(st);
-
-	return err;
-}
diff --git a/drivers/staging/dst/thread_pool.c b/drivers/staging/dst/thread_pool.c
deleted file mode 100644
index 29a82b2602f3..000000000000
--- a/drivers/staging/dst/thread_pool.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/dst.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-
-/*
- * Thread pool abstraction allows to schedule a work to be performed
- * on behalf of kernel thread. One does not operate with threads itself,
- * instead user provides setup and cleanup callbacks for thread pool itself,
- * and action and cleanup callbacks for each submitted work.
- *
- * Each worker has private data initialized at creation time and data,
- * provided by user at scheduling time.
- *
- * When action is being performed, thread can not be used by other users,
- * instead they will sleep until there is free thread to pick their work.
- */
-struct thread_pool_worker {
-	struct list_head	worker_entry;
-
-	struct task_struct	*thread;
-
-	struct thread_pool	*pool;
-
-	int			error;
-	int			has_data;
-	int			need_exit;
-	unsigned int		id;
-
-	wait_queue_head_t	wait;
-
-	void			*private;
-	void			*schedule_data;
-
-	int			(*action)(void *private, void *schedule_data);
-	void			(*cleanup)(void *private);
-};
-
-static void thread_pool_exit_worker(struct thread_pool_worker *w)
-{
-	kthread_stop(w->thread);
-
-	w->cleanup(w->private);
-	kfree(w);
-}
-
-/*
- * Called to mark thread as ready and allow users to schedule new work.
- */
-static void thread_pool_worker_make_ready(struct thread_pool_worker *w)
-{
-	struct thread_pool *p = w->pool;
-
-	mutex_lock(&p->thread_lock);
-
-	if (!w->need_exit) {
-		list_move_tail(&w->worker_entry, &p->ready_list);
-		w->has_data = 0;
-		mutex_unlock(&p->thread_lock);
-
-		wake_up(&p->wait);
-	} else {
-		p->thread_num--;
-		list_del(&w->worker_entry);
-		mutex_unlock(&p->thread_lock);
-
-		thread_pool_exit_worker(w);
-	}
-}
-
-/*
- * Thread action loop: waits until there is new work.
- */
-static int thread_pool_worker_func(void *data)
-{
-	struct thread_pool_worker *w = data;
-
-	while (!kthread_should_stop()) {
-		wait_event_interruptible(w->wait,
-			kthread_should_stop() || w->has_data);
-
-		if (kthread_should_stop())
-			break;
-
-		if (!w->has_data)
-			continue;
-
-		w->action(w->private, w->schedule_data);
-		thread_pool_worker_make_ready(w);
-	}
-
-	return 0;
-}
-
-/*
- * Remove single worker without specifying which one.
- */
-void thread_pool_del_worker(struct thread_pool *p)
-{
-	struct thread_pool_worker *w = NULL;
-
-	while (!w && p->thread_num) {
-		wait_event(p->wait, !list_empty(&p->ready_list) ||
-				!p->thread_num);
-
-		dprintk("%s: locking list_empty: %d, thread_num: %d.\n",
-				__func__, list_empty(&p->ready_list),
-				p->thread_num);
-
-		mutex_lock(&p->thread_lock);
-		if (!list_empty(&p->ready_list)) {
-			w = list_first_entry(&p->ready_list,
-					struct thread_pool_worker,
-					worker_entry);
-
-			dprintk("%s: deleting w: %p, thread_num: %d, "
-					"list: %p [%p.%p].\n", __func__,
-					w, p->thread_num, &p->ready_list,
-					p->ready_list.prev, p->ready_list.next);
-
-			p->thread_num--;
-			list_del(&w->worker_entry);
-		}
-		mutex_unlock(&p->thread_lock);
-	}
-
-	if (w)
-		thread_pool_exit_worker(w);
-	dprintk("%s: deleted w: %p, thread_num: %d.\n",
-			__func__, w, p->thread_num);
-}
-
-/*
- * Remove a worker with given ID.
- */
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id)
-{
-	struct thread_pool_worker *w;
-	int found = 0;
-
-	mutex_lock(&p->thread_lock);
-	list_for_each_entry(w, &p->ready_list, worker_entry) {
-		if (w->id == id) {
-			found = 1;
-			p->thread_num--;
-			list_del(&w->worker_entry);
-			break;
-		}
-	}
-
-	if (!found) {
-		list_for_each_entry(w, &p->active_list, worker_entry) {
-			if (w->id == id) {
-				w->need_exit = 1;
-				break;
-			}
-		}
-	}
-	mutex_unlock(&p->thread_lock);
-
-	if (found)
-		thread_pool_exit_worker(w);
-}
-
-/*
- * Add new worker thread with given parameters.
- * If initialization callback fails, return error.
- */
-int thread_pool_add_worker(struct thread_pool *p,
-		char *name,
-		unsigned int id,
-		void *(*init)(void *private),
-		void (*cleanup)(void *private),
-		void *private)
-{
-	struct thread_pool_worker *w;
-	int err = -ENOMEM;
-
-	w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL);
-	if (!w)
-		goto err_out_exit;
-
-	w->pool = p;
-	init_waitqueue_head(&w->wait);
-	w->cleanup = cleanup;
-	w->id = id;
-
-	w->thread = kthread_run(thread_pool_worker_func, w, "%s", name);
-	if (IS_ERR(w->thread)) {
-		err = PTR_ERR(w->thread);
-		goto err_out_free;
-	}
-
-	w->private = init(private);
-	if (IS_ERR(w->private)) {
-		err = PTR_ERR(w->private);
-		goto err_out_stop_thread;
-	}
-
-	mutex_lock(&p->thread_lock);
-	list_add_tail(&w->worker_entry, &p->ready_list);
-	p->thread_num++;
-	mutex_unlock(&p->thread_lock);
-
-	return 0;
-
-err_out_stop_thread:
-	kthread_stop(w->thread);
-err_out_free:
-	kfree(w);
-err_out_exit:
-	return err;
-}
-
-/*
- * Destroy the whole pool.
- */
-void thread_pool_destroy(struct thread_pool *p)
-{
-	while (p->thread_num) {
-		dprintk("%s: num: %d.\n", __func__, p->thread_num);
-		thread_pool_del_worker(p);
-	}
-
-	kfree(p);
-}
-
-/*
- * Create a pool with given number of threads.
- * They will have sequential IDs started from zero.
- */
-struct thread_pool *thread_pool_create(int num, char *name,
-		void *(*init)(void *private),
-		void (*cleanup)(void *private),
-		void *private)
-{
-	struct thread_pool_worker *w, *tmp;
-	struct thread_pool *p;
-	int err = -ENOMEM;
-	int i;
-
-	p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL);
-	if (!p)
-		goto err_out_exit;
-
-	init_waitqueue_head(&p->wait);
-	mutex_init(&p->thread_lock);
-	INIT_LIST_HEAD(&p->ready_list);
-	INIT_LIST_HEAD(&p->active_list);
-	p->thread_num = 0;
-
-	for (i = 0; i < num; ++i) {
-		err = thread_pool_add_worker(p, name, i, init,
-				cleanup, private);
-		if (err)
-			goto err_out_free_all;
-	}
-
-	return p;
-
-err_out_free_all:
-	list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
-		list_del(&w->worker_entry);
-		thread_pool_exit_worker(w);
-	}
-	kfree(p);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Schedule execution of the action on a given thread,
- * provided ID pointer has to match previously stored
- * private data.
- */
-int thread_pool_schedule_private(struct thread_pool *p,
-		int (*setup)(void *private, void *data),
-		int (*action)(void *private, void *data),
-		void *data, long timeout, void *id)
-{
-	struct thread_pool_worker *w, *tmp, *worker = NULL;
-	int err = 0;
-
-	while (!worker && !err) {
-		timeout = wait_event_interruptible_timeout(p->wait,
-				!list_empty(&p->ready_list),
-				timeout);
-
-		if (!timeout) {
-			err = -ETIMEDOUT;
-			break;
-		}
-
-		worker = NULL;
-		mutex_lock(&p->thread_lock);
-		list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
-			if (id && id != w->private)
-				continue;
-
-			worker = w;
-
-			list_move_tail(&w->worker_entry, &p->active_list);
-
-			err = setup(w->private, data);
-			if (!err) {
-				w->schedule_data = data;
-				w->action = action;
-				w->has_data = 1;
-				wake_up(&w->wait);
-			} else {
-				list_move_tail(&w->worker_entry,
-						&p->ready_list);
-			}
-
-			break;
-		}
-		mutex_unlock(&p->thread_lock);
-	}
-
-	return err;
-}
-
-/*
- * Schedule execution on arbitrary thread from the pool.
- */
-int thread_pool_schedule(struct thread_pool *p,
-		int (*setup)(void *private, void *data),
-		int (*action)(void *private, void *data),
-		void *data, long timeout)
-{
-	return thread_pool_schedule_private(p, setup,
-			action, data, timeout, NULL);
-}
diff --git a/drivers/staging/dst/trans.c b/drivers/staging/dst/trans.c
deleted file mode 100644
index 1c36a6bc31d5..000000000000
--- a/drivers/staging/dst/trans.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/slab.h>
-#include <linux/mempool.h>
-
-/*
- * Transaction memory pool size.
- */
-static int dst_mempool_num = 32;
-module_param(dst_mempool_num, int, 0644);
-
-/*
- * Transaction tree management.
- */
-static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new)
-{
-	if (gen < new)
-		return 1;
-	if (gen > new)
-		return -1;
-	return 0;
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen)
-{
-	struct rb_root *root = &node->trans_root;
-	struct rb_node *n = root->rb_node;
-	struct dst_trans *t, *ret = NULL;
-	int cmp;
-
-	while (n) {
-		t = rb_entry(n, struct dst_trans, trans_entry);
-
-		cmp = dst_trans_cmp(t->gen, gen);
-		if (cmp < 0)
-			n = n->rb_left;
-		else if (cmp > 0)
-			n = n->rb_right;
-		else {
-			ret = t;
-			break;
-		}
-	}
-
-	dprintk("%s: %s transaction: id: %llu.\n", __func__,
-			(ret) ? "found" : "not found", gen);
-
-	return ret;
-}
-
-static int dst_trans_insert(struct dst_trans *new)
-{
-	struct rb_root *root = &new->n->trans_root;
-	struct rb_node **n = &root->rb_node, *parent = NULL;
-	struct dst_trans *ret = NULL, *t;
-	int cmp;
-
-	while (*n) {
-		parent = *n;
-
-		t = rb_entry(parent, struct dst_trans, trans_entry);
-
-		cmp = dst_trans_cmp(t->gen, new->gen);
-		if (cmp < 0)
-			n = &parent->rb_left;
-		else if (cmp > 0)
-			n = &parent->rb_right;
-		else {
-			ret = t;
-			break;
-		}
-	}
-
-	new->send_time = jiffies;
-	if (ret) {
-		printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, "
-			"send_time: %lu, new: gen: %llu, bio: %llu/%u, "
-			"send_time: %lu.\n", __func__,
-			ret->gen, (u64)ret->bio->bi_sector,
-			ret->bio->bi_size, ret->send_time,
-			new->gen, (u64)new->bio->bi_sector,
-			new->bio->bi_size, new->send_time);
-		return -EEXIST;
-	}
-
-	rb_link_node(&new->trans_entry, parent, n);
-	rb_insert_color(&new->trans_entry, root);
-
-	dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
-		__func__, new->gen, (u64)new->bio->bi_sector,
-		new->bio->bi_size, new->send_time);
-
-	return 0;
-}
-
-int dst_trans_remove_nolock(struct dst_trans *t)
-{
-	struct dst_node *n = t->n;
-
-	if (t->trans_entry.rb_parent_color) {
-		rb_erase(&t->trans_entry, &n->trans_root);
-		t->trans_entry.rb_parent_color = 0;
-	}
-	return 0;
-}
-
-int dst_trans_remove(struct dst_trans *t)
-{
-	int ret;
-	struct dst_node *n = t->n;
-
-	mutex_lock(&n->trans_lock);
-	ret = dst_trans_remove_nolock(t);
-	mutex_unlock(&n->trans_lock);
-
-	return ret;
-}
-
-/*
- * When transaction is completed and there are no more users,
- * we complete appriate block IO request with given error status.
- */
-void dst_trans_put(struct dst_trans *t)
-{
-	if (atomic_dec_and_test(&t->refcnt)) {
-		struct bio *bio = t->bio;
-
-		dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
-				__func__, t, t->gen, bio);
-
-		bio_endio(bio, t->error);
-		bio_put(bio);
-
-		dst_node_put(t->n);
-		mempool_free(t, t->n->trans_pool);
-	}
-}
-
-/*
- * Process given block IO request: allocate transaction, insert it into the tree
- * and send/schedule crypto processing.
- */
-int dst_process_bio(struct dst_node *n, struct bio *bio)
-{
-	struct dst_trans *t;
-	int err = -ENOMEM;
-
-	t = mempool_alloc(n->trans_pool, GFP_NOFS);
-	if (!t)
-		goto err_out_exit;
-
-	t->n = dst_node_get(n);
-	t->bio = bio;
-	t->error = 0;
-	t->retries = 0;
-	atomic_set(&t->refcnt, 1);
-	t->gen = atomic_long_inc_return(&n->gen);
-
-	t->enc = bio_data_dir(bio);
-	dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen);
-
-	mutex_lock(&n->trans_lock);
-	err = dst_trans_insert(t);
-	mutex_unlock(&n->trans_lock);
-	if (err)
-		goto err_out_free;
-
-	dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
-			__func__, t->gen, (u64)bio->bi_sector,
-			bio->bi_size, t->enc, dst_need_crypto(n));
-
-	if (dst_need_crypto(n) && t->enc)
-		dst_trans_crypto(t);
-	else
-		dst_trans_send(t);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	mempool_free(t, n->trans_pool);
-err_out_exit:
-	bio_endio(bio, err);
-	bio_put(bio);
-	return err;
-}
-
-/*
- * Scan for timeout/stale transactions.
- * Each transaction is being resent multiple times before error completion.
- */
-static void dst_trans_scan(struct work_struct *work)
-{
-	struct dst_node *n = container_of(work, struct dst_node,
-			trans_work.work);
-	struct rb_node *rb_node;
-	struct dst_trans *t;
-	unsigned long timeout = n->trans_scan_timeout;
-	int num = 10 * n->trans_max_retries;
-
-	mutex_lock(&n->trans_lock);
-
-	for (rb_node = rb_first(&n->trans_root); rb_node; ) {
-		t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
-		if (timeout && time_after(t->send_time + timeout, jiffies)
-				&& t->retries == 0)
-			break;
-#if 0
-		dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
-			__func__, t, t->gen, n->name,
-			t->retries, n->trans_max_retries);
-#endif
-		if (--num == 0)
-			break;
-
-		dst_trans_get(t);
-
-		rb_node = rb_next(rb_node);
-
-		if (timeout && (++t->retries < n->trans_max_retries)) {
-			dst_trans_send(t);
-		} else {
-			t->error = -ETIMEDOUT;
-			dst_trans_remove_nolock(t);
-			dst_trans_put(t);
-		}
-
-		dst_trans_put(t);
-	}
-
-	mutex_unlock(&n->trans_lock);
-
-	/*
-	 * If no timeout specified then system is in the middle of exiting
-	 * process, so no need to reschedule scanning process again.
-	 */
-	if (timeout) {
-		if (!num)
-			timeout = HZ;
-		schedule_delayed_work(&n->trans_work, timeout);
-	}
-}
-
-/*
- * Flush all transactions and mark them as timed out.
- * Destroy transaction pools.
- */
-void dst_node_trans_exit(struct dst_node *n)
-{
-	struct dst_trans *t;
-	struct rb_node *rb_node;
-
-	if (!n->trans_cache)
-		return;
-
-	dprintk("%s: n: %p, cancelling the work.\n", __func__, n);
-	cancel_delayed_work_sync(&n->trans_work);
-	flush_scheduled_work();
-	dprintk("%s: n: %p, work has been cancelled.\n", __func__, n);
-
-	for (rb_node = rb_first(&n->trans_root); rb_node; ) {
-		t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
-		dprintk("%s: t: %p, gen: %llu, n: %s.\n",
-			__func__, t, t->gen, n->name);
-
-		rb_node = rb_next(rb_node);
-
-		t->error = -ETIMEDOUT;
-		dst_trans_remove_nolock(t);
-		dst_trans_put(t);
-	}
-
-	mempool_destroy(n->trans_pool);
-	kmem_cache_destroy(n->trans_cache);
-}
-
-/*
- * Initialize transaction storage for given node.
- * Transaction stores not only control information,
- * but also network command and crypto data (if needed)
- * to reduce number of allocations. Thus transaction size
- * differs from node to node.
- */
-int dst_node_trans_init(struct dst_node *n, unsigned int size)
-{
-	/*
-	 * We need this, since node with given name can be dropped from the
-	 * hash table, but be still alive, so subsequent creation of the node
-	 * with the same name may collide with existing cache name.
-	 */
-
-	snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n);
-
-	n->trans_cache = kmem_cache_create(n->cache_name,
-			size + n->crypto.crypto_attached_size,
-			0, 0, NULL);
-	if (!n->trans_cache)
-		goto err_out_exit;
-
-	n->trans_pool = mempool_create_slab_pool(dst_mempool_num,
-			n->trans_cache);
-	if (!n->trans_pool)
-		goto err_out_cache_destroy;
-
-	mutex_init(&n->trans_lock);
-	n->trans_root = RB_ROOT;
-
-	INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan);
-	schedule_delayed_work(&n->trans_work, n->trans_scan_timeout);
-
-	dprintk("%s: n: %p, size: %u, crypto: %u.\n",
-		__func__, n, size, n->crypto.crypto_attached_size);
-
-	return 0;
-
-err_out_cache_destroy:
-	kmem_cache_destroy(n->trans_cache);
-err_out_exit:
-	return -ENOMEM;
-}
diff --git a/include/linux/dst.h b/include/linux/dst.h
deleted file mode 100644
index e26fed84b1aa..000000000000
--- a/include/linux/dst.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __DST_H
-#define __DST_H
-
-#include <linux/types.h>
-#include <linux/connector.h>
-
-#define DST_NAMELEN		32
-#define DST_NAME		"dst"
-
-enum {
-	/* Remove node with given id from storage */
-	DST_DEL_NODE	= 0,
-	/* Add remote node with given id to the storage */
-	DST_ADD_REMOTE,
-	/* Add local node with given id to the storage to be exported and used by remote peers */
-	DST_ADD_EXPORT,
-	/* Crypto initialization command (hash/cipher used to protect the connection) */
-	DST_CRYPTO,
-	/* Security attributes for given connection (permissions for example) */
-	DST_SECURITY,
-	/* Register given node in the block layer subsystem */
-	DST_START,
-	DST_CMD_MAX
-};
-
-struct dst_ctl
-{
-	/* Storage name */
-	char			name[DST_NAMELEN];
-	/* Command flags */
-	__u32			flags;
-	/* Command itself (see above) */
-	__u32			cmd;
-	/* Maximum number of pages per single request in this device */
-	__u32			max_pages;
-	/* Stale/error transaction scanning timeout in milliseconds */
-	__u32			trans_scan_timeout;
-	/* Maximum number of retry sends before completing transaction as broken */
-	__u32			trans_max_retries;
-	/* Storage size */
-	__u64			size;
-};
-
-/* Reply command carries completion status */
-struct dst_ctl_ack
-{
-	struct cn_msg		msg;
-	int			error;
-	int			unused[3];
-};
-
-/*
- * Unfortunaltely socket address structure is not exported to userspace
- * and is redefined there.
- */
-#define SADDR_MAX_DATA	128
-
-struct saddr {
-	/* address family, AF_xxx	*/
-	unsigned short		sa_family;
-	/* 14 bytes of protocol address	*/
-	char			sa_data[SADDR_MAX_DATA];
-	/* Number of bytes used in sa_data */
-	unsigned short		sa_data_len;
-};
-
-/* Address structure */
-struct dst_network_ctl
-{
-	/* Socket type: datagram, stream...*/
-	unsigned int		type;
-	/* Let me guess, is it a Jupiter diameter? */
-	unsigned int		proto;
-	/* Peer's address */
-	struct saddr		addr;
-};
-
-struct dst_crypto_ctl
-{
-	/* Cipher and hash names */
-	char			cipher_algo[DST_NAMELEN];
-	char			hash_algo[DST_NAMELEN];
-
-	/* Key sizes. Can be zero for digest for example */
-	unsigned int		cipher_keysize, hash_keysize;
-	/* Alignment. Calculated by the DST itself. */
-	unsigned int		crypto_attached_size;
-	/* Number of threads to perform crypto operations */
-	int			thread_num;
-};
-
-/* Export security attributes have this bits checked in when client connects */
-#define DST_PERM_READ		(1<<0)
-#define DST_PERM_WRITE		(1<<1)
-
-/*
- * Right now it is simple model, where each remote address
- * is assigned to set of permissions it is allowed to perform.
- * In real world block device does not know anything but
- * reading and writing, so it should be more than enough.
- */
-struct dst_secure_user
-{
-	unsigned int		permissions;
-	struct saddr		addr;
-};
-
-/*
- * Export control command: device to export and network address to accept
- * clients to work with given device
- */
-struct dst_export_ctl
-{
-	char			device[DST_NAMELEN];
-	struct dst_network_ctl	ctl;
-};
-
-enum {
-	DST_CFG	= 1, 		/* Request remote configuration */
-	DST_IO,			/* IO command */
-	DST_IO_RESPONSE,	/* IO response */
-	DST_PING,		/* Keepalive message */
-	DST_NCMD_MAX,
-};
-
-struct dst_cmd
-{
-	/* Network command itself, see above */
-	__u32			cmd;
-	/*
-	 * Size of the attached data
-	 * (in most cases, for READ command it means how many bytes were requested)
-	 */
-	__u32			size;
-	/* Crypto size: number of attached bytes with digest/hmac */
-	__u32			csize;
-	/* Here we can carry secret data */
-	__u32			reserved;
-	/* Read/write bits, see how they are encoded in bio structure */
-	__u64			rw;
-	/* BIO flags */
-	__u64			flags;
-	/* Unique command id (like transaction ID) */
-	__u64			id;
-	/* Sector to start IO from */
-	__u64			sector;
-	/* Hash data is placed after this header */
-	__u8			hash[0];
-};
-
-/*
- * Convert command to/from network byte order.
- * We do not use hton*() functions, since there is
- * no 64-bit implementation.
- */
-static inline void dst_convert_cmd(struct dst_cmd *c)
-{
-	c->cmd = __cpu_to_be32(c->cmd);
-	c->csize = __cpu_to_be32(c->csize);
-	c->size = __cpu_to_be32(c->size);
-	c->sector = __cpu_to_be64(c->sector);
-	c->id = __cpu_to_be64(c->id);
-	c->flags = __cpu_to_be64(c->flags);
-	c->rw = __cpu_to_be64(c->rw);
-}
-
-/* Transaction id */
-typedef __u64 dst_gen_t;
-
-#ifdef __KERNEL__
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/device.h>
-#include <linux/mempool.h>
-#include <linux/net.h>
-#include <linux/poll.h>
-#include <linux/rbtree.h>
-
-#ifdef CONFIG_DST_DEBUG
-#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
-#else
-static inline void __attribute__ ((format (printf, 1, 2)))
-	dprintk(const char *fmt, ...) {}
-#endif
-
-struct dst_node;
-
-struct dst_trans
-{
-	/* DST node we are working with */
-	struct dst_node		*n;
-
-	/* Entry inside transaction tree */
-	struct rb_node		trans_entry;
-
-	/* Merlin kills this transaction when this memory cell equals zero */
-	atomic_t		refcnt;
-
-	/* How this transaction should be processed by crypto engine */
-	short			enc;
-	/* How many times this transaction was resent */
-	short			retries;
-	/* Completion status */
-	int			error;
-
-	/* When did we send it to the remote peer */
-	long			send_time;
-
-	/* My name is...
-	 * Well, computers does not speak, they have unique id instead */
-	dst_gen_t		gen;
-
-	/* Block IO we are working with */
-	struct bio		*bio;
-
-	/* Network command for above block IO request */
-	struct dst_cmd		cmd;
-};
-
-struct dst_crypto_engine
-{
-	/* What should we do with all block requests */
-	struct crypto_hash	*hash;
-	struct crypto_ablkcipher	*cipher;
-
-	/* Pool of pages used to encrypt data into before sending */
-	int			page_num;
-	struct page		**pages;
-
-	/* What to do with current request */
-	int			enc;
-	/* Who we are and where do we go */
-	struct scatterlist	*src, *dst;
-
-	/* Maximum timeout waiting for encryption to be completed */
-	long			timeout;
-	/* IV is a 64-bit sequential counter */
-	u64			iv;
-
-	/* Secret data */
-	void			*private;
-
-	/* Cached temporary data lives here */
-	int			size;
-	void			*data;
-};
-
-struct dst_state
-{
-	/* The main state protection */
-	struct mutex		state_lock;
-
-	/* Polling machinery for sockets */
-	wait_queue_t 		wait;
-	wait_queue_head_t 	*whead;
-	/* Most of events are being waited here */
-	wait_queue_head_t 	thread_wait;
-
-	/* Who owns this? */
-	struct dst_node		*node;
-
-	/* Network address for this state */
-	struct dst_network_ctl	ctl;
-
-	/* Permissions to work with: read-only or rw connection */
-	u32			permissions;
-
-	/* Called when we need to clean private data */
-	void			(* cleanup)(struct dst_state *st);
-
-	/* Used by the server: BIO completion queues BIOs here */
-	struct list_head	request_list;
-	spinlock_t		request_lock;
-
-	/* Guess what? No, it is not number of planets */
-	atomic_t		refcnt;
-
-	/* This flags is set when connection should be dropped */
-	int			need_exit;
-
-	/*
-	 * Socket to work with. Second pointer is used for
-	 * lockless check if socket was changed before performing
-	 * next action (like working with cached polling result)
-	 */
-	struct socket		*socket, *read_socket;
-
-	/* Cached preallocated data */
-	void			*data;
-	unsigned int		size;
-
-	/* Currently processed command */
-	struct dst_cmd		cmd;
-};
-
-struct dst_info
-{
-	/* Device size */
-	u64			size;
-
-	/* Local device name for export devices */
-	char			local[DST_NAMELEN];
-
-	/* Network setup */
-	struct dst_network_ctl	net;
-
-	/* Sysfs bits use this */
-	struct device		device;
-};
-
-struct dst_node
-{
-	struct list_head	node_entry;
-
-	/* Hi, my name is stored here */
-	char			name[DST_NAMELEN];
-	/* My cache name is stored here */
-	char			cache_name[DST_NAMELEN];
-
-	/* Block device attached to given node.
-	 * Only valid for exporting nodes */
-	struct block_device 	*bdev;
-	/* Network state machine for given peer */
-	struct dst_state	*state;
-
-	/* Block IO machinery */
-	struct request_queue	*queue;
-	struct gendisk		*disk;
-
-	/* Number of threads in processing pool */
-	int			thread_num;
-	/* Maximum number of pages in single IO */
-	int			max_pages;
-
-	/* I'm that big in bytes */
-	loff_t			size;
-
-	/* Exported to userspace node information */
-	struct dst_info		*info;
-
-	/*
-	 * Security attribute list.
-	 * Used only by exporting node currently.
-	 */
-	struct list_head	security_list;
-	struct mutex		security_lock;
-
-	/*
-	 * When this unerflows below zero, university collapses.
-	 * But this will not happen, since node will be freed,
-	 * when reference counter reaches zero.
-	 */
-	atomic_t		refcnt;
-
-	/* How precisely should I be started? */
-	int 			(*start)(struct dst_node *);
-
-	/* Crypto capabilities */
-	struct dst_crypto_ctl	crypto;
-	u8			*hash_key;
-	u8			*cipher_key;
-
-	/* Pool of processing thread */
-	struct thread_pool	*pool;
-
-	/* Transaction IDs live here */
-	atomic_long_t		gen;
-
-	/*
-	 * How frequently and how many times transaction
-	 * tree should be scanned to drop stale objects.
-	 */
-	long			trans_scan_timeout;
-	int			trans_max_retries;
-
-	/* Small gnomes live here */
-	struct rb_root		trans_root;
-	struct mutex		trans_lock;
-
-	/*
-	 * Transaction cache/memory pool.
-	 * It is big enough to contain not only transaction
-	 * itself, but additional crypto data (digest/hmac).
-	 */
-	struct kmem_cache	*trans_cache;
-	mempool_t		*trans_pool;
-
-	/* This entity scans transaction tree */
-	struct delayed_work 	trans_work;
-
-	wait_queue_head_t	wait;
-};
-
-/* Kernel representation of the security attribute */
-struct dst_secure
-{
-	struct list_head	sec_entry;
-	struct dst_secure_user	sec;
-};
-
-int dst_process_bio(struct dst_node *n, struct bio *bio);
-
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
-
-static inline struct dst_state *dst_state_get(struct dst_state *st)
-{
-	BUG_ON(atomic_read(&st->refcnt) == 0);
-	atomic_inc(&st->refcnt);
-	return st;
-}
-
-void dst_state_put(struct dst_state *st);
-
-struct dst_state *dst_state_alloc(struct dst_node *n);
-int dst_state_socket_create(struct dst_state *st);
-void dst_state_socket_release(struct dst_state *st);
-
-void dst_state_exit_connected(struct dst_state *st);
-
-int dst_state_schedule_receiver(struct dst_state *st);
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
-
-static inline void dst_state_lock(struct dst_state *st)
-{
-	mutex_lock(&st->state_lock);
-}
-
-static inline void dst_state_unlock(struct dst_state *st)
-{
-	mutex_unlock(&st->state_lock);
-}
-
-void dst_poll_exit(struct dst_state *st);
-int dst_poll_init(struct dst_state *st);
-
-static inline unsigned int dst_state_poll(struct dst_state *st)
-{
-	unsigned int revents = POLLHUP | POLLERR;
-
-	dst_state_lock(st);
-	if (st->socket)
-		revents = st->socket->ops->poll(NULL, st->socket, NULL);
-	dst_state_unlock(st);
-
-	return revents;
-}
-
-static inline int dst_thread_setup(void *private, void *data)
-{
-	return 0;
-}
-
-void dst_node_put(struct dst_node *n);
-
-static inline struct dst_node *dst_node_get(struct dst_node *n)
-{
-	atomic_inc(&n->refcnt);
-	return n;
-}
-
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
-int dst_recv_cdata(struct dst_state *st, void *cdata);
-int dst_data_send_header(struct socket *sock,
-		void *data, unsigned int size, int more);
-
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
-
-int dst_process_io(struct dst_state *st);
-int dst_export_crypto(struct dst_node *n, struct bio *bio);
-int dst_export_send_bio(struct bio *bio);
-int dst_start_export(struct dst_node *n);
-
-int __init dst_export_init(void);
-void dst_export_exit(void);
-
-/* Private structure for export block IO requests */
-struct dst_export_priv
-{
-	struct list_head		request_entry;
-	struct dst_state		*state;
-	struct bio			*bio;
-	struct dst_cmd			cmd;
-};
-
-static inline void dst_trans_get(struct dst_trans *t)
-{
-	atomic_inc(&t->refcnt);
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
-int dst_trans_remove(struct dst_trans *t);
-int dst_trans_remove_nolock(struct dst_trans *t);
-void dst_trans_put(struct dst_trans *t);
-
-/*
- * Convert bio into network command.
- */
-static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
-		u32 command, u64 id)
-{
-	cmd->cmd = command;
-	cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
-	cmd->rw = bio->bi_rw;
-	cmd->size = bio->bi_size;
-	cmd->csize = 0;
-	cmd->id = id;
-	cmd->sector = bio->bi_sector;
-};
-
-int dst_trans_send(struct dst_trans *t);
-int dst_trans_crypto(struct dst_trans *t);
-
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
-void dst_node_crypto_exit(struct dst_node *n);
-
-static inline int dst_need_crypto(struct dst_node *n)
-{
-	struct dst_crypto_ctl *c = &n->crypto;
-	/*
-	 * Logical OR is appropriate here, but boolean one produces
-	 * more optimal code, so it is used instead.
-	 */
-	return (c->hash_algo[0] | c->cipher_algo[0]);
-}
-
-int dst_node_trans_init(struct dst_node *n, unsigned int size);
-void dst_node_trans_exit(struct dst_node *n);
-
-/*
- * Pool of threads.
- * Ready list contains threads currently free to be used,
- * active one contains threads with some work scheduled for them.
- * Caller can wait in given queue when thread is ready.
- */
-struct thread_pool
-{
-	int			thread_num;
-	struct mutex		thread_lock;
-	struct list_head	ready_list, active_list;
-
-	wait_queue_head_t	wait;
-};
-
-void thread_pool_del_worker(struct thread_pool *p);
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
-int thread_pool_add_worker(struct thread_pool *p,
-		char *name,
-		unsigned int id,
-		void *(* init)(void *data),
-		void (* cleanup)(void *data),
-		void *data);
-
-void thread_pool_destroy(struct thread_pool *p);
-struct thread_pool *thread_pool_create(int num, char *name,
-		void *(* init)(void *data),
-		void (* cleanup)(void *data),
-		void *data);
-
-int thread_pool_schedule(struct thread_pool *p,
-		int (* setup)(void *stored_private, void *setup_data),
-		int (* action)(void *stored_private, void *setup_data),
-		void *setup_data, long timeout);
-int thread_pool_schedule_private(struct thread_pool *p,
-		int (* setup)(void *private, void *data),
-		int (* action)(void *private, void *data),
-		void *data, long timeout, void *id);
-
-#endif /* __KERNEL__ */
-#endif /* __DST_H */
-- 
cgit v1.2.3


From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Fri, 25 Dec 2009 17:30:22 -0800
Subject: net: restore ip source validation

when using policy routing and the skb mark:
there are cases where a back path validation requires us
to use a different routing table for src ip validation than
the one used for mapping ingress dst ip.
One such a case is transparent proxying where we pretend to be
the destination system and therefore the local table
is used for incoming packets but possibly a main table would
be used on outbound.
Make the default behavior to allow the above and if users
need to turn on the symmetry via sysctl src_valid_mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 1 +
 include/linux/sysctl.h     | 1 +
 net/ipv4/devinet.c         | 1 +
 net/ipv4/fib_frontend.c    | 2 ++
 4 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 699e85c01a4d..b2304929434e 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
+#define IN_DEV_SRC_VMARK(in_dev)    	IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
 						       ACCEPT_SOURCE_ROUTE)
 #define IN_DEV_ACCEPT_LOCAL(in_dev)	IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 877ba039e6a4..bd27fbc9db62 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
 	NET_IPV4_CONF_ACCEPT_LOCAL=23,
+	NET_IPV4_CONF_SRC_VMARK=24,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5cdbc102a418..040c4f05b653 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
 					"accept_source_route"),
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3323168ee52d..82dbf711d6d0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
+		if (mark && !IN_DEV_SRC_VMARK(in_dev))
+			fl.mark = 0;
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 29 Dec 2009 08:35:35 +0100
Subject: block: Fix incorrect alignment offset reporting and update
 documentation

queue_sector_alignment_offset returned the wrong value which caused
partitions to report an incorrect alignment_offset.  Since offset
alignment calculation is needed several places it has been split into a
separate helper function.  The topology stacking function has been
updated accordingly.

Furthermore, comments have been added to clarify how the stacking
function works.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 44 +++++++++++++++++++++++++++++++++-----------
 include/linux/blkdev.h | 11 +++++++++--
 2 files changed, 42 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index e14fcbcedbfa..d52d4adc440b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b)
 
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
- * @t:	the stacking driver limits (top)
- * @b:  the underlying queue limits (bottom)
+ * @t:	the stacking driver limits (top device)
+ * @b:  the underlying queue limits (bottom, component device)
  * @offset:  offset to beginning of data within component device
  *
  * Description:
- *    Merges two queue_limit structs.  Returns 0 if alignment didn't
- *    change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    This function is used by stacking drivers like MD and DM to ensure
+ *    that all component devices have compatible block sizes and
+ *    alignments.  The stacking driver must provide a queue_limits
+ *    struct (top) and then iteratively call the stacking function for
+ *    all component (bottom) devices.  The stacking function will
+ *    attempt to combine the values and ensure proper alignment.
+ *
+ *    Returns 0 if the top and bottom queue_limits are compatible.  The
+ *    top device's block sizes and alignment offsets may be adjusted to
+ *    ensure alignment with the bottom device. If no compatible sizes
+ *    and alignments exist, -1 is returned and the resulting top
+ *    queue_limits will have the misaligned flag set to indicate that
+ *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		     sector_t offset)
 {
 	sector_t alignment;
-	unsigned int top, bottom, granularity;
+	unsigned int top, bottom;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
-	granularity = max(b->physical_block_size, b->io_min);
-	alignment = b->alignment_offset - (offset & (granularity - 1));
+	alignment = queue_limit_alignment_offset(b, offset);
 
+	/* Bottom device has different alignment.  Check that it is
+	 * compatible with the current top alignment.
+	 */
 	if (t->alignment_offset != alignment) {
 
 		top = max(t->physical_block_size, t->io_min)
 			+ t->alignment_offset;
-		bottom = granularity + alignment;
+		bottom = max(b->physical_block_size, b->io_min) + alignment;
 
+		/* Verify that top and bottom intervals line up */
 		if (max(top, bottom) & (min(top, bottom) - 1))
 			t->misaligned = 1;
 	}
@@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->no_cluster |= b->no_cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
+	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
 		t->physical_block_size = t->logical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Minimum I/O a multiple of the physical block size? */
 	if (t->io_min & (t->physical_block_size - 1)) {
 		t->io_min = t->physical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Optimal I/O a multiple of the physical block size? */
 	if (t->io_opt & (t->physical_block_size - 1)) {
 		t->io_opt = 0;
 		t->misaligned = 1;
 	}
 
+	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
 		& (max(t->physical_block_size, t->io_min) - 1);
 
+	/* Verify that new alignment_offset is on a logical block boundary */
 	if (t->alignment_offset & (t->logical_block_size - 1))
 		t->misaligned = 1;
 
 	/* Discard alignment and granularity */
 	if (b->discard_granularity) {
+		unsigned int granularity = b->discard_granularity;
+		offset &= granularity - 1;
 
-		alignment = b->discard_alignment -
-			(offset & (b->discard_granularity - 1));
+		alignment = (granularity + b->discard_alignment - offset)
+			& (granularity - 1);
 
 		if (t->discard_granularity != 0 &&
 		    t->discard_alignment != alignment) {
@@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 				t->discard_misaligned = 1;
 		}
 
+		t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
 		t->discard_alignment = lcm(t->discard_alignment, alignment) &
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 784a919aa0d0..59b832be3044 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q)
 	return q->limits.alignment_offset;
 }
 
+static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset)
+{
+	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+
+	offset &= granularity - 1;
+	return (granularity + lim->alignment_offset - offset) & (granularity - 1);
+}
+
 static inline int queue_sector_alignment_offset(struct request_queue *q,
 						sector_t sector)
 {
-	return ((sector << 9) - q->limits.alignment_offset)
-		& (q->limits.io_min - 1);
+	return queue_limit_alignment_offset(&q->limits, sector << 9);
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)
-- 
cgit v1.2.3


From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <cladisch@fastmail.net>
Date: Thu, 24 Dec 2009 12:05:58 +0100
Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP
 listeners

Control of more than one AV/C device at once --- e.g. camcorders, tape
decks, audio devices, TV tuners --- failed or worked only unreliably,
depending on driver implementation.  This affected kernelspace and
userspace drivers alike and was caused by firewire-core's inability to
accept multiple registrations of FCP listeners.

The fix allows multiple address handlers to be registered for the FCP
command and response registers.  When a request for these registers is
received, all handlers are invoked, and the Firewire response is
generated by the core and not by any handler.

The cdev API does not change, i.e., userspace is still expected to send
a response for FCP requests; this response is silently ignored.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (changelog, rebased, whitespace)
---
 drivers/firewire/core-cdev.c            |  26 ++++---
 drivers/firewire/core-transaction.c     | 118 ++++++++++++++++++++++++++------
 drivers/media/dvb/firewire/firedtv-fw.c |  12 +---
 include/linux/firewire-cdev.h           |   3 +
 include/linux/firewire.h                |   4 +-
 5 files changed, 119 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 231e6ee5ba43..2cb22d160f6e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -601,8 +601,9 @@ static void release_request(struct client *client,
 	struct inbound_transaction_resource *r = container_of(resource,
 			struct inbound_transaction_resource, resource);
 
-	fw_send_response(client->device->card, r->request,
-			 RCODE_CONFLICT_ERROR);
+	if (r->request)
+		fw_send_response(client->device->card, r->request,
+				 RCODE_CONFLICT_ERROR);
 	kfree(r);
 }
 
@@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
  failed:
 	kfree(r);
 	kfree(e);
-	fw_send_response(card, request, RCODE_CONFLICT_ERROR);
+	if (request)
+		fw_send_response(card, request, RCODE_CONFLICT_ERROR);
 }
 
 static void release_address_handler(struct client *client,
@@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer)
 
 	r = container_of(resource, struct inbound_transaction_resource,
 			 resource);
-	if (request->length < r->length)
-		r->length = request->length;
-
-	if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) {
-		ret = -EFAULT;
-		goto out;
+	if (r->request) {
+		if (request->length < r->length)
+			r->length = request->length;
+		if (copy_from_user(r->data, u64_to_uptr(request->data),
+				   r->length)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		fw_send_response(client->device->card, r->request,
+				 request->rcode);
 	}
-
-	fw_send_response(client->device->card, r->request, request->rcode);
  out:
 	kfree(r);
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 842739df23e2..495849eb13cc 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler(
 	return NULL;
 }
 
+static bool is_enclosing_handler(struct fw_address_handler *handler,
+				 unsigned long long offset, size_t length)
+{
+	return handler->offset <= offset &&
+		offset + length <= handler->offset + handler->length;
+}
+
 static struct fw_address_handler *lookup_enclosing_address_handler(
 	struct list_head *list, unsigned long long offset, size_t length)
 {
 	struct fw_address_handler *handler;
 
 	list_for_each_entry(handler, list, link) {
-		if (handler->offset <= offset &&
-		    offset + length <= handler->offset + handler->length)
+		if (is_enclosing_handler(handler, offset, length))
 			return handler;
 	}
 
@@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region =
 	{ .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, };
 #endif  /*  0  */
 
+static bool is_in_fcp_region(u64 offset, size_t length)
+{
+	return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+		offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END);
+}
+
 /**
  * fw_core_add_address_handler - register for incoming requests
  * @handler: callback
@@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region =
  * give the details of the particular request.
  *
  * Return value:  0 on success, non-zero otherwise.
+ *
  * The start offset of the handler's address region is determined by
  * fw_core_add_address_handler() and is returned in handler->offset.
+ *
+ * Address allocations are exclusive, except for the FCP registers.
  */
 int fw_core_add_address_handler(struct fw_address_handler *handler,
 				const struct fw_address_region *region)
@@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler,
 
 	handler->offset = region->start;
 	while (handler->offset + handler->length <= region->end) {
-		other =
-		    lookup_overlapping_address_handler(&address_handler_list,
-						       handler->offset,
-						       handler->length);
+		if (is_in_fcp_region(handler->offset, handler->length))
+			other = NULL;
+		else
+			other = lookup_overlapping_address_handler
+					(&address_handler_list,
+					 handler->offset, handler->length);
 		if (other != NULL) {
 			handler->offset += other->length;
 		} else {
@@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p)
 void fw_send_response(struct fw_card *card,
 		      struct fw_request *request, int rcode)
 {
+	if (WARN_ONCE(!request, "invalid for FCP address handlers"))
+		return;
+
 	/* unified transaction or broadcast transaction: don't respond */
 	if (request->ack != ACK_PENDING ||
 	    HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) {
@@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_send_response);
 
-void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+static void handle_exclusive_region_request(struct fw_card *card,
+					    struct fw_packet *p,
+					    struct fw_request *request,
+					    unsigned long long offset)
 {
 	struct fw_address_handler *handler;
-	struct fw_request *request;
-	unsigned long long offset;
 	unsigned long flags;
 	int tcode, destination, source;
 
-	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
-		return;
-
-	request = allocate_request(p);
-	if (request == NULL) {
-		/* FIXME: send statically allocated busy packet. */
-		return;
-	}
-
-	offset      =
-		((unsigned long long)
-		 HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2];
 	tcode       = HEADER_GET_TCODE(p->header[0]);
 	destination = HEADER_GET_DESTINATION(p->header[0]);
 	source      = HEADER_GET_SOURCE(p->header[1]);
@@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
 					  request->data, request->length,
 					  handler->callback_data);
 }
+
+static void handle_fcp_region_request(struct fw_card *card,
+				      struct fw_packet *p,
+				      struct fw_request *request,
+				      unsigned long long offset)
+{
+	struct fw_address_handler *handler;
+	unsigned long flags;
+	int tcode, destination, source;
+
+	if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+	     offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) ||
+	    request->length > 0x200) {
+		fw_send_response(card, request, RCODE_ADDRESS_ERROR);
+
+		return;
+	}
+
+	tcode       = HEADER_GET_TCODE(p->header[0]);
+	destination = HEADER_GET_DESTINATION(p->header[0]);
+	source      = HEADER_GET_SOURCE(p->header[1]);
+
+	if (tcode != TCODE_WRITE_QUADLET_REQUEST &&
+	    tcode != TCODE_WRITE_BLOCK_REQUEST) {
+		fw_send_response(card, request, RCODE_TYPE_ERROR);
+
+		return;
+	}
+
+	spin_lock_irqsave(&address_handler_lock, flags);
+	list_for_each_entry(handler, &address_handler_list, link) {
+		if (is_enclosing_handler(handler, offset, request->length))
+			handler->address_callback(card, NULL, tcode,
+						  destination, source,
+						  p->generation, p->speed,
+						  offset, request->data,
+						  request->length,
+						  handler->callback_data);
+	}
+	spin_unlock_irqrestore(&address_handler_lock, flags);
+
+	fw_send_response(card, request, RCODE_COMPLETE);
+}
+
+void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+{
+	struct fw_request *request;
+	unsigned long long offset;
+
+	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
+		return;
+
+	request = allocate_request(p);
+	if (request == NULL) {
+		/* FIXME: send statically allocated busy packet. */
+		return;
+	}
+
+	offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) |
+		p->header[2];
+
+	if (!is_in_fcp_region(offset, request->length))
+		handle_exclusive_region_request(card, p, request, offset);
+	else
+		handle_fcp_region_request(card, p, request, offset);
+
+}
 EXPORT_SYMBOL(fw_core_handle_request);
 
 void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c
index fe44789ab037..6223bf01efe9 100644
--- a/drivers/media/dvb/firewire/firedtv-fw.c
+++ b/drivers/media/dvb/firewire/firedtv-fw.c
@@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	unsigned long flags;
 	int su;
 
-	if ((tcode != TCODE_WRITE_QUADLET_REQUEST &&
-	     tcode != TCODE_WRITE_BLOCK_REQUEST) ||
-	    offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE ||
-	    length == 0 ||
-	    (((u8 *)payload)[0] & 0xf0) != 0) {
-		fw_send_response(card, request, RCODE_TYPE_ERROR);
+	if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0)
 		return;
-	}
 
 	su = ((u8 *)payload)[1] & 0x7;
 
@@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	}
 	spin_unlock_irqrestore(&node_list_lock, flags);
 
-	if (fdtv) {
+	if (fdtv)
 		avc_recv(fdtv, payload, length);
-		fw_send_response(card, request, RCODE_COMPLETE);
-	}
 }
 
 static struct fw_address_handler fcp_handler = {
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index c6b3ca3af6df..1f716d9f714b 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -340,6 +340,9 @@ struct fw_cdev_send_response {
  * The @closure field is passed back to userspace in the response event.
  * The @handle field is an out parameter, returning a handle to the allocated
  * range to be used for later deallocation of the range.
+ *
+ * The address range is allocated on all local nodes.  The address allocation
+ * is exclusive except for the FCP command and response registers.
  */
 struct fw_cdev_allocate {
 	__u64 offset;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 9416a461b696..a0e67150a729 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
 					  void *data, size_t length,
 					  void *callback_data);
 /*
- * Important note:  The callback must guarantee that either fw_send_response()
- * or kfree() is called on the @request.
+ * Important note:  Except for the FCP registers, the callback must guarantee
+ * that either fw_send_response() or kfree() is called on the @request.
  */
 typedef void (*fw_address_callback_t)(struct fw_card *card,
 				      struct fw_request *request,
-- 
cgit v1.2.3


From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 08:41:07 +0100
Subject: block: blk_rq_err_sectors cleanup

blk_rq_err_sectors() seems useless, get rid of it.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 59b832be3044..9b98173a8184 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
- * blk_rq_err_sectors()		: sectors left till the next error boundary
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
-static inline unsigned int blk_rq_err_sectors(const struct request *rq)
-{
-	return blk_rq_err_bytes(rq) >> 9;
-}
-
 /*
  * Request issue related functions.
  */
-- 
cgit v1.2.3


From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:26 +0800
Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0

The struct syscall_metadata variable name in SYSCALL_DEFINE0
should be __syscall_meta__##sname instead of __syscall_meta_##sname
to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6.

This error causes event_enter_##sname->data to point to the wrong
location, which causes syscalls which are defined by SYSCALL_DEFINE0()
not to be traced.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e90d6f6..207466a49f3d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -195,7 +195,7 @@ struct perf_event_attr;
 	static const struct syscall_metadata __used		\
 	  __attribute__((__aligned__(4)))			\
 	  __attribute__((section("__syscalls_metadata")))	\
-	  __syscall_meta_##sname = {				\
+	  __syscall_meta__##sname = {				\
 		.name 		= "sys_"#sname,			\
 		.nb_args 	= 0,				\
 		.enter_event	= &event_enter__##sname,	\
-- 
cgit v1.2.3


From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Sat, 26 Dec 2009 17:58:11 +0100
Subject: kfifo: Fix typo in comment

It's DECLARE_KFIFO, not DECLARED_KFIFO.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 3d44e9c65a8e..7c6b32a1421c 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -81,7 +81,7 @@ union { \
 }
 
 /**
- * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
+ * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO
  * @name: name of the declared kfifo datatype
  */
 #define INIT_KFIFO(name) \
-- 
cgit v1.2.3


From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 30 Dec 2009 15:36:42 +0800
Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable

Introduce kernel parameter acpi_sleep=sci_force_enable

some laptop requires SCI_EN being set directly on resume,
or else they hung somewhere in the resume code path.

We already have a blacklist for these laptops but we still need
this option, especially when debugging some suspend/resume problems,
in case there are systems that need this workaround and are not yet
in the blacklist.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/kernel-parameters.txt |  5 ++++-
 arch/x86/kernel/acpi/sleep.c        |  2 ++
 drivers/acpi/sleep.c                | 29 +++++++++++++++++------------
 include/linux/acpi.h                |  1 +
 4 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ba4d9dff113..736d45602886 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
-				  old_ordering, s4_nonvs }
+				  old_ordering, s4_nonvs, sci_force_enable }
 			See Documentation/power/video.txt for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
@@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			of _PTS is used by default).
 			s4_nonvs prevents the kernel from saving/restoring the
 			ACPI NVS memory during hibernation.
+			sci_force_enable causes the kernel to set SCI_EN directly
+			on resume from S1/S3 (which is against the ACPI spec,
+			but some broken systems don't work without it).
 
 	acpi_use_timer_override [HW,ACPI]
 			Use timer override. For some broken Nvidia NF5 boards
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b91..f9961034e557 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
+		if (strncmp(str, "sci_force_enable", 16) == 0)
+			acpi_set_sci_en_on_resume();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5f2c379ab7bf..79d33d908b5a 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state)
 
 #ifdef CONFIG_ACPI_SLEEP
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;
+/*
+ * According to the ACPI specification the BIOS should make sure that ACPI is
+ * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
+ * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
+ * on such systems during resume.  Unfortunately that doesn't help in
+ * particularly pathological cases in which SCI_EN has to be set directly on
+ * resume, although the specification states very clearly that this flag is
+ * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
+ * cases.
+ */
+static bool set_sci_en_on_resume;
+
+void __init acpi_set_sci_en_on_resume(void)
+{
+	set_sci_en_on_resume = true;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -170,18 +187,6 @@ static void acpi_pm_end(void)
 #endif /* CONFIG_ACPI_SLEEP */
 
 #ifdef CONFIG_SUSPEND
-/*
- * According to the ACPI specification the BIOS should make sure that ACPI is
- * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
- * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
- * on such systems during resume.  Unfortunately that doesn't help in
- * particularly pathological cases in which SCI_EN has to be set directly on
- * resume, although the specification states very clearly that this flag is
- * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
- * cases.
- */
-static bool set_sci_en_on_resume;
-
 extern void do_suspend_lowlevel(void);
 
 static u32 acpi_suspend_states[] = {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ce945d4845fc..36924255c0d5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
+void __init acpi_set_sci_en_on_resume(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.3


From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 30 Dec 2009 08:23:30 +0000
Subject: phylib: Properly reinitialize PHYs after hibernation

Since hibernation assumes power loss, we should fully reinitialize
PHYs (including platform fixups), as if PHYs were just attached.

This patch factors phy_init_hw() out of phy_attach_direct(), then
converts mdio_bus to dev_pm_ops and adds an appropriate restore()
callback.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   | 50 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/phy/phy_device.c | 30 +++++++++++++-------------
 include/linux/phy.h          |  1 +
 3 files changed, 59 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 49252d390903..e17b70291bbc 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv)
 		(phydev->phy_id & phydrv->phy_id_mask));
 }
 
+#ifdef CONFIG_PM
+
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->dev.driver;
@@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 	return true;
 }
 
-/* Suspend and resume.  Copied from platform_suspend and
- * platform_resume
- */
-static int mdio_bus_suspend(struct device * dev, pm_message_t state)
+static int mdio_bus_suspend(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state)
 	return phydrv->suspend(phydev);
 }
 
-static int mdio_bus_resume(struct device * dev)
+static int mdio_bus_resume(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -338,11 +337,48 @@ no_resume:
 	return 0;
 }
 
+static int mdio_bus_restore(struct device *dev)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+	struct net_device *netdev = phydev->attached_dev;
+	int ret;
+
+	if (!netdev)
+		return 0;
+
+	ret = phy_init_hw(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* The PHY needs to renegotiate. */
+	phydev->link = 0;
+	phydev->state = PHY_UP;
+
+	phy_start_machine(phydev, NULL);
+
+	return 0;
+}
+
+static struct dev_pm_ops mdio_bus_pm_ops = {
+	.suspend = mdio_bus_suspend,
+	.resume = mdio_bus_resume,
+	.freeze = mdio_bus_suspend,
+	.thaw = mdio_bus_resume,
+	.restore = mdio_bus_restore,
+};
+
+#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops)
+
+#else
+
+#define MDIO_BUS_PM_OPS NULL
+
+#endif /* CONFIG_PM */
+
 struct bus_type mdio_bus_type = {
 	.name		= "mdio_bus",
 	.match		= mdio_bus_match,
-	.suspend	= mdio_bus_suspend,
-	.resume		= mdio_bus_resume,
+	.pm		= MDIO_BUS_PM_OPS,
 };
 EXPORT_SYMBOL(mdio_bus_type);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b10fedd82143..8212b2b93422 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_disconnect);
 
+int phy_init_hw(struct phy_device *phydev)
+{
+	int ret;
+
+	if (!phydev->drv || !phydev->drv->config_init)
+		return 0;
+
+	ret = phy_scan_fixups(phydev);
+	if (ret < 0)
+		return ret;
+
+	return phydev->drv->config_init(phydev);
+}
+
 /**
  * phy_attach_direct - attach a network device to a given PHY device pointer
  * @dev: network device to attach
@@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	/* Do initial configuration here, now that
 	 * we have certain key parameters
 	 * (dev_flags and interface) */
-	if (phydev->drv->config_init) {
-		int err;
-
-		err = phy_scan_fixups(phydev);
-
-		if (err < 0)
-			return err;
-
-		err = phydev->drv->config_init(phydev);
-
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
+	return phy_init_hw(phydev);
 }
 EXPORT_SYMBOL(phy_attach_direct);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b1368b8f6572..7968defd2fa7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
 int phy_device_register(struct phy_device *phy);
 int phy_clear_interrupt(struct phy_device *phydev);
 int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+int phy_init_hw(struct phy_device *phydev);
 int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		u32 flags, phy_interface_t interface);
 struct phy_device * phy_attach(struct net_device *dev,
-- 
cgit v1.2.3


From 0f4bd46ec252887f44f1f065b41867cac8f70dfb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 22 Dec 2009 03:15:43 +0000
Subject: kmsg_dump: Dump on crash_kexec as well

crash_kexec gets called before kmsg_dump(KMSG_DUMP_OOPS) if
panic_on_oops is set, so the kernel log buffer is not stored
for this case.

This patch adds a KMSG_DUMP_KEXEC dump type which gets called
when crash_kexec() is invoked. To avoid getting double dumps,
the old KMSG_DUMP_PANIC is moved below crash_kexec(). The
mtdoops driver is modified to handle KMSG_DUMP_KEXEC in the
same way as a panic.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c     | 2 +-
 include/linux/kmsg_dump.h | 1 +
 kernel/kexec.c            | 4 ++++
 kernel/panic.c            | 3 ++-
 kernel/printk.c           | 1 +
 5 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index a714ec482761..92e12df0917f 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -322,7 +322,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
 	/* Panics must be written immediately */
-	if (reason == KMSG_DUMP_PANIC) {
+	if (reason != KMSG_DUMP_OOPS) {
 		if (!cxt->mtd->panic_write)
 			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
 		else
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index e32aa268efac..24b44145a886 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -17,6 +17,7 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
+	KMSG_DUMP_KEXEC,
 };
 
 /**
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 433e9fcc1fc5..ae217488fef8 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1074,6 +1075,9 @@ void crash_kexec(struct pt_regs *regs)
 	if (mutex_trylock(&kexec_mutex)) {
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
+
+			kmsg_dump(KMSG_DUMP_KEXEC);
+
 			crash_setup_regs(&fixed_regs, regs);
 			crash_save_vmcoreinfo();
 			machine_crash_shutdown(&fixed_regs);
diff --git a/kernel/panic.c b/kernel/panic.c
index 5827f7b97254..c787333282b8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,7 +75,6 @@ NORET_TYPE void panic(const char * fmt, ...)
 	dump_stack();
 #endif
 
-	kmsg_dump(KMSG_DUMP_PANIC);
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
 	 * everything else.
@@ -83,6 +82,8 @@ NORET_TYPE void panic(const char * fmt, ...)
 	 */
 	crash_kexec(NULL);
 
+	kmsg_dump(KMSG_DUMP_PANIC);
+
 	/*
 	 * Note smp_send_stop is the usual smp shutdown function, which
 	 * unfortunately means it may not be hardened to work in a panic
diff --git a/kernel/printk.c b/kernel/printk.c
index 1ded8e7dd19b..2c9dc0b03a5e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1467,6 +1467,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
 static const char const *kmsg_reasons[] = {
 	[KMSG_DUMP_OOPS]	= "oops",
 	[KMSG_DUMP_PANIC]	= "panic",
+	[KMSG_DUMP_KEXEC]	= "kexec",
 };
 
 static const char *kmsg_to_str(enum kmsg_dump_reason reason)
-- 
cgit v1.2.3


From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 00:39:22 +0100
Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion

i_xattr_sem depends on the reiserfs lock. But after we grab
i_xattr_sem, we may relax/relock the reiserfs lock while waiting
on a freezed filesystem, creating a dependency inversion between
the two locks.

In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's
create a reiserfs_down_read_safe() that acts like
reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing
another lock to avoid undesired dependencies induced by the
heivyweight reiserfs lock.

This fixes the following warning:

[  990.005931] =======================================================
[  990.012373] [ INFO: possible circular locking dependency detected ]
[  990.013233] 2.6.33-rc1 #1
[  990.013233] -------------------------------------------------------
[  990.013233] dbench/1891 is trying to acquire lock:
[  990.013233]  (&REISERFS_SB(s)->lock){+.+.+.}, at: [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]
[  990.013233] but task is already holding lock:
[  990.013233]  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] which lock already depends on the new lock.
[  990.013233]
[  990.013233]
[  990.013233] the existing dependency chain (in reverse order) is:
[  990.013233]
[  990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}:
[  990.013233]        [<ffffffff81063afc>] __lock_acquire+0xf9c/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814ac194>] down_write+0x44/0x80
[  990.013233]        [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
[  990.013233]        [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]        [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]        [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]        [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]        [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] other info that might help us debug this:
[  990.013233]
[  990.013233] 2 locks held by dbench/1891:
[  990.013233]  #0:  (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [<ffffffff810e2678>] vfs_setxattr+0x78/0xc0
[  990.013233]  #1:  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] stack backtrace:
[  990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1
[  990.013233] Call Trace:
[  990.013233]  [<ffffffff81061639>] print_circular_bug+0xe9/0xf0
[  990.013233]  [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81062592>] ? mark_held_locks+0x72/0xa0
[  990.013233]  [<ffffffff814ab81d>] ? __mutex_unlock_slowpath+0xbd/0x140
[  990.013233]  [<ffffffff810628ad>] ? trace_hardirqs_on_caller+0x14d/0x1a0
[  990.013233]  [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]  [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]  [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]  [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]  [<ffffffff814abcb4>] ? __mutex_lock_common+0x284/0x3b0
[  990.013233]  [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]  [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]  [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]  [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]  [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]  [<ffffffff81056018>] ? sched_clock_cpu+0xb8/0x100
[  990.013233]  [<ffffffff8105eded>] ? trace_hardirqs_off+0xd/0x10
[  990.013233]  [<ffffffff810560a3>] ? cpu_clock+0x43/0x50
[  990.013233]  [<ffffffff810c6820>] ? fget+0xb0/0x110
[  990.013233]  [<ffffffff810c6770>] ? fget+0x0/0x110
[  990.013233]  [<ffffffff81002ddc>] ? sysret_check+0x27/0x62
[  990.013233]  [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]  [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b

Reported-and-tested-by: Christian Kujau <lists@nerdbynature.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/xattr.c         | 2 +-
 include/linux/reiserfs_fs.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8891cd88a3f4..a0e2e7acdc75 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	down_write(&REISERFS_I(inode)->i_xattr_sem);
+	reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb);
 
 	xahash = xattr_hash(buffer, buffer_size);
 	while (buffer_pos < buffer_size || buffer_pos == 0) {
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4351b49e2b1e..35d3f459b0ac 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	down_read(sem);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 03:20:19 +0100
Subject: reiserfs: Warn on lock relax if taken recursively

When we relax the reiserfs lock to avoid creating unwanted
dependencies against others locks while grabbing these,
we want to ensure it has not been taken recursively, otherwise
the lock won't be really relaxed. Only its depth will be decreased.
The unwanted dependency would then actually happen.

To prevent from that, add a reiserfs_lock_check_recursive() call
in the places that need it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/lock.c          | 9 +++++++++
 include/linux/reiserfs_fs.h | 9 +++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
 		reiserfs_panic(sb, "%s called without kernel lock held %d",
 			       caller);
 }
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 35d3f459b0ac..793bf8351ab8 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s);
 int reiserfs_write_lock_once(struct super_block *s);
 void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
 /*
  * Several mutexes depend on the write lock.
  * However sometimes we want to relax the write lock while we hold
@@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock(m);
 	reiserfs_write_lock(s);
@@ -101,6 +108,7 @@ static inline void
 reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock_nested(m, subclass);
 	reiserfs_write_lock(s);
@@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 static inline void
 reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	down_read(sem);
 	reiserfs_write_lock(s);
-- 
cgit v1.2.3


From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 20 Nov 2009 14:16:33 +0100
Subject: resource: move kernel function inside __KERNEL__

It is an internal function. Move it inside __KERNEL__ ifdef, along
with task_struct declaration.

Then we get:
--- /usr/include/linux/resource.h       2009-09-14 15:09:29.000000000 +0200
+++ usr/include/linux/resource.h       2010-01-04 11:30:54.000000000 +0100
@@ -3,8 +3,6 @@

 #include <linux/time.h>

-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,5 @@
  */
 #include <asm/resource.h>

-int getrusage(struct task_struct *p, int who, struct rusage *ru);

 #endif

***********

include/linux/Kbuild is untouched, since unifdef is run even on
headers-y nowadays.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/linux/resource.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e626082..f1e914eefeab 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -3,8 +3,6 @@
 
 #include <linux/time.h>
 
-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,12 @@ struct rlimit {
  */
 #include <asm/resource.h>
 
+#ifdef __KERNEL__
+
+struct task_struct;
+
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
 
+#endif /* __KERNEL__ */
+
 #endif
-- 
cgit v1.2.3


From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 19 Nov 2009 17:16:37 +0100
Subject: resource: add helpers for fetching rlimits

We want to be sure that compiler fetches the limit variable only
once, so add helpers for fetching current and maximal resource
limits which do that.

Add them to sched.h (instead of resource.h) due to circular dependency
 sched.h->resource.h->task_struct
Alternative would be to create a separate res_access.h or similar.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2f842db03ce..8d4991be9d53 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+static inline unsigned long task_rlimit(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+}
+
+static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+}
+
+static inline unsigned long rlimit(unsigned int limit)
+{
+	return task_rlimit(current, limit);
+}
+
+static inline unsigned long rlimit_max(unsigned int limit)
+{
+	return task_rlimit_max(current, limit);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 31 Dec 2009 12:15:54 +0100
Subject: PCI/PM: Use per-device D3 delays

It turns out that some PCI devices require extra delays when changing
power state from D3 to D0 (and the other way around).  Although this
is against the PCI specification, we can handle it quite easily by
allowing drivers to define arbitrary D3 delays for devices known to
require extra time for switching power states.

Introduce additional field d3_delay in struct pci_dev and use it to
store the value of the device's D0->D3 delay, in miliseconds.  Make
the PCI PM core code use the per-device d3_delay unless
pci_pm_d3_delay is greater (in which case the latter is used).
[This also allows the driver to specify d3_delay shorter than the
 10 ms required by the PCI standard if the device is known to be able
 to handle that.]

Make the sky2 driver set d3_delay to 150 for devices handled by it.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a
listed regression from 2.6.30.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/net/sky2.c  |  1 +
 drivers/pci/pci.c   | 19 +++++++++++++++----
 include/linux/pci.h |  1 +
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 1c01b96c9611..2d28d58200d0 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
+	pdev->d3_delay = 150;
 
 	return 0;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0906599ebfde..315fea47e784 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -29,7 +29,17 @@ const char *pci_power_names[] = {
 };
 EXPORT_SYMBOL_GPL(pci_power_names);
 
-unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
+unsigned int pci_pm_d3_delay;
+
+static void pci_dev_d3_sleep(struct pci_dev *dev)
+{
+	unsigned int delay = dev->d3_delay;
+
+	if (delay < pci_pm_d3_delay)
+		delay = pci_pm_d3_delay;
+
+	msleep(delay);
+}
 
 #ifdef CONFIG_PCI_DOMAINS
 int pci_domains_supported = 1;
@@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 	/* Mandatory power management transition delays */
 	/* see PCI PM 1.1 5.6.1 table 18 */
 	if (state == PCI_D3hot || dev->current_state == PCI_D3hot)
-		msleep(pci_pm_d3_delay);
+		pci_dev_d3_sleep(dev);
 	else if (state == PCI_D2 || dev->current_state == PCI_D2)
 		udelay(PCI_PM_D2_DELAY);
 
@@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 
 	dev->pm_cap = pm;
+	dev->d3_delay = PCI_PM_D3_WAIT;
 
 	dev->d1_support = false;
 	dev->d2_support = false;
@@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D3hot;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D0;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5da0690d9cee..174e5392e51e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -243,6 +243,7 @@ struct pci_dev {
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* Only allow D0 and D3 */
 	unsigned int	wakeup_prepared:1;
+	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
 
 #ifdef CONFIG_PCIEASPM
 	struct pcie_link_state	*link_state;	/* ASPM link state. */
-- 
cgit v1.2.3


From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 5 Jan 2010 17:56:02 -0800
Subject: Input: serio - fix potential deadlock when unbinding drivers

sysfs_remove_group() waits for sysfs attributes to be removed, therefore
we do not need to worry about driver-specific attributes being accessed
after driver has been detached from the device. In fact, attempts to take
serio->drv_mutex in attribute methods may lead to the following deadlock:

                                          sysfs_read_file()
                                            fill_read_buffer()
                                              sysfs_get_active_two()
                                                psmouse_attr_show_helper()
                                                  serio_pin_driver()
serio_disconnect_driver()
  mutex_lock(&serio->drv_mutex);
                                <-------->        mutex_lock(&serio_drv_mutex);
    psmouse_disconnect()
      sysfs_remove_group(... psmouse_attr_group);
        ....
        sysfs_deactivate();
          wait_for_completion();

Fix this by removing calls to serio_[un]pin_driver() and functions themselves
and using driver-private mutexes to serialize access to attribute's set()
methods that may change device state.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c     | 59 ++++++++++++++++----------------------
 drivers/input/mouse/psmouse-base.c | 32 ++-------------------
 include/linux/serio.h              | 19 ------------
 3 files changed, 28 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 1f5e2ce327d6..1cf32a7814d0 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -225,8 +225,10 @@ struct atkbd {
 
 	struct delayed_work event_work;
 	unsigned long event_jiffies;
-	struct mutex event_mutex;
 	unsigned long event_mask;
+
+	/* Serializes reconnect(), attr->set() and event work */
+	struct mutex mutex;
 };
 
 /*
@@ -577,7 +579,7 @@ static void atkbd_event_work(struct work_struct *work)
 {
 	struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work);
 
-	mutex_lock(&atkbd->event_mutex);
+	mutex_lock(&atkbd->mutex);
 
 	if (!atkbd->enabled) {
 		/*
@@ -596,7 +598,7 @@ static void atkbd_event_work(struct work_struct *work)
 			atkbd_set_repeat_rate(atkbd);
 	}
 
-	mutex_unlock(&atkbd->event_mutex);
+	mutex_unlock(&atkbd->mutex);
 }
 
 /*
@@ -612,7 +614,7 @@ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit)
 
 	atkbd->event_jiffies = jiffies;
 	set_bit(event_bit, &atkbd->event_mask);
-	wmb();
+	mb();
 	schedule_delayed_work(&atkbd->event_work, delay);
 }
 
@@ -849,12 +851,13 @@ static void atkbd_disconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 
+	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
+
 	atkbd_disable(atkbd);
 
 	/* make sure we don't have a command in flight */
 	cancel_delayed_work_sync(&atkbd->event_work);
 
-	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
 	input_unregister_device(atkbd->dev);
 	serio_close(serio);
 	serio_set_drvdata(serio, NULL);
@@ -1087,7 +1090,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 	atkbd->dev = dev;
 	ps2_init(&atkbd->ps2dev, serio);
 	INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work);
-	mutex_init(&atkbd->event_mutex);
+	mutex_init(&atkbd->mutex);
 
 	switch (serio->id.type) {
 
@@ -1160,19 +1163,23 @@ static int atkbd_reconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 	struct serio_driver *drv = serio->drv;
+	int retval = -1;
 
 	if (!atkbd || !drv) {
 		printk(KERN_DEBUG "atkbd: reconnect request, but serio is disconnected, ignoring...\n");
 		return -1;
 	}
 
+	mutex_lock(&atkbd->mutex);
+
 	atkbd_disable(atkbd);
 
 	if (atkbd->write) {
 		if (atkbd_probe(atkbd))
-			return -1;
+			goto out;
+
 		if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra))
-			return -1;
+			goto out;
 
 		atkbd_activate(atkbd);
 
@@ -1190,8 +1197,11 @@ static int atkbd_reconnect(struct serio *serio)
 	}
 
 	atkbd_enable(atkbd);
+	retval = 0;
 
-	return 0;
+ out:
+	mutex_unlock(&atkbd->mutex);
+	return retval;
 }
 
 static struct serio_device_id atkbd_serio_ids[] = {
@@ -1235,47 +1245,28 @@ static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf,
 				ssize_t (*handler)(struct atkbd *, char *))
 {
 	struct serio *serio = to_serio_port(dev);
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	retval = handler((struct atkbd *)serio_get_drvdata(serio), buf);
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return handler(atkbd, buf);
 }
 
 static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count,
 				ssize_t (*handler)(struct atkbd *, const char *, size_t))
 {
 	struct serio *serio = to_serio_port(dev);
-	struct atkbd *atkbd;
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 	int retval;
 
-	retval = serio_pin_driver(serio);
+	retval = mutex_lock_interruptible(&atkbd->mutex);
 	if (retval)
 		return retval;
 
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	atkbd = serio_get_drvdata(serio);
 	atkbd_disable(atkbd);
 	retval = handler(atkbd, buf, count);
 	atkbd_enable(atkbd);
 
-out:
-	serio_unpin_driver(serio);
+	mutex_unlock(&atkbd->mutex);
+
 	return retval;
 }
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 401ac6b6edd4..d59e18b24ede 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1450,24 +1450,10 @@ ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *de
 	struct serio *serio = to_serio_port(dev);
 	struct psmouse_attribute *attr = to_psmouse_attr(devattr);
 	struct psmouse *psmouse;
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
 
 	psmouse = serio_get_drvdata(serio);
 
-	retval = attr->show(psmouse, attr->data, buf);
-
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return attr->show(psmouse, attr->data, buf);
 }
 
 ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr,
@@ -1478,18 +1464,9 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 	struct psmouse *psmouse, *parent = NULL;
 	int retval;
 
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out_unpin;
-	}
-
 	retval = mutex_lock_interruptible(&psmouse_mutex);
 	if (retval)
-		goto out_unpin;
+		goto out;
 
 	psmouse = serio_get_drvdata(serio);
 
@@ -1519,8 +1496,7 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 
  out_unlock:
 	mutex_unlock(&psmouse_mutex);
- out_unpin:
-	serio_unpin_driver(serio);
+ out:
 	return retval;
 }
 
@@ -1582,9 +1558,7 @@ static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, co
 		}
 
 		mutex_unlock(&psmouse_mutex);
-		serio_unpin_driver(serio);
 		serio_unregister_child_port(serio);
-		serio_pin_driver_uninterruptible(serio);
 		mutex_lock(&psmouse_mutex);
 
 		if (serio->drv != &psmouse_drv) {
diff --git a/include/linux/serio.h b/include/linux/serio.h
index e2f3044d4a4a..813d26c247ec 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio)
 	spin_unlock_irq(&serio->lock);
 }
 
-/*
- * Use the following functions to pin serio's driver in process context
- */
-static inline int serio_pin_driver(struct serio *serio)
-{
-	return mutex_lock_interruptible(&serio->drv_mutex);
-}
-
-static inline void serio_pin_driver_uninterruptible(struct serio *serio)
-{
-	mutex_lock(&serio->drv_mutex);
-}
-
-static inline void serio_unpin_driver(struct serio *serio)
-{
-	mutex_unlock(&serio->drv_mutex);
-}
-
-
 #endif
 
 /*
-- 
cgit v1.2.3


From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jan 2010 16:11:06 -0500
Subject: x86, ACPI: delete acpi_boot_table_init() return value

cleanup only.

setup_arch(), doesn't care care if ACPI initialization succeeded
or failed, so delete acpi_boot_table_init()'s return value.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 22 ++++++----------------
 include/linux/acpi.h        |  6 +++---
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fb1035cd9a6a..036d28adf59d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
  *	if acpi_blacklisted() acpi_disabled = 1;
  *	acpi_irq_model=...
  *	...
- *
- * return value: (currently ignored)
- *	0: success
- *	!0: failure
  */
 
-int __init acpi_boot_table_init(void)
+void __init acpi_boot_table_init(void)
 {
-	int error;
-
 	dmi_check_system(acpi_dmi_table);
 
 	/*
@@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void)
 	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
 	if (acpi_disabled && !acpi_ht)
-		return 1;
+		return; 
 
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
-	error = acpi_table_init();
-	if (error) {
+	if (acpi_table_init()) {
 		disable_acpi();
-		return error;
+		return;
 	}
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void)
 	/*
 	 * blacklist may disable ACPI entirely
 	 */
-	error = acpi_blacklisted();
-	if (error) {
+	if (acpi_blacklisted()) {
 		if (acpi_force) {
 			printk(KERN_WARNING PREFIX "acpi=force override\n");
 		} else {
 			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
 			disable_acpi();
-			return error;
+			return;
 		}
 	}
-
-	return 0;
 }
 
 int __init early_acpi_boot_init(void)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 36924255c0d5..b926afe8c03e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 void __acpi_unmap_table(char *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
-int acpi_boot_table_init (void);
+void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
@@ -321,9 +321,9 @@ static inline int acpi_boot_init(void)
 	return 0;
 }
 
-static inline int acpi_boot_table_init(void)
+static inline void acpi_boot_table_init(void)
 {
-	return 0;
+	return;
 }
 
 static inline int acpi_mps_check(void)
-- 
cgit v1.2.3


From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Wed, 6 Jan 2010 17:23:23 +0000
Subject: NOMMU: Avoiding duplicate icache flushes of shared maps

When working with FDPIC, there are many shared mappings of read-only
code regions between applications (the C library, applet packages like
busybox, etc.), but the current do_mmap_pgoff() function will issue an
icache flush whenever a VMA is added to an MM instead of only doing it
when the map is initially created.

The flush can instead be done when a region is first mmapped PROT_EXEC.
Note that we may not rely on the first mapping of a region being
executable - it's possible for it to be PROT_READ only, so we have to
remember whether we've flushed the region or not, and then flush the
entire region when a bit of it is made executable.

However, this also affects the brk area.  That will no longer be
executable.  We can mprotect() it to PROT_EXEC on MPU-mode kernels, but
for NOMMU mode kernels, when it increases the brk allocation, making
sys_brk() flush the extra from the icache should suffice.  The brk area
probably isn't used by NOMMU programs since the brk area can only use up
the leavings from the stack allocation, where the stack allocation is
larger than requested.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 mm/nommu.c               | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84a524afb3dc..84d020bed083 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -123,6 +123,8 @@ struct vm_region {
 	struct file	*vm_file;	/* the backing file or NULL */
 
 	atomic_t	vm_usage;	/* region usage count */
+	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
+						* this region */
 };
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index 6f9248f89bde..a8d17521624a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	/*
 	 * Ok, looks good - let it rip.
 	 */
+	flush_icache_range(mm->brk, brk);
 	return mm->brk = brk;
 }
 
@@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file,
 share:
 	add_vma_to_mm(current->mm, vma);
 
-	up_write(&nommu_region_sem);
+	/* we flush the region from the icache only when the first executable
+	 * mapping of it is made  */
+	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
+		flush_icache_range(region->vm_start, region->vm_end);
+		region->vm_icache_flushed = true;
+	}
 
-	if (prot & PROT_EXEC)
-		flush_icache_range(result, result + len);
+	up_write(&nommu_region_sem);
 
 	kleave(" = %lx", result);
 	return result;
-- 
cgit v1.2.3


From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 7 Jan 2010 11:58:36 -0600
Subject: maccess,probe_kernel: Allow arch specific override
 probe_kernel_(read|write)

Some archs such as blackfin, would like to have an arch specific
probe_kernel_read() and probe_kernel_write() implementation which can
fall back to the generic implementation if no special operations are
needed.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 include/linux/uaccess.h |  4 +++-
 mm/maccess.c            | 11 +++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 6b58367d145e..d512d98dfb7d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
  * happens, handle that and return -EFAULT.
  */
 extern long probe_kernel_read(void *dst, void *src, size_t size);
+extern long __probe_kernel_read(void *dst, void *src, size_t size);
 
 /*
  * probe_kernel_write(): safely attempt to write to a location
@@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-extern long probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace __probe_kernel_write(void *dst, void *src, size_t size);
 
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/mm/maccess.c b/mm/maccess.c
index 9073695ff25f..4e348dbaecd7 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -14,7 +14,11 @@
  * Safely read from address @src to the buffer at @dst.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long probe_kernel_read(void *dst, void *src, size_t size)
+
+long __weak probe_kernel_read(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
+long __probe_kernel_read(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
@@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long notrace __weak probe_kernel_write(void *dst, void *src, size_t size)
+long __weak probe_kernel_write(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_write")));
+
+long __probe_kernel_write(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
-- 
cgit v1.2.3


From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 7 Jan 2010 11:58:37 -0600
Subject: kgdb: Fix kernel-doc format error in kgdb.h

linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description'

and fix typos in that file's kernel-doc comments.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6adcc297e354..19ec41a183f5 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -29,8 +29,7 @@ struct pt_regs;
  *
  *	On some architectures it is required to skip a breakpoint
  *	exception when it occurs after a breakpoint has been removed.
- *	This can be implemented in the architecture specific portion of
- *	for kgdb.
+ *	This can be implemented in the architecture specific portion of kgdb.
  */
 extern int kgdb_skipexception(int exception, struct pt_regs *regs);
 
@@ -65,7 +64,7 @@ struct uart_port;
 /**
  *	kgdb_breakpoint - compiled in breakpoint
  *
- *	This will be impelmented a static inline per architecture.  This
+ *	This will be implemented as a static inline per architecture.  This
  *	function is called by the kgdb core to execute an architecture
  *	specific trap to cause kgdb to enter the exception processing.
  *
@@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code,
  *	@flags: Current IRQ state
  *
  *	On SMP systems, we need to get the attention of the other CPUs
- *	and get them be in a known state.  This should do what is needed
+ *	and get them into a known state.  This should do what is needed
  *	to get the other CPUs to call kgdb_wait(). Note that on some arches,
  *	the NMI approach is not used for rounding up all the CPUs. For example,
  *	in case of MIPS, smp_call_function() is used to roundup CPUs. In
-- 
cgit v1.2.3


From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:48 -0500
Subject: block: Fix discard alignment calculation and printing

Discard alignment reporting for partitions was incorrect.  Update to
match the algorithm used elsewhere.

The alignment can be negative (misaligned).  Fix format string
accordingly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c          | 2 +-
 include/linux/blkdev.h | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index b11a4ad7d571..d13ba76a169c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }
 
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9b98173a8184..a41bcc8e140f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q)
 static inline int queue_sector_discard_alignment(struct request_queue *q,
 						 sector_t sector)
 {
-	return ((sector << 9) - q->limits.discard_alignment)
-		& (q->limits.discard_granularity - 1);
+	struct queue_limits *lim = &q->limits;
+	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+
+	return (lim->discard_granularity + lim->discard_alignment - alignment)
+		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
-- 
cgit v1.2.3


From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:49 -0500
Subject: block: bdev_stack_limits wrapper

DM does not want to know about partition offsets.  Add a partition-aware
wrapper that DM can use when stacking block devices.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 22 ++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 127f82551855..5eeb9e0d256e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -639,6 +639,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
+/**
+ * bdev_stack_limits - adjust queue limits for stacked drivers
+ * @t:	the stacking driver limits (top device)
+ * @bdev:  the component block_device (bottom)
+ * @start:  first data sector within component device
+ *
+ * Description:
+ *    Merges queue limits for a top device and a block_device.  Returns
+ *    0 if alignment didn't change.  Returns -1 if adding the bottom
+ *    device caused misalignment.
+ */
+int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+		      sector_t start)
+{
+	struct request_queue *bq = bdev_get_queue(bdev);
+
+	start += get_start_sect(bdev);
+
+	return blk_stack_limits(t, &bq->limits, start << 9);
+}
+EXPORT_SYMBOL(bdev_stack_limits);
+
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
  * @disk:  MD/DM gendisk (top)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a41bcc8e140f..5c8018977efa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
+extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
-- 
cgit v1.2.3


From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001
From: Kirill Afonshin <kirill_nnov@mail.ru>
Date: Fri, 8 Jan 2010 22:09:59 +0300
Subject: block: removed unused as_io_context

It isn't used anymore, since AS was deleted.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-ioc.c           |  5 -----
 include/linux/iocontext.h | 27 ---------------------------
 2 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index cbdabb0dd6d7..98e6bf61b0ac 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -39,8 +39,6 @@ int put_io_context(struct io_context *ioc)
 
 	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
-		if (ioc->aic && ioc->aic->dtor)
-			ioc->aic->dtor(ioc->aic);
 		cfq_dtor(ioc);
 		rcu_read_unlock();
 
@@ -76,8 +74,6 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	if (atomic_dec_and_test(&ioc->nr_tasks)) {
-		if (ioc->aic && ioc->aic->exit)
-			ioc->aic->exit(ioc->aic);
 		cfq_exit(ioc);
 
 	}
@@ -97,7 +93,6 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		ret->ioprio = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
-		ret->aic = NULL;
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index a63235996309..78ef023227d4 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -4,32 +4,6 @@
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
 
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
-	spinlock_t lock;
-
-	void (*dtor)(struct as_io_context *aic); /* destructor */
-	void (*exit)(struct as_io_context *aic); /* called on task exit */
-
-	unsigned long state;
-	atomic_t nr_queued; /* queued reads & sync writes */
-	atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
-	/* IO History tracking */
-	/* Thinktime */
-	unsigned long last_end_request;
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-	/* Layout pattern */
-	unsigned int seek_samples;
-	sector_t last_request_pos;
-	u64 seek_total;
-	sector_t seek_mean;
-};
-
 struct cfq_queue;
 struct cfq_io_context {
 	void *key;
@@ -78,7 +52,6 @@ struct io_context {
 	unsigned long last_waited; /* Time last woken after wait for request */
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
-	struct as_io_context *aic;
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
 	void *ioc_data;
-- 
cgit v1.2.3


From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 6 Jan 2010 15:45:55 -0800
Subject: genhd: overlapping variable definition

This fixes the sparse warning:
fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one
fs/ext4/super.c:2368:22: originally declared here

Using 'i' in a macro is dubious practice.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c6c0c41af35f..9717081c75ad 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
 #define part_stat_read(part, field)					\
 ({									\
 	typeof((part)->dkstats->field) res = 0;				\
-	int i;								\
-	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr((part)->dkstats, i)->field;		\
+	unsigned int _cpu;						\
+	for_each_possible_cpu(_cpu)					\
+		res += per_cpu_ptr((part)->dkstats, _cpu)->field;	\
 	res;								\
 })
 
-- 
cgit v1.2.3


From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001
From: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Date: Fri, 8 Jan 2010 14:42:31 -0800
Subject: mm: make totalhigh_pages unsigned long

Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM
is set Removes redundant casts in printout messages

Signed-off-by: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/init.c               | 2 +-
 arch/mips/mm/init.c              | 2 +-
 arch/mips/sgi-ip27/ip27-memory.c | 2 +-
 arch/mn10300/mm/init.c           | 3 +--
 arch/score/mm/init.c             | 2 +-
 arch/x86/mm/init_32.c            | 3 +--
 include/linux/highmem.h          | 2 +-
 7 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 52c40d155672..a04ffbbbe253 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -616,7 +616,7 @@ void __init mem_init(void)
 		"%dK data, %dK init, %luK highmem)\n",
 		nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10,
 		datasize >> 10, initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
 		extern int sysctl_overcommit_memory;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9e8d00389eef..1651942f7feb 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -424,7 +424,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index f61c164d1e67..bc1297109cc5 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -505,5 +505,5 @@ void __init mem_init(void)
 	       (num_physpages - tmp) << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index ec1420562dc7..dd27a9a35152 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -118,8 +118,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT - 10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10))
-	       );
+	       totalhigh_pages << (PAGE_SHIFT - 10));
 }
 
 /*
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 8c15b2c85d5a..dfaf458d6702 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -106,7 +106,7 @@ void __init mem_init(void)
 			ram << (PAGE_SHIFT-10), codesize >> 10,
 			reservedpages << (PAGE_SHIFT-10), datasize >> 10,
 			initsize >> 10,
-			(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+			totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c973f8e2a6cf..9a0c258a86be 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -892,8 +892,7 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
-	       );
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 211ff4497269..ab2cc20e21a5 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -46,7 +46,7 @@ void kmap_flush_unused(void);
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
-#define totalhigh_pages 0
+#define totalhigh_pages 0UL
 
 #ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
-- 
cgit v1.2.3


From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 8 Jan 2010 14:42:35 -0800
Subject: kmemcheck: make bitfield annotations truly no-ops when disabled

It turns out that even zero-sized struct members (int foo[0];) will affect
the struct layout, causing us in particular to lose 4 bytes in struct
sock.

This patch fixes the regression in CONFIG_KMEMCHECK=n case.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index e880d4cf9e22..08d7dc4ddf40 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address);
 
 bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
 
+/*
+ * Bitfield annotations
+ *
+ * How to use: If you have a struct using bitfields, for example
+ *
+ *     struct a {
+ *             int x:8, y:8;
+ *     };
+ *
+ * then this should be rewritten as
+ *
+ *     struct a {
+ *             kmemcheck_bitfield_begin(flags);
+ *             int x:8, y:8;
+ *             kmemcheck_bitfield_end(flags);
+ *     };
+ *
+ * Now the "flags_begin" and "flags_end" members may be used to refer to the
+ * beginning and end, respectively, of the bitfield (and things like
+ * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
+ * fields should be annotated:
+ *
+ *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
+ *     kmemcheck_annotate_bitfield(a, flags);
+ */
+#define kmemcheck_bitfield_begin(name)	\
+	int name##_begin[0];
+
+#define kmemcheck_bitfield_end(name)	\
+	int name##_end[0];
+
+#define kmemcheck_annotate_bitfield(ptr, name)				\
+	do {								\
+		int _n;							\
+									\
+		if (!ptr)						\
+			break;						\
+									\
+		_n = (long) &((ptr)->name##_end)			\
+			- (long) &((ptr)->name##_begin);		\
+		MAYBE_BUILD_BUG_ON(_n < 0);				\
+									\
+		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+	} while (0)
+
+#define kmemcheck_annotate_variable(var)				\
+	do {								\
+		kmemcheck_mark_initialized(&(var), sizeof(var));	\
+	} while (0)							\
+
 #else
 #define kmemcheck_enabled 0
 
@@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 	return true;
 }
 
-#endif /* CONFIG_KMEMCHECK */
-
-/*
- * Bitfield annotations
- *
- * How to use: If you have a struct using bitfields, for example
- *
- *     struct a {
- *             int x:8, y:8;
- *     };
- *
- * then this should be rewritten as
- *
- *     struct a {
- *             kmemcheck_bitfield_begin(flags);
- *             int x:8, y:8;
- *             kmemcheck_bitfield_end(flags);
- *     };
- *
- * Now the "flags_begin" and "flags_end" members may be used to refer to the
- * beginning and end, respectively, of the bitfield (and things like
- * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
- * fields should be annotated:
- *
- *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
- *     kmemcheck_annotate_bitfield(a, flags);
- *
- * Note: We provide the same definitions for both kmemcheck and non-
- * kmemcheck kernels. This makes it harder to introduce accidental errors. It
- * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield().
- */
-#define kmemcheck_bitfield_begin(name)	\
-	int name##_begin[0];
-
-#define kmemcheck_bitfield_end(name)	\
-	int name##_end[0];
+#define kmemcheck_bitfield_begin(name)
+#define kmemcheck_bitfield_end(name)
+#define kmemcheck_annotate_bitfield(ptr, name)	\
+	do {					\
+	} while (0)
 
-#define kmemcheck_annotate_bitfield(ptr, name)				\
-	do {								\
-		int _n;							\
-									\
-		if (!ptr)						\
-			break;						\
-									\
-		_n = (long) &((ptr)->name##_end)			\
-			- (long) &((ptr)->name##_begin);		\
-		MAYBE_BUILD_BUG_ON(_n < 0);				\
-									\
-		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+#define kmemcheck_annotate_variable(var)	\
+	do {					\
 	} while (0)
 
-#define kmemcheck_annotate_variable(var)				\
-	do {								\
-		kmemcheck_mark_initialized(&(var), sizeof(var));	\
-	} while (0)							\
+#endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
-- 
cgit v1.2.3


From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001
From: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Date: Fri, 8 Jan 2010 14:42:42 -0800
Subject: lib: add support for LZO-compressed kernels

This patch series adds generic support for creating and extracting
LZO-compressed kernel images, as well as support for using such images on
the x86 and ARM architectures, and support for creating and using
LZO-compressed initrd and initramfs images.

Russell King said:

: Testing on a Cortex A9 model:
: - lzo decompressor is 65% of the time gzip takes to decompress a kernel
: - lzo kernel is 9% larger than a gzip kernel
:
: which I'm happy to say confirms your figures when comparing the two.
:
: However, when comparing your new gzip code to the old gzip code:
: - new is 99% of the size of the old code
: - new takes 42% of the time to decompress than the old code
:
: What this means is that for a proper comparison, the results get even better:
: - lzo is 7.5% larger than the old gzip'd kernel image
: - lzo takes 28% of the time that the old gzip code took
:
: So the expense seems definitely worth the effort.  The only reason I
: can think of ever using gzip would be if you needed the additional
: compression (eg, because you have limited flash to store the image.)
:
: I would argue that the default for ARM should therefore be LZO.

This patch:

The lzo compressor is worse than gzip at compression, but faster at
extraction.  Here are some figures for an ARM board I'm working on:

Uncompressed size: 3.24Mo
gzip  1.61Mo 0.72s
lzo   1.75Mo 0.48s

So for a compression ratio that is still relatively close to gzip, it's
much faster to extract, at least in that case.

This part contains:
 - Makefile routine to support lzo compression
 - Fixes to the existing lzo compressor so that it can be used in
   compressed kernels
 - wrapper around the existing lzo1x_decompress, as it only extracts one
   block at a time, while we need to extract a whole file here
 - config dialog for kernel compression

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Tested-by: Wu Zhangjin <wuzhangjin@gmail.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Russell King <rmk@arm.linux.org.uk>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/decompress/unlzo.h |  10 ++
 init/Kconfig                     |  18 +++-
 lib/decompress_unlzo.c           | 209 +++++++++++++++++++++++++++++++++++++++
 lib/lzo/lzo1x_decompress.c       |   9 +-
 scripts/Makefile.lib             |   5 +
 5 files changed, 244 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/decompress/unlzo.h
 create mode 100644 lib/decompress_unlzo.c

(limited to 'include/linux')

diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
new file mode 100644
index 000000000000..987229752519
--- /dev/null
+++ b/include/linux/decompress/unlzo.h
@@ -0,0 +1,10 @@
+#ifndef DECOMPRESS_UNLZO_H
+#define DECOMPRESS_UNLZO_H
+
+int unlzo(unsigned char *inbuf, int len,
+	int(*fill)(void*, unsigned int),
+	int(*flush)(void*, unsigned int),
+	unsigned char *output,
+	int *pos,
+	void(*error)(char *x));
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..d95ca7cd5d45 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2
 config HAVE_KERNEL_LZMA
 	bool
 
+config HAVE_KERNEL_LZO
+	bool
+
 choice
 	prompt "Kernel compression mode"
 	default KERNEL_GZIP
-	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA
+	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO
 	help
 	  The linux kernel is a kind of self-extracting executable.
 	  Several compression algorithms are available, which differ
@@ -141,9 +144,8 @@ config KERNEL_GZIP
 	bool "Gzip"
 	depends on HAVE_KERNEL_GZIP
 	help
-	  The old and tried gzip compression. Its compression ratio is
-	  the poorest among the 3 choices; however its speed (both
-	  compression and decompression) is the fastest.
+	  The old and tried gzip compression. It provides a good balance
+	  between compression ratio and decompression speed.
 
 config KERNEL_BZIP2
 	bool "Bzip2"
@@ -164,6 +166,14 @@ config KERNEL_LZMA
 	  two. Compression is slowest.	The kernel size is about 33%
 	  smaller with LZMA in comparison to gzip.
 
+config KERNEL_LZO
+	bool "LZO"
+	depends on HAVE_KERNEL_LZO
+	help
+	  Its compression ratio is the poorest among the 4. The kernel
+	  size is about about 10% bigger than gzip; however its speed
+	  (both compression and decompression) is the fastest.
+
 endchoice
 
 config SWAP
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 000000000000..db521f45626e
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,209 @@
+/*
+ * LZO decompressor for the Linux kernel. Code borrowed from the lzo
+ * implementation by Markus Franz Xaver Johannes Oberhumer.
+ *
+ * Linux kernel adaptation:
+ * Copyright (C) 2009
+ * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
+ *
+ * Original code:
+ * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
+ * All Rights Reserved.
+ *
+ * lzop and the LZO library are free software; you can redistribute them
+ * and/or modify them under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.
+ * If not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Markus F.X.J. Oberhumer
+ * <markus@oberhumer.com>
+ * http://www.oberhumer.com/opensource/lzop/
+ */
+
+#ifdef STATIC
+#include "lzo/lzo1x_decompress.c"
+#else
+#include <linux/slab.h>
+#include <linux/decompress/unlzo.h>
+#endif
+
+#include <linux/types.h>
+#include <linux/lzo.h>
+#include <linux/decompress/mm.h>
+
+#include <linux/compiler.h>
+#include <asm/unaligned.h>
+
+static const unsigned char lzop_magic[] = {
+	0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#define LZO_BLOCK_SIZE        (256*1024l)
+#define HEADER_HAS_FILTER      0x00000800L
+
+STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+{
+	int l;
+	u8 *parse = input;
+	u8 level = 0;
+	u16 version;
+
+	/* read magic: 9 first bits */
+	for (l = 0; l < 9; l++) {
+		if (*parse++ != lzop_magic[l])
+			return 0;
+	}
+	/* get version (2bytes), skip library version (2),
+	 * 'need to be extracted' version (2) and
+	 * method (1) */
+	version = get_unaligned_be16(parse);
+	parse += 7;
+	if (version >= 0x0940)
+		level = *parse++;
+	if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
+		parse += 8; /* flags + filter info */
+	else
+		parse += 4; /* flags */
+
+	/* skip mode and mtime_low */
+	parse += 8;
+	if (version >= 0x0940)
+		parse += 4;	/* skip mtime_high */
+
+	l = *parse++;
+	/* don't care about the file name, and skip checksum */
+	parse += l + 4;
+
+	*skip = parse - input;
+	return 1;
+}
+
+STATIC inline int INIT unlzo(u8 *input, int in_len,
+				int (*fill) (void *, unsigned int),
+				int (*flush) (void *, unsigned int),
+				u8 *output, int *posp,
+				void (*error_fn) (char *x))
+{
+	u8 skip = 0, r = 0;
+	u32 src_len, dst_len;
+	size_t tmp;
+	u8 *in_buf, *in_buf_save, *out_buf;
+	int obytes_processed = 0;
+
+	set_error_fn(error_fn);
+
+	if (output) {
+		out_buf = output;
+	} else if (!flush) {
+		error("NULL output pointer and no flush function provided");
+		goto exit;
+	} else {
+		out_buf = malloc(LZO_BLOCK_SIZE);
+		if (!out_buf) {
+			error("Could not allocate output buffer");
+			goto exit;
+		}
+	}
+
+	if (input && fill) {
+		error("Both input pointer and fill function provided, don't know what to do");
+		goto exit_1;
+	} else if (input) {
+		in_buf = input;
+	} else if (!fill || !posp) {
+		error("NULL input pointer and missing position pointer or fill function");
+		goto exit_1;
+	} else {
+		in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		if (!in_buf) {
+			error("Could not allocate input buffer");
+			goto exit_1;
+		}
+	}
+	in_buf_save = in_buf;
+
+	if (posp)
+		*posp = 0;
+
+	if (fill)
+		fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+
+	if (!parse_header(input, &skip)) {
+		error("invalid header");
+		goto exit_2;
+	}
+	in_buf += skip;
+
+	if (posp)
+		*posp = skip;
+
+	for (;;) {
+		/* read uncompressed block size */
+		dst_len = get_unaligned_be32(in_buf);
+		in_buf += 4;
+
+		/* exit if last block */
+		if (dst_len == 0) {
+			if (posp)
+				*posp += 4;
+			break;
+		}
+
+		if (dst_len > LZO_BLOCK_SIZE) {
+			error("dest len longer than block size");
+			goto exit_2;
+		}
+
+		/* read compressed block size, and skip block checksum info */
+		src_len = get_unaligned_be32(in_buf);
+		in_buf += 8;
+
+		if (src_len <= 0 || src_len > dst_len) {
+			error("file corrupted");
+			goto exit_2;
+		}
+
+		/* decompress */
+		tmp = dst_len;
+		r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+						out_buf, &tmp);
+
+		if (r != LZO_E_OK || dst_len != tmp) {
+			error("Compressed data violation");
+			goto exit_2;
+		}
+
+		obytes_processed += dst_len;
+		if (flush)
+			flush(out_buf, dst_len);
+		if (output)
+			out_buf += dst_len;
+		if (posp)
+			*posp += src_len + 12;
+		if (fill) {
+			in_buf = in_buf_save;
+			fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		} else
+			in_buf += src_len;
+	}
+
+exit_2:
+	if (!input)
+		free(in_buf);
+exit_1:
+	if (!output)
+		free(out_buf);
+exit:
+	return obytes_processed;
+}
+
+#define decompress unlzo
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 5dc6b29c1575..f2fd09850223 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -11,11 +11,13 @@
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
+#ifndef STATIC
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lzo.h>
-#include <asm/byteorder.h>
+#endif
+
 #include <asm/unaligned.h>
+#include <linux/lzo.h>
 #include "lzodefs.h"
 
 #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
@@ -244,9 +246,10 @@ lookbehind_overrun:
 	*out_len = op - out;
 	return LZO_E_LOOKBEHIND_OVERRUN;
 }
-
+#ifndef STATIC
 EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X Decompressor");
 
+#endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cd815ac2a50b..0fe48cd91ffa 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA    $@
 cmd_lzma = (cat $(filter-out FORCE,$^) | \
 	lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
 	(rm -f $@ ; false)
+
+quiet_cmd_lzo = LZO    $@
+cmd_lzo = (cat $(filter-out FORCE,$^) | \
+	lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
-- 
cgit v1.2.3


From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 8 Jan 2010 14:43:08 -0800
Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |   9 ++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/adp5588.h |  12 ++
 4 files changed, 288 insertions(+)
 create mode 100644 drivers/gpio/adp5588-gpio.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index a019b49ecc9b..1f1d88ae68d6 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -172,6 +172,15 @@ config GPIO_ADP5520
 	  To compile this driver as a module, choose M here: the module will
 	  be called adp5520-gpio.
 
+config GPIO_ADP5588
+	tristate "ADP5588 I2C GPIO expander"
+	depends on I2C
+	help
+	  This option enables support for 18 GPIOs found
+	  on Analog Devices ADP5588 GPIO Expanders.
+	  To compile this driver as a module, choose M here: the module will be
+	  called adp5588-gpio.
+
 comment "PCI GPIO expanders:"
 
 config GPIO_CS5535
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 52fe4cf734c7..48687238edb1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_ADP5520)	+= adp5520-gpio.o
+obj-$(CONFIG_GPIO_ADP5588)	+= adp5588-gpio.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c
new file mode 100644
index 000000000000..afc097a16b33
--- /dev/null
+++ b/drivers/gpio/adp5588-gpio.c
@@ -0,0 +1,266 @@
+/*
+ * GPIO Chip driver for Analog Devices
+ * ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c/adp5588.h>
+
+#define DRV_NAME		"adp5588-gpio"
+#define MAXGPIO			18
+#define ADP_BANK(offs)		((offs) >> 3)
+#define ADP_BIT(offs)		(1u << ((offs) & 0x7))
+
+struct adp5588_gpio {
+	struct i2c_client *client;
+	struct gpio_chip gpio_chip;
+	struct mutex lock;	/* protect cached dir, dat_out */
+	unsigned gpio_start;
+	uint8_t dat_out[3];
+	uint8_t dir[3];
+};
+
+static int adp5588_gpio_read(struct i2c_client *client, u8 reg)
+{
+	int ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Read Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val)
+{
+	int ret = i2c_smbus_write_byte_data(client, reg, val);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Write Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off))
+		  & ADP_BIT(off));
+}
+
+static void adp5588_gpio_set_value(struct gpio_chip *chip,
+				   unsigned off, int val)
+{
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+			   dev->dat_out[bank]);
+	mutex_unlock(&dev->lock);
+}
+
+static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off)
+{
+	int ret;
+	unsigned bank;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] &= ~ADP_BIT(off);
+	ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int adp5588_gpio_direction_output(struct gpio_chip *chip,
+					 unsigned off, int val)
+{
+	int ret;
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] |= bit;
+
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+				 dev->dat_out[bank]);
+	ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank,
+				 dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int __devinit adp5588_gpio_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev;
+	struct gpio_chip *gc;
+	int ret, i, revid;
+
+	if (pdata == NULL) {
+		dev_err(&client->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+		return -EIO;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		dev_err(&client->dev, "failed to alloc memory\n");
+		return -ENOMEM;
+	}
+
+	dev->client = client;
+
+	gc = &dev->gpio_chip;
+	gc->direction_input = adp5588_gpio_direction_input;
+	gc->direction_output = adp5588_gpio_direction_output;
+	gc->get = adp5588_gpio_get_value;
+	gc->set = adp5588_gpio_set_value;
+	gc->can_sleep = 1;
+
+	gc->base = pdata->gpio_start;
+	gc->ngpio = MAXGPIO;
+	gc->label = client->name;
+	gc->owner = THIS_MODULE;
+
+	mutex_init(&dev->lock);
+
+
+	ret = adp5588_gpio_read(dev->client, DEV_ID);
+	if (ret < 0)
+		goto err;
+
+	revid = ret & ADP5588_DEVICE_ID_MASK;
+
+	for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) {
+		dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i);
+		dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i);
+		ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0);
+		ret |= adp5588_gpio_write(client, GPIO_PULL1 + i,
+				(pdata->pullup_dis_mask >> (8 * i)) & 0xFF);
+
+		if (ret)
+			goto err;
+	}
+
+	ret = gpiochip_add(&dev->gpio_chip);
+	if (ret)
+		goto err;
+
+	dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n",
+			gc->base, gc->base + gc->ngpio - 1,
+			client->name, revid);
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, dev);
+	return 0;
+
+err:
+	kfree(dev);
+	return ret;
+}
+
+static int __devexit adp5588_gpio_remove(struct i2c_client *client)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev = i2c_get_clientdata(client);
+	int ret;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client,
+				      dev->gpio_chip.base, dev->gpio_chip.ngpio,
+				      pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "teardown failed %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&dev->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "gpiochip_remove failed %d\n", ret);
+		return ret;
+	}
+
+	kfree(dev);
+	return 0;
+}
+
+static const struct i2c_device_id adp5588_gpio_id[] = {
+	{DRV_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id);
+
+static struct i2c_driver adp5588_gpio_driver = {
+	.driver = {
+		   .name = DRV_NAME,
+		   },
+	.probe = adp5588_gpio_probe,
+	.remove = __devexit_p(adp5588_gpio_remove),
+	.id_table = adp5588_gpio_id,
+};
+
+static int __init adp5588_gpio_init(void)
+{
+	return i2c_add_driver(&adp5588_gpio_driver);
+}
+
+module_init(adp5588_gpio_init);
+
+static void __exit adp5588_gpio_exit(void)
+{
+	i2c_del_driver(&adp5588_gpio_driver);
+}
+
+module_exit(adp5588_gpio_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("GPIO ADP5588 Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
index fc5db826b48e..02c9af374741 100644
--- a/include/linux/i2c/adp5588.h
+++ b/include/linux/i2c/adp5588.h
@@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data {
 	unsigned short unlock_key2;	/* Unlock Key 2 */
 };
 
+struct adp5588_gpio_platform_data {
+	unsigned gpio_start;		/* GPIO Chip base # */
+	unsigned pullup_dis_mask;	/* Pull-Up Disable Mask */
+	int	(*setup)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	int	(*teardown)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	void	*context;
+};
+
 #endif
-- 
cgit v1.2.3


From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 10 Jan 2010 16:28:09 +0200
Subject: core, x86: make LIST_POISON less deadly

The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable
pointers in order to trap erronous use of freed list_heads.  Unfortunately
userspace can arrange for those pointers to actually be dereferencable,
potentially turning an oops to an expolit.

To avoid this allow architectures (currently x86_64 only) to override
the default values for these pointers with truly-undereferencable values.
This is easy on x86_64 as the virtual address space is large and contains
areas that cannot be mapped.

Other 64-bit architectures will likely find similar unmapped ranges.

[ingo: switch to 0xdead000000000000 as the unmapped area]
[ingo: add comments, cleanup]
[jaswinder: eliminate sparse warnings]

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig       |  5 +++++
 include/linux/poison.h | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6bf1f1ac478c..cbcbfdee3ee0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ILLEGAL_POINTER_VALUE
+       hex
+       default 0 if X86_32
+       default 0xdead000000000000 if X86_64
+
 source "mm/Kconfig"
 
 config HIGHPTE
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 7fc194aef8c2..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -2,13 +2,25 @@
 #define _LINUX_POISON_H
 
 /********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
 /*
  * These are non-NULL pointers that will result in page faults
  * under normal circumstances, used to verify that nobody uses
  * non-initialized list entries.
  */
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
+#define LIST_POISON1  ((void *) 0x00100100 + POISON_POINTER_DELTA)
+#define LIST_POISON2  ((void *) 0x00200200 + POISON_POINTER_DELTA)
 
 /********** include/linux/timer.h **********/
 /*
-- 
cgit v1.2.3


From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 22 Dec 2009 13:35:52 +0100
Subject: drbd: Allow online resizing of DRBD devices while peer not reachable
 (needs to be explicitly forced)

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  5 ++---
 drivers/block/drbd/drbd_nl.c       | 17 +++++++++++------
 drivers/block/drbd/drbd_receiver.c |  4 ++--
 include/linux/drbd_nl.h            |  1 +
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 79d8e22c4d0d..2bf3a6ef3684 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1371,10 +1371,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
-extern sector_t drbd_new_dev_size(struct drbd_conf *,
-		struct drbd_backing_dev *);
+extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local);
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
 extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3313901a4861..1292e0620663 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
+enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size;
@@ -541,7 +541,7 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	size = drbd_new_dev_size(mdev, mdev->ldev);
+	size = drbd_new_dev_size(mdev, mdev->ldev, force);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
@@ -596,7 +596,7 @@ out:
 }
 
 sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
 {
 	sector_t p_size = mdev->p_size;   /* partner's disk size. */
 	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
@@ -606,6 +606,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	m_size = drbd_get_max_capacity(bdev);
 
+	if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
+		dev_warn(DEV, "Resize while not connected was forced by the user!\n");
+		p_size = m_size;
+	}
+
 	if (p_size && m_size) {
 		size = min_t(sector_t, p_size, m_size);
 	} else {
@@ -965,7 +970,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	/* Prevent shrinking of consistent devices ! */
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-	   drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) {
+	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
 		retcode = ERR_DISK_TO_SMALL;
 		goto force_diskless_dec;
@@ -1052,7 +1057,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, 0);
 	if (dd == dev_size_error) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
@@ -1504,7 +1509,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	}
 
 	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, rs.resize_force);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e3716fadc6a5..f22a5283128a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2870,7 +2870,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 
 		/* Never shrink a device with usable data during connect.
 		   But allow online shrinking if we are connected. */
-		if (drbd_new_dev_size(mdev, mdev->ldev) <
+		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
 		   drbd_get_capacity(mdev->this_bdev) &&
 		   mdev->state.disk >= D_OUTDATED &&
 		   mdev->state.conn < C_CONNECTED) {
@@ -2885,7 +2885,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 #undef min_not_zero
 
 	if (get_ldev(mdev)) {
-		dd = drbd_determin_dev_size(mdev);
+	  dd = drbd_determin_dev_size(mdev, 0);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
 			return FALSE;
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index db5721ad50d1..a4d82f895994 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, )
 
 NL_PACKET(resize, 7,
 	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
+	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
 )
 
 NL_PACKET(syncer_conf, 8,
-- 
cgit v1.2.3


From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 12 Jan 2010 10:09:07 +0100
Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index e84f4733cb55..78962272338a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.6"
+#define REL_VERSION "8.3.7"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 91
-- 
cgit v1.2.3


From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 11 Jan 2010 11:14:44 +0900
Subject: libata: retry link resume if necessary

Interestingly, when SIDPR is used in ata_piix, writes to DET in
SControl sometimes get ignored leading to detection failure.  Update
sata_link_resume() such that it reads back SControl after clearing DET
and retry if it's not clear.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: fengxiangjun <fengxiangjun@neusoft.com>
Reported-by: Jim Faulkner <jfaulkne@ccs.neu.edu>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++-------
 include/linux/libata.h    |  3 +++
 2 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 22ff51bdbc8a..6728328f3bea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
 int sata_link_resume(struct ata_link *link, const unsigned long *params,
 		     unsigned long deadline)
 {
+	int tries = ATA_LINK_RESUME_TRIES;
 	u32 scontrol, serror;
 	int rc;
 
 	if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
 		return rc;
 
-	scontrol = (scontrol & 0x0f0) | 0x300;
+	/*
+	 * Writes to SControl sometimes get ignored under certain
+	 * controllers (ata_piix SIDPR).  Make sure DET actually is
+	 * cleared.
+	 */
+	do {
+		scontrol = (scontrol & 0x0f0) | 0x300;
+		if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+			return rc;
+		/*
+		 * Some PHYs react badly if SStatus is pounded
+		 * immediately after resuming.  Delay 200ms before
+		 * debouncing.
+		 */
+		msleep(200);
 
-	if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
-		return rc;
+		/* is SControl restored correctly? */
+		if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+			return rc;
+	} while ((scontrol & 0xf0f) != 0x300 && --tries);
 
-	/* Some PHYs react badly if SStatus is pounded immediately
-	 * after resuming.  Delay 200ms before debouncing.
-	 */
-	msleep(200);
+	if ((scontrol & 0xf0f) != 0x300) {
+		ata_link_printk(link, KERN_ERR,
+				"failed to resume link (SControl %X)\n",
+				scontrol);
+		return 0;
+	}
+
+	if (tries < ATA_LINK_RESUME_TRIES)
+		ata_link_printk(link, KERN_WARNING,
+				"link resume succeeded after %d retries\n",
+				ATA_LINK_RESUME_TRIES - tries);
 
 	if ((rc = sata_link_debounce(link, params, deadline)))
 		return rc;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6a9c4ddd3d95..73112250862c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -354,6 +354,9 @@ enum {
 	/* max tries if error condition is still set after ->error_handler */
 	ATA_EH_MAX_TRIES	= 5,
 
+	/* sometimes resuming a link requires several retries */
+	ATA_LINK_RESUME_TRIES	= 5,
+
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
 	ATA_EH_DEV_TRIES	= 3,
-- 
cgit v1.2.3


From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Tue, 12 Jan 2010 17:39:16 +1100
Subject: lib: Introduce generic list_sort function

There are two copies of list_sort() in the tree already, one in the DRM
code, another in ubifs.  Now XFS needs this as well.  Create a generic
list_sort() function from the ubifs version and convert existing users
to it so we don't end up with yet another copy in the tree.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_modes.c |  90 ++------------------------------------
 fs/ubifs/gc.c               |  96 +----------------------------------------
 include/linux/list_sort.h   |  11 +++++
 lib/Makefile                |   2 +-
 lib/list_sort.c             | 102 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 182 deletions(-)
 create mode 100644 include/linux/list_sort.h
 create mode 100644 lib/list_sort.c

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 6d81a02463a3..76d63394c776 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1,9 +1,4 @@
 /*
- * The list_sort function is (presumably) licensed under the GPL (see the
- * top level "COPYING" file for details).
- *
- * The remainder of this file is:
- *
  * Copyright © 1997-2003 by The XFree86 Project, Inc.
  * Copyright © 2007 Dave Airlie
  * Copyright © 2007-2008 Intel Corporation
@@ -36,6 +31,7 @@
  */
 
 #include <linux/list.h>
+#include <linux/list_sort.h>
 #include "drmP.h"
 #include "drm.h"
 #include "drm_crtc.h"
@@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
 
 /**
  * drm_mode_compare - compare modes for favorability
+ * @priv: unused
  * @lh_a: list_head for first mode
  * @lh_b: list_head for second mode
  *
@@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
  * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
  * positive if @lh_b is better than @lh_a.
  */
-static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
+static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b)
 {
 	struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head);
 	struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head);
@@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
 	return diff;
 }
 
-/* FIXME: what we don't have a list sort function? */
-/* list sort from Mark J Roberts (mjr@znex.org) */
-void list_sort(struct list_head *head,
-	       int (*cmp)(struct list_head *a, struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * drm_mode_sort - sort mode list
  * @mode_list: list to sort
@@ -975,7 +893,7 @@ void list_sort(struct list_head *head,
  */
 void drm_mode_sort(struct list_head *mode_list)
 {
-	list_sort(mode_list, drm_mode_compare);
+	list_sort(NULL, mode_list, drm_mode_compare);
 }
 EXPORT_SYMBOL(drm_mode_sort);
 
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/list_sort.h>
 #include "ubifs.h"
 
 /*
@@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c)
 	return err;
 }
 
-/**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
-		      int (*cmp)(void *priv, struct list_head *a,
-				 struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	if (list_empty(head))
-		return;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(priv, p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * data_nodes_cmp - compare 2 data nodes.
  * @priv: UBIFS file-system description object
diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h
new file mode 100644
index 000000000000..1a2df2efb771
--- /dev/null
+++ b/include/linux/list_sort.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LIST_SORT_H
+#define _LINUX_LIST_SORT_H
+
+#include <linux/types.h>
+
+struct list_head;
+
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b));
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 911b25aed1e7..3b0b4a696db9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o
+	 string_helpers.o gcd.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 000000000000..19d11e0bb958
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,102 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+/**
+ * list_sort - sort a list.
+ * @priv: private data, passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
+ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
+ * in ascending order.
+ *
+ * The comparison function @cmp is supposed to return a negative value if @a is
+ * less than @b, and a positive value if @a is greater than @b. If @a and @b
+ * are equivalent, then it does not matter what this function returns.
+ */
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b))
+{
+	struct list_head *p, *q, *e, *list, *tail, *oldhead;
+	int insize, nmerges, psize, qsize, i;
+
+	if (list_empty(head))
+		return;
+
+	list = head->next;
+	list_del(head);
+	insize = 1;
+	for (;;) {
+		p = oldhead = list;
+		list = tail = NULL;
+		nmerges = 0;
+
+		while (p) {
+			nmerges++;
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next == oldhead ? NULL : q->next;
+				if (!q)
+					break;
+			}
+
+			qsize = insize;
+			while (psize > 0 || (qsize > 0 && q)) {
+				if (!psize) {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				} else if (!qsize || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else if (cmp(priv, p, q) <= 0) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				}
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				e->prev = tail;
+				tail = e;
+			}
+			p = q;
+		}
+
+		tail->next = list;
+		list->prev = tail;
+
+		if (nmerges <= 1)
+			break;
+
+		insize *= 2;
+	}
+
+	head->next = list;
+	head->prev = list->prev;
+	list->prev->next = head;
+	list->prev = head;
+}
+
+EXPORT_SYMBOL(list_sort);
-- 
cgit v1.2.3


From f6a8c60960bbea378142d1fa1b3d111555ee41c7 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sun, 29 Nov 2009 15:23:51 +0000
Subject: mtd: Really add ARM pismo support

(Commit 7cb777a3d71f9d1f7eb149c7a504d21f24219ae8 (mtd: add ARM pismo support)
intended to add this, but seems only to have patched the Makefile without
touching Kconfig or providing any code...)

The following patch adds support for PISMO modules found on ARM Ltd
development platforms.  These are MTD modules, and can have a
selection of SRAM, flash or DOC devices as described by an on-board
I2C EEPROM.

We support SRAM and NOR flash devices only by registering appropriate
conventional MTD platform devices as children of the 'pismo' device.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig  |  17 +++
 drivers/mtd/maps/pismo.c  | 320 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/pismo.h |  17 +++
 3 files changed, 354 insertions(+)
 create mode 100644 drivers/mtd/maps/pismo.c
 create mode 100644 include/linux/mtd/pismo.h

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 4c364d44ad59..2de0cc823d60 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -549,4 +549,21 @@ config MTD_VMU
 	  To build this as a module select M here, the module will be called
 	  vmu-flash.
 
+config MTD_PISMO
+	tristate "MTD discovery driver for PISMO modules"
+	depends on I2C
+	depends on ARCH_VERSATILE
+	help
+	  This driver allows for discovery of PISMO modules - see
+	  <http://www.pismoworld.org/>.  These are small modules containing
+	  up to five memory devices (eg, SRAM, flash, DOC) described by an
+	  I2C EEPROM.
+
+	  This driver does not create any MTD maps itself; instead it
+	  creates MTD physmap and MTD SRAM platform devices.  If you
+	  enable this option, you should consider enabling MTD_PHYSMAP
+	  and/or MTD_PLATRAM according to the devices on your module.
+
+	  When built as a module, it will be called pismo.ko
+
 endmenu
diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c
new file mode 100644
index 000000000000..c48cad271f5d
--- /dev/null
+++ b/drivers/mtd/maps/pismo.c
@@ -0,0 +1,320 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * For ARM Realview and Versatile platforms
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/mtd/physmap.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/mtd/pismo.h>
+
+#define PISMO_NUM_CS	5
+
+struct pismo_cs_block {
+	u8	type;
+	u8	width;
+	__le16	access;
+	__le32	size;
+	u32	reserved[2];
+	char	device[32];
+} __packed;
+
+struct pismo_eeprom {
+	struct pismo_cs_block cs[PISMO_NUM_CS];
+	char	board[15];
+	u8	sum;
+} __packed;
+
+struct pismo_mem {
+	phys_addr_t base;
+	u32	size;
+	u16	access;
+	u8	width;
+	u8	type;
+};
+
+struct pismo_data {
+	struct i2c_client	*client;
+	void			(*vpp)(void *, int);
+	void			*vpp_data;
+	struct platform_device	*dev[PISMO_NUM_CS];
+};
+
+/* FIXME: set_vpp could do with a better calling convention */
+static struct pismo_data *vpp_pismo;
+static DEFINE_MUTEX(pismo_mutex);
+
+static int pismo_setvpp_probe_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo) {
+		mutex_unlock(&pismo_mutex);
+		kfree(pismo);
+		return -EBUSY;
+	}
+	vpp_pismo = pismo;
+	mutex_unlock(&pismo_mutex);
+	return 0;
+}
+
+static void pismo_setvpp_remove_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo == pismo)
+		vpp_pismo = NULL;
+	mutex_unlock(&pismo_mutex);
+}
+
+static void pismo_set_vpp(struct map_info *map, int on)
+{
+	struct pismo_data *pismo = vpp_pismo;
+
+	pismo->vpp(pismo->vpp_data, on);
+}
+/* end of hack */
+
+
+static unsigned int __devinit pismo_width_to_bytes(unsigned int width)
+{
+	width &= 15;
+	if (width > 2)
+		return 0;
+	return 1 << width;
+}
+
+static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf,
+	u8 addr, size_t size)
+{
+	int ret;
+	struct i2c_msg msg[] = {
+		{
+			.addr = client->addr,
+			.len = sizeof(addr),
+			.buf = &addr,
+		}, {
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = size,
+			.buf = buf,
+		},
+	};
+
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+
+	return ret == ARRAY_SIZE(msg) ? size : -EIO;
+}
+
+static int __devinit pismo_add_device(struct pismo_data *pismo, int i,
+	struct pismo_mem *region, const char *name, void *pdata, size_t psize)
+{
+	struct platform_device *dev;
+	struct resource res = { };
+	phys_addr_t base = region.base;
+	int ret;
+
+	if (base == ~0)
+		return -ENXIO;
+
+	res.start = base;
+	res.end = base + region->size - 1;
+	res.flags = IORESOURCE_MEM;
+
+	dev = platform_device_alloc(name, i);
+	if (!dev)
+		return -ENOMEM;
+	dev->dev.parent = &pismo->client->dev;
+
+	do {
+		ret = platform_device_add_resources(dev, &res, 1);
+		if (ret)
+			break;
+
+		ret = platform_device_add_data(dev, pdata, psize);
+		if (ret)
+			break;
+
+		ret = platform_device_add(dev);
+		if (ret)
+			break;
+
+		pismo->dev[i] = dev;
+		return 0;
+	} while (0);
+
+	platform_device_put(dev);
+	return ret;
+}
+
+static int __devinit pismo_add_nor(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct physmap_flash_data data = {
+		.width = region->width,
+	};
+
+	if (pismo->vpp)
+		data.set_vpp = pismo_set_vpp;
+
+	return pismo_add_device(pismo, i, region, "physmap-flash",
+		&data, sizeof(data));
+}
+
+static int __devinit pismo_add_sram(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct platdata_mtd_ram data = {
+		.bankwidth = region->width,
+	};
+
+	return pismo_add_device(pismo, i, region, "mtd-ram",
+		&data, sizeof(data));
+}
+
+static void __devinit pismo_add_one(struct pismo_data *pismo, int i,
+	const struct pismo_cs_block *cs, phys_addr_t base)
+{
+	struct device *dev = &pismo->client->dev;
+	struct pismo_mem region;
+
+	region.base = base;
+	region.type = cs->type;
+	region.width = pismo_width_to_bytes(cs->width);
+	region.access = le16_to_cpu(cs->access);
+	region.size = le32_to_cpu(cs->size);
+
+	if (region.width == 0) {
+		dev_err(dev, "cs%u: bad width: %02x, ignoring\n", i, cs->width);
+		return;
+	}
+
+	/*
+	 * FIXME: may need to the platforms memory controller here, but at
+	 * the moment we assume that it has already been correctly setup.
+	 * The memory controller can also tell us the base address as well.
+	 */
+
+	dev_info(dev, "cs%u: %.32s: type %02x access %u00ps size %uK\n",
+		i, cs->device, region.type, region.access, region.size / 1024);
+
+	switch (region.type) {
+	case 0:
+		break;
+	case 1:
+		/* static DOC */
+		break;
+	case 2:
+		/* static NOR */
+		pismo_add_nor(pismo, i, &region);
+		break;
+	case 3:
+		/* static RAM */
+		pismo_add_sram(pismo, i, &region);
+		break;
+	}
+}
+
+static int __devexit pismo_remove(struct i2c_client *client)
+{
+	struct pismo_data *pismo = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pismo->dev); i++)
+		platform_device_unregister(pismo->dev[i]);
+
+	/* FIXME: set_vpp needs saner arguments */
+	pismo_setvpp_remove_fix(pismo);
+
+	kfree(pismo);
+
+	return 0;
+}
+
+static int __devinit pismo_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct pismo_pdata *pdata = client->dev.platform_data;
+	struct pismo_eeprom eeprom;
+	struct pismo_data *pismo;
+	int ret, i;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "functionality mismatch\n");
+		return -EIO;
+	}
+
+	pismo = kzalloc(sizeof(*pismo), GFP_KERNEL);
+	if (!pismo)
+		return -ENOMEM;
+
+	/* FIXME: set_vpp needs saner arguments */
+	ret = pismo_setvpp_probe_fix(pismo);
+	if (ret)
+		return ret;
+
+	pismo->client = client;
+	if (pdata) {
+		pismo->vpp = pdata->set_vpp;
+		pismo->vpp_data = pdata->vpp_data;
+	}
+	i2c_set_clientdata(client, pismo);
+
+	ret = pismo_eeprom_read(client, &eeprom, 0, sizeof(eeprom));
+	if (ret < 0) {
+		dev_err(&client->dev, "error reading EEPROM: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(&client->dev, "%.15s board found\n", eeprom.board);
+
+	for (i = 0; i < ARRAY_SIZE(eeprom.cs); i++)
+		if (eeprom.cs[i].type != 0xff)
+			pismo_add_one(pismo, i, &eeprom.cs[i],
+				      pdata->cs_addrs[i]);
+
+	return 0;
+}
+
+static const struct i2c_device_id pismo_id[] = {
+	{ "pismo" },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, pismo_id);
+
+static struct i2c_driver pismo_driver = {
+	.driver	= {
+		.name	= "pismo",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pismo_probe,
+	.remove		= __devexit_p(pismo_remove),
+	.id_table	= pismo_id,
+};
+
+static int __init pismo_init(void)
+{
+	BUILD_BUG_ON(sizeof(struct pismo_cs_block) != 48);
+	BUILD_BUG_ON(sizeof(struct pismo_eeprom) != 256);
+
+	return i2c_add_driver(&pismo_driver);
+}
+module_init(pismo_init);
+
+static void __exit pismo_exit(void)
+{
+	i2c_del_driver(&pismo_driver);
+}
+module_exit(pismo_exit);
+
+MODULE_AUTHOR("Russell King <linux@arm.linux.org.uk>");
+MODULE_DESCRIPTION("PISMO memory driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mtd/pismo.h b/include/linux/mtd/pismo.h
new file mode 100644
index 000000000000..8dfb7e1421c5
--- /dev/null
+++ b/include/linux/mtd/pismo.h
@@ -0,0 +1,17 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef __LINUX_MTD_PISMO_H
+#define __LINUX_MTD_PISMO_H
+
+struct pismo_pdata {
+	void			(*set_vpp)(void *, int);
+	void			*vpp_data;
+	phys_addr_t		cs_addrs[5];
+};
+
+#endif
-- 
cgit v1.2.3


From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Dec 2009 06:58:56 -0500
Subject: Fix ACC_MODE() for real

commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather
nasty mess: definitions of ACC_MODE used to be different.  Fixed the
resulting breakage, converting them to variant that takes O_... value;
all callers have that and it actually simplifies life (see tomoyo part
of changes).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               | 2 +-
 include/linux/fs.h       | 2 +-
 security/tomoyo/tomoyo.c | 7 +------
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 1b26b1620664..d930f1856ed2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1620,7 +1620,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		open_flag |= O_DSYNC;
 
 	if (!acc_mode)
-		acc_mode = MAY_OPEN | ACC_MODE(flag);
+		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9147ca88f253..b1bcb275b596 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write,
 
 int __init get_filesystem_list(char *buf);
 
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
 
 #endif /* __KERNEL__ */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 8a00ade85166..2aceebf5f354 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -80,9 +80,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 		return tomoyo_find_next_domain(bprm);
 	/*
 	 * Read permission is checked against interpreters using next domain.
-	 * '1' is the result of open_to_namei_flags(O_RDONLY).
 	 */
-	return tomoyo_check_open_permission(domain, &bprm->file->f_path, 1);
+	return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
 }
 
 static int tomoyo_path_truncate(struct path *path, loff_t length,
@@ -184,10 +183,6 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 {
 	int flags = f->f_flags;
-
-	if ((flags + 1) & O_ACCMODE)
-		flags++;
-	flags |= f->f_flags & (O_APPEND | O_TRUNC);
 	/* Don't check read permission here if called from do_execve(). */
 	if (current->in_execve)
 		return 0;
-- 
cgit v1.2.3


From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:55 +0800
Subject: lib: Introduce strnstr()

It differs strstr() in that it limits the length to be searched
in the first string.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/string.h |  5 ++++-
 lib/string.c           | 27 ++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 651839a2a755..a716ee2a8adb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str)
 }
 
 #ifndef __HAVE_ARCH_STRSTR
-extern char * strstr(const char *,const char *);
+extern char * strstr(const char *, const char *);
+#endif
+#ifndef __HAVE_ARCH_STRNSTR
+extern char * strnstr(const char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/lib/string.c b/lib/string.c
index 9f75b4ec50b8..a1cdcfcc42d0 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	int l1, l2;
+	size_t l1, l2;
 
 	l2 = strlen(s2);
 	if (!l2)
@@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2)
 EXPORT_SYMBOL(strstr);
 #endif
 
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+	size_t l1 = len, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
-- 
cgit v1.2.3


From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Fri, 15 Jan 2010 17:01:02 -0800
Subject: kfifo: fix kfifo_out_locked race bug

Fix a wrong optimization in include/linux/kfifo.h which could cause a race
in kfifo_out_locked.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Reported-by: Johan Hovold <jhovold@gmail.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7c6b32a1421c..c4ac88b3c302 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 	ret = kfifo_out(fifo, to, n);
 
-	/*
-	 * optimization: if the FIFO is empty, set the indices to 0
-	 * so we don't wrap the next time
-	 */
-	if (kfifo_is_empty(fifo))
-		kfifo_reset(fifo);
-
 	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
-- 
cgit v1.2.3


From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 15 Jan 2010 17:01:11 -0800
Subject: tty.h: make tty_port_get() static inline

I get a few dozen of these warnings when using
  gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2):

In file included from mmotm-2010-0113-1217/init/do_mounts.c:5:
mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get':
mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static

so make the function static inline.

[akpm@linux-foundation.org: may as well convert tty_port_users() also]
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index ef3a2947b102..6abfcf5b5887 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_put(struct tty_port *port);
 
-extern inline struct tty_port *tty_port_get(struct tty_port *port)
+static inline struct tty_port *tty_port_get(struct tty_port *port)
 {
 	if (port)
 		kref_get(&port->kref);
@@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
 extern int tty_port_open(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
-extern inline int tty_port_users(struct tty_port *port)
+static inline int tty_port_users(struct tty_port *port)
 {
 	return port->count + port->blocked_open;
 }
-- 
cgit v1.2.3


From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:12 -0800
Subject: kfifo: use void * pointers for user buffers

The pointers to user buffers are currently unsigned char *, which requires
a lot of casting in the caller for any non-char typed buffers.  Use void *
instead.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 +++++-----
 kernel/kfifo.c        |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c4ac88b3c302..6fb495ea956a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -104,15 +104,15 @@ union { \
 
 #undef __kfifo_initializer
 
-extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+extern void kfifo_init(struct kfifo *fifo, void *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int kfifo_in(struct kfifo *fifo,
-				const unsigned char *from, unsigned int len);
+				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
-				unsigned char *to, unsigned int len);
+				void *to, unsigned int len);
 
 /**
  * kfifo_reset - removes the entire FIFO contents
@@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
  * bytes copied.
  */
 static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
-		const unsigned char *from, unsigned int n, spinlock_t *lock)
+		const void *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
@@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
  * @to buffer and returns the number of copied bytes.
  */
 static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
-	unsigned char *to, unsigned int n, spinlock_t *lock)
+	void *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..ab615e695052 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
 #include <linux/log2.h>
 #include <linux/uaccess.h>
 
-static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+static void _kfifo_init(struct kfifo *fifo, void *buffer,
 		unsigned int size)
 {
 	fifo->buffer = buffer;
@@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @size: the size of the internal buffer, this have to be a power of 2.
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
+void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+unsigned int kfifo_in(struct kfifo *fifo, const void *from,
 				unsigned int len)
 {
 	len = min(kfifo_avail(fifo), len);
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 {
 	len = min(kfifo_len(fifo), len);
 
-- 
cgit v1.2.3


From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:15 -0800
Subject: kfifo: sanitize *_user error handling

Right now for kfifo_*_user it's not easily possible to distingush between
a user copy failing and the FIFO not containing enough data.  The problem
is that both conditions are multiplexed into the same return code.

Avoid this by moving the "copy length" into a separate output parameter
and only return 0/-EFAULT in the main return value.

I didn't fully adapt the weird "record" variants, those seem
to be unused anyways and were rather messy (should they be just removed?)

I would appreciate some double checking if I did all the conversions
correctly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  8 +++---
 kernel/kfifo.c        | 76 +++++++++++++++++++++++++++++++++------------------
 2 files changed, 53 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6fb495ea956a..86ad50a900c8 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 extern void kfifo_skip(struct kfifo *fifo, unsigned int len);
 
-extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int n);
+extern __must_check int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned *lenout);
 
-extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int n);
+extern __must_check int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned *lenout);
 
 /*
  * __kfifo_add_out internal helper function for updating the out offset
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index ab615e695052..b50bb622e8b0 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
 	memcpy(to + l, fifo->buffer, len - l);
 }
 
-static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
-	 const void __user *from, unsigned int len, unsigned int off)
+static inline int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off,
+	 unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	if (unlikely(ret)) {
+		*lenout = ret;
+		return -EFAULT;
+	}
+	*lenout = l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	return copy_from_user(fifo->buffer, from + l, len - l);
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	*lenout += ret ? ret : len - l;
+	return ret ? -EFAULT : 0;
 }
 
-static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
-		void __user *to, unsigned int len, unsigned int off)
+static inline int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
 	ret = copy_to_user(to, fifo->buffer + off, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	*lenout = l;
+	if (unlikely(ret)) {
+		*lenout -= ret;
+		return -EFAULT;
+	}
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	return copy_to_user(to + l, fifo->buffer, len - l);
+	len -= l;
+	ret = copy_to_user(to + l, fifo->buffer, len);
+	if (unlikely(ret)) {
+		*lenout += len - ret;
+		return -EFAULT;
+	}
+	*lenout += len;
+	return 0;
 }
 
 unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
 unsigned int __kfifo_from_user_n(struct kfifo *fifo,
 	const void __user *from, unsigned int len, unsigned int recsize)
 {
+	unsigned total;
+
 	if (kfifo_avail(fifo) < len + recsize)
 		return len + 1;
 
-	return __kfifo_from_user_data(fifo, from, len, recsize);
+	__kfifo_from_user_data(fifo, from, len, recsize, &total);
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_from_user_n);
 
@@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
  * @len: the length of the data to be added.
  *
  * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
+ * FIFO depending and returns -EFAULT/0.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+int kfifo_from_user(struct kfifo *fifo,
+        const void __user *from, unsigned int len, unsigned *total)
 {
+	int ret;
 	len = min(kfifo_avail(fifo), len);
-	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	ret = __kfifo_from_user_data(fifo, from, len, 0, total);
+	if (ret)
+		return ret;
 	__kfifo_add_in(fifo, len);
-	return len;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_from_user);
 
@@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
 	void __user *to, unsigned int len, unsigned int reclen,
 	unsigned int recsize)
 {
-	unsigned int ret;
+	unsigned int ret, total;
 
 	if (kfifo_len(fifo) < reclen + recsize)
 		return len;
 
-	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
 
 	if (likely(ret == 0))
 		__kfifo_add_out(fifo, reclen + recsize);
 
-	return ret;
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_to_user_n);
 
@@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
+ @ @lenout: pointer to output variable with copied data
  *
  * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
+ * @to buffer and 0 or -EFAULT.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned *lenout)
 {
+	int ret;
 	len = min(kfifo_len(fifo), len);
-	len -= __kfifo_to_user_data(fifo, to, len, 0);
-	__kfifo_add_out(fifo, len);
-	return len;
+	ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
+	__kfifo_add_out(fifo, *lenout);
+	return ret;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
-- 
cgit v1.2.3


From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:16 -0800
Subject: kfifo: add kfifo_out_peek

In some upcoming code it's useful to peek into a FIFO without permanentely
removing data.  This patch implements a new kfifo_out_peek() to do this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  3 +++
 kernel/kfifo.c        | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 86ad50a900c8..7ad6d32dd673 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo,
 				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 				void *to, unsigned int len);
+extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
+				void *to, unsigned int len, unsigned offset);
+
 
 /**
  * kfifo_reset - removes the entire FIFO contents
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index b50bb622e8b0..7384f120be87 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_out);
 
+/**
+ * kfifo_out_peek - copy some data from the FIFO, but do not remove it
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ * @offset: offset into the fifo
+ *
+ * This function copies at most @len bytes at @offset from the FIFO
+ * into the @to buffer and returns the number of copied bytes.
+ * The data is not removed from the FIFO.
+ */
+unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
+			    unsigned offset)
+{
+	len = min(kfifo_len(fifo), len + offset);
+
+	__kfifo_out_data(fifo, to, len, offset);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out_peek);
+
 unsigned int __kfifo_out_generic(struct kfifo *fifo,
 	void *to, unsigned int len, unsigned int recsize,
 	unsigned int *total)
-- 
cgit v1.2.3


From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: add kfifo_initialized

Simple inline that checks if kfifo_init() has been executed on a fifo.

This is useful for walking all per CPU fifos, when some of them might not
have been brought up yet.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7ad6d32dd673..c8618243ca5a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
 				void *to, unsigned int len, unsigned offset);
 
+/**
+ * kfifo_initialized - Check if kfifo is initialized.
+ * @fifo: fifo to check
+ * Return %true if FIFO is initialized, otherwise %false.
+ * Assumes the fifo was 0 before.
+ */
+static inline bool kfifo_initialized(struct kfifo *fifo)
+{
+	return fifo->buffer != 0;
+}
 
 /**
  * kfifo_reset - removes the entire FIFO contents
-- 
cgit v1.2.3


From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: document everywhere that size has to be power of two

On my first try using them I missed that the fifos need to be power of
two, resulting in a runtime bug.  Document that requirement everywhere
(and fix one grammar bug)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 4 ++--
 kernel/kfifo.c        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c8618243ca5a..6f6c5f300af6 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -67,7 +67,7 @@ struct kfifo {
 /**
  * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used inside struct or union declaration
  * Note2: the macro creates two objects:
@@ -91,7 +91,7 @@ union { \
 /**
  * DEFINE_KFIFO - macro to define and initialize a kfifo
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used for global and local kfifo data type variables
  * Note2: the macro creates two objects:
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 7384f120be87..32c5c15d750d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer,
  * kfifo_init - initialize a FIFO using a preallocated buffer
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
- * @size: the size of the internal buffer, this have to be a power of 2.
+ * @size: the size of the internal buffer, this has to be a power of 2.
  *
  */
 void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
-- 
cgit v1.2.3


From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Fri, 15 Jan 2010 17:01:22 -0800
Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2()

Add BUILD_BUG_ON_NOT_POWER_OF_2()

When code relies on a constant being a power of 2:

	#define FOO	512	/* must be a power of 2 */

it would be nice to be able to do:

	BUILD_BUG_ON(!is_power_of_2(FOO));

However applying an inline function does not result in a compile-time
constant that can be used with BUILD_BUG_ON(), so trying that gives
results in:

	error: bit-field '<anonymous>' width not an integer constant

As suggested by akpm, rather than monkeying around with is_power_of_2()
and risking gcc warts about constant expressions, just create a macro
BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: David Dillow <dave@thedillows.org>
Cc: "Robert P. J. Day" <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fc9f5aab5f8..328bca609b9b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -734,6 +734,10 @@ struct sysinfo {
 /* Force a compilation error if condition is constant and true */
 #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
 
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
 /* Force a compilation error if condition is true, but also produce a
    result (of value 0 and type size_t), so the expression can be used
    e.g. in a structure initializer (or where-ever else comma expressions
-- 
cgit v1.2.3


From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:33 -0800
Subject: nommu: struct vm_region's vm_usage count need not be atomic

The vm_usage count field in struct vm_region does not need to be atomic as
it's only even modified whilst nommu_region_sem is write locked.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 +-
 mm/nommu.c               | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84d020bed083..80cfa78a8cf6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -122,7 +122,7 @@ struct vm_region {
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
 
-	atomic_t	vm_usage;	/* region usage count */
+	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
 	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
 						* this region */
 };
diff --git a/mm/nommu.c b/mm/nommu.c
index 17773862619b..5e39294f8ea8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to)
 static void __put_nommu_region(struct vm_region *region)
 	__releases(nommu_region_sem)
 {
-	kenter("%p{%d}", region, atomic_read(&region->vm_usage));
+	kenter("%p{%d}", region, region->vm_usage);
 
 	BUG_ON(!nommu_region_tree.rb_node);
 
-	if (atomic_dec_and_test(&region->vm_usage)) {
+	if (--region->vm_usage == 0) {
 		if (region->vm_top > region->vm_start)
 			delete_nommu_region(region);
 		up_write(&nommu_region_sem);
@@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	if (!vma)
 		goto error_getting_vma;
 
-	atomic_set(&region->vm_usage, 1);
+	region->vm_usage = 1;
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
@@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			}
 
 			/* we've found a region we can share */
-			atomic_inc(&pregion->vm_usage);
+			pregion->vm_usage++;
 			vma->vm_region = pregion;
 			start = pregion->vm_start;
 			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
@@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 					vma->vm_region = NULL;
 					vma->vm_start = 0;
 					vma->vm_end = 0;
-					atomic_dec(&pregion->vm_usage);
+					pregion->vm_usage--;
 					pregion = NULL;
 					goto error_just_free;
 				}
@@ -1444,7 +1444,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* we're only permitted to split anonymous regions that have a single
 	 * owner */
 	if (vma->vm_file ||
-	    atomic_read(&vma->vm_region->vm_usage) != 1)
+	    vma->vm_region->vm_usage != 1)
 		return -ENOMEM;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -1518,7 +1518,7 @@ static int shrink_vma(struct mm_struct *mm,
 
 	/* cut the backing region down to size */
 	region = vma->vm_region;
-	BUG_ON(atomic_read(&region->vm_usage) != 1);
+	BUG_ON(region->vm_usage != 1);
 
 	down_write(&nommu_region_sem);
 	delete_nommu_region(region);
-- 
cgit v1.2.3


From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:35 -0800
Subject: nommu: don't need get_unmapped_area() for NOMMU

get_unmapped_area() is unnecessary for NOMMU as no-one calls it.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 include/linux/sched.h    |  7 +++++--
 mm/nommu.c               | 21 ---------------------
 mm/util.c                |  2 +-
 4 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 80cfa78a8cf6..36f96271306c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -205,10 +205,12 @@ struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
 	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+#ifdef CONFIG_MMU
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+#endif
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d4991be9d53..6f7bba93929b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,8 @@ extern int sysctl_max_map_count;
 
 #include <linux/aio.h>
 
+#ifdef CONFIG_MMU
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
@@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long flags);
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+#endif
 
 #if USE_SPLIT_PTLOCKS
 /*
@@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
-
 #ifdef CONFIG_TRACING
 extern void
 __trace_special(void *__tr, void *__data,
diff --git a/mm/nommu.c b/mm/nommu.c
index d6dd656264a2..32be0cf51ba6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1760,27 +1760,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * ask for an unmapped area at which to create a mapping on a file
- */
-unsigned long get_unmapped_area(struct file *file, unsigned long addr,
-				unsigned long len, unsigned long pgoff,
-				unsigned long flags)
-{
-	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
-				  unsigned long, unsigned long);
-
-	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op && file->f_op->get_unmapped_area)
-		get_area = file->f_op->get_unmapped_area;
-
-	if (!get_area)
-		return -ENOSYS;
-
-	return get_area(file, addr, len, pgoff, flags);
-}
-EXPORT_SYMBOL(get_unmapped_area);
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..834db7be240f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n)
 }
 EXPORT_SYMBOL(strndup_user);
 
-#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
-- 
cgit v1.2.3


From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:39 -0800
Subject: nommu: fix shared mmap after truncate shrinkage problems

Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen
over the end of a truncation.  The problem is that
ramfs_nommu_check_mappings() checks that the reduced file size against the
VMA tree, but not the vm_region tree.

The following sequence of events can cause the problem:

	fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600);
	ftruncate(fd, 32 * 1024);
	a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	munmap(a, 32 * 1024);
	ftruncate(fd, 16 * 1024);
	c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'a' creates a vm_region covering 32KB of the file.  Mapping 'b'
sees that the vm_region from 'a' is covering the region it wants and so
shares it, pinning it in memory.

Mapping 'a' then goes away and the file is truncated to the end of VMA
'b'.  However, the region allocated by 'a' is still in effect, and has
_not_ been reduced.

Mapping 'c' is then created, and because there's a vm_region covering the
desired region, get_unmapped_area() is _not_ called to repeat the check,
and the mapping is granted, even though the pages from the latter half of
the mapping have been discarded.

However:

	d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'd' should work, and should end up sharing the region allocated by
'a'.

To deal with this, we shrink the vm_region struct during the truncation,
lest do_mmap_pgoff() take it as licence to share the full region
automatically without calling the get_unmapped_area() file op again.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ramfs/file-nommu.c | 31 +-------------------------
 include/linux/mm.h    |  1 +
 mm/nommu.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 266531343aae..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -121,35 +121,6 @@ add_error:
 	return ret;
 }
 
-/*****************************************************************************/
-/*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
-				      size_t newsize, size_t size)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	down_write(&nommu_region_sem);
-
-	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      newsize >> PAGE_SHIFT,
-			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
-			      ) {
-		/* found one - only interested if it's shared out of the page
-		 * cache */
-		if (vma->vm_flags & VM_SHARED) {
-			up_write(&nommu_region_sem);
-			return -ETXTBSY; /* not quite true, but near enough */
-		}
-	}
-
-	up_write(&nommu_region_sem);
-	return 0;
-}
-
 /*****************************************************************************/
 /*
  *
@@ -169,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 
 	/* check that a decrease in size doesn't cut off any shared mappings */
 	if (newsize < size) {
-		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2265f28eb47a..60c467bfbabd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone);
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
+extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff --git a/mm/nommu.c b/mm/nommu.c
index 32be0cf51ba6..48a2ecfaf059 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1914,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	mmput(mm);
 	return len;
 }
+
+/**
+ * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
+ * @inode: The inode to check
+ * @size: The current filesize of the inode
+ * @newsize: The proposed filesize of the inode
+ *
+ * Check the shared mappings on an inode on behalf of a shrinking truncate to
+ * make sure that that any outstanding VMAs aren't broken and then shrink the
+ * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't
+ * automatically grant mappings that are too large.
+ */
+int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
+				size_t newsize)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	struct vm_region *region;
+	pgoff_t low, high;
+	size_t r_size, r_top;
+
+	low = newsize >> PAGE_SHIFT;
+	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	down_write(&nommu_region_sem);
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      low, high) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED) {
+			up_write(&nommu_region_sem);
+			return -ETXTBSY; /* not quite true, but near enough */
+		}
+	}
+
+	/* reduce any regions that overlap the dead zone - if in existence,
+	 * these will be pointed to by VMAs that don't overlap the dead zone
+	 *
+	 * we don't check for any regions that start beyond the EOF as there
+	 * shouldn't be any
+	 */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      0, ULONG_MAX) {
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		region = vma->vm_region;
+		r_size = region->vm_top - region->vm_start;
+		r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;
+
+		if (r_top > newsize) {
+			region->vm_top -= r_top - newsize;
+			if (region->vm_end > region->vm_top)
+				region->vm_end = region->vm_top;
+		}
+	}
+
+	up_write(&nommu_region_sem);
+	return 0;
+}
-- 
cgit v1.2.3


From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 4 Jan 2010 18:05:00 +0000
Subject: mfd: Correct WM835x ISINK ramp time defines

The constants used to specify ISINK ramp times for WM835x had the
wrong shifts so that the on times applied to the off ramp and vice
versa. The masks for the bitfields are correct.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index be3264e286e0..e786fe9841ef 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -666,20 +666,20 @@
 #define WM8350_ISINK_FLASH_DUR_64MS		(1 << 8)
 #define WM8350_ISINK_FLASH_DUR_96MS		(2 << 8)
 #define WM8350_ISINK_FLASH_DUR_1024MS		(3 << 8)
-#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 4)
-#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 4)
-#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 4)
-#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 0)
+#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 4)
 
 /*
  * Regulator Interrupts.
-- 
cgit v1.2.3


From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001
From: Ian Molton <ian@mnementh.co.uk>
Date: Wed, 6 Jan 2010 13:51:48 +0100
Subject: mfd: tmio_mmc hardware abstraction for CNF area

This patch abstracts out the CNF area code from tmio_mmc which
is not present in all hardware that can use this driver. This
is required so that we can support non-toshiba based hardware.

ASIC3 support by Philipp Zabel

Signed-off-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile        |   6 +--
 drivers/mfd/asic3.c         |  40 ++++++++++++---
 drivers/mfd/t7l66xb.c       |  55 +++++++++++++-------
 drivers/mfd/tc6387xb.c      | 119 ++++++++++++++++++++++++++++++++------------
 drivers/mfd/tc6393xb.c      |  56 +++++++++++++++++----
 drivers/mfd/tmio_core.c     |  52 +++++++++++++++++++
 drivers/mmc/host/tmio_mmc.c |  59 +++++++---------------
 drivers/mmc/host/tmio_mmc.h |  46 +++--------------
 include/linux/mfd/tmio.h    |  39 +++++++++++++++
 9 files changed, 323 insertions(+), 149 deletions(-)
 create mode 100644 drivers/mfd/tmio_core.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index ca2f2c4ff05e..8f0d18409ede 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -11,9 +11,9 @@ obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 
-obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o
-obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o
-obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o
+obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o tmio_core.o
 
 obj-$(CONFIG_MFD_WM8400)	+= wm8400-core.o
 wm831x-objs			:= wm831x-core.o wm831x-irq.o wm831x-otp.o
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index e22128c3e9a8..95c1e6bd1729 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -80,6 +80,7 @@ struct asic3 {
 	u16 irq_bothedge[4];
 	struct gpio_chip gpio;
 	struct device *dev;
+	void __iomem *tmio_cnf;
 
 	struct asic3_clk clocks[ARRAY_SIZE(asic3_clk_init)];
 };
@@ -685,8 +686,24 @@ static struct mfd_cell asic3_cell_ds1wm = {
 	.resources     = ds1wm_resources,
 };
 
+static void asic3_mmc_pwr(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_pwr(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
+static void asic3_mmc_clk_div(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_clk_div(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
 static struct tmio_mmc_data asic3_mmc_data = {
-	.hclk = 24576000,
+	.hclk           = 24576000,
+	.set_pwr        = asic3_mmc_pwr,
+	.set_clk_div    = asic3_mmc_clk_div,
 };
 
 static struct resource asic3_mmc_resources[] = {
@@ -695,11 +712,6 @@ static struct resource asic3_mmc_resources[] = {
 		.end   = ASIC3_SD_CTRL_BASE + 0x3ff,
 		.flags = IORESOURCE_MEM,
 	},
-	{
-		.start = ASIC3_SD_CONFIG_BASE,
-		.end   = ASIC3_SD_CONFIG_BASE + 0x1ff,
-		.flags = IORESOURCE_MEM,
-	},
 	{
 		.start = 0,
 		.end   = 0,
@@ -743,6 +755,10 @@ static int asic3_mmc_enable(struct platform_device *pdev)
 	asic3_set_register(asic, ASIC3_OFFSET(SDHWCTRL, SDCONF),
 			   ASIC3_SDHWCTRL_SDPWR, 1);
 
+	/* ASIC3_SD_CTRL_BASE assumes 32-bit addressing, TMIO is 16-bit */
+	tmio_core_mmc_enable(asic->tmio_cnf, 1 - asic->bus_shift,
+			     ASIC3_SD_CTRL_BASE >> 1);
+
 	return 0;
 }
 
@@ -797,10 +813,15 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 	asic3_cell_ds1wm.data_size = sizeof(asic3_cell_ds1wm);
 
 	/* MMC */
+	asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) +
+				 mem_sdio->start, 0x400 >> asic->bus_shift);
+	if (!asic->tmio_cnf) {
+		ret = -ENOMEM;
+		dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n");
+		goto out;
+	}
 	asic3_mmc_resources[0].start >>= asic->bus_shift;
 	asic3_mmc_resources[0].end   >>= asic->bus_shift;
-	asic3_mmc_resources[1].start >>= asic->bus_shift;
-	asic3_mmc_resources[1].end   >>= asic->bus_shift;
 
 	asic3_cell_mmc.platform_data = &asic3_cell_mmc;
 	asic3_cell_mmc.data_size = sizeof(asic3_cell_mmc);
@@ -820,7 +841,10 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 
 static void asic3_mfd_remove(struct platform_device *pdev)
 {
+	struct asic3 *asic = platform_get_drvdata(pdev);
+
 	mfd_remove_devices(&pdev->dev);
+	iounmap(asic->tmio_cnf);
 }
 
 /* Core */
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 0a255c1f1ce7..bcf4687d4af5 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -38,6 +38,19 @@ enum {
 	T7L66XB_CELL_MMC,
 };
 
+static const struct resource t7l66xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end	= 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_T7L66XB_MMC,
+		.end	= IRQ_T7L66XB_MMC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 #define SCR_REVID	0x08		/* b Revision ID	*/
 #define SCR_IMR		0x42		/* b Interrupt Mask	*/
 #define SCR_DEV_CTL	0xe0		/* b Device control	*/
@@ -83,6 +96,9 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc)
 
 	spin_unlock_irqrestore(&t7l66xb->lock, flags);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 
@@ -106,28 +122,28 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 	return 0;
 }
 
+static void t7l66xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(t7l66xb->scr + 0x200, 0, state);
+}
+
+static void t7l66xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(t7l66xb->scr + 0x200, 0, state);
+}
+
 /*--------------------------------------------------------------------------*/
 
 static struct tmio_mmc_data t7166xb_mmc_data = {
 	.hclk = 24000000,
-};
-
-static const struct resource t7l66xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end	= 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end	= 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = IRQ_T7L66XB_MMC,
-		.end	= IRQ_T7L66XB_MMC,
-		.flags = IORESOURCE_IRQ,
-	},
+	.set_pwr = t7l66xb_mmc_pwr,
+	.set_clk_div = t7l66xb_mmc_clk_div,
 };
 
 static const struct resource t7l66xb_nand_resources[] = {
@@ -282,6 +298,9 @@ static int t7l66xb_resume(struct platform_device *dev)
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 3280ab33f88a..5c7f04343d5c 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -22,28 +22,52 @@ enum {
 	TC6387XB_CELL_MMC,
 };
 
+struct tc6387xb {
+	void __iomem *scr;
+	struct clk *clk32k;
+	struct resource rscr;
+};
+
+static struct resource tc6387xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end   = 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = 0,
+		.end   = 0,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/*--------------------------------------------------------------------------*/
+
 #ifdef CONFIG_PM
 static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
 static int tc6387xb_resume(struct platform_device *dev)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_resume(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
@@ -53,12 +77,32 @@ static int tc6387xb_resume(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
+static void tc6387xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6387xb->scr + 0x200, 0, state);
+}
+
+static void tc6387xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6387xb->scr + 0x200, 0, state);
+}
+
+
 static int tc6387xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
+
+	tmio_core_mmc_enable(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
 
 	return 0;
 }
@@ -66,36 +110,20 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc)
 static int tc6387xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
-/*--------------------------------------------------------------------------*/
-
 static struct tmio_mmc_data tc6387xb_mmc_data = {
 	.hclk = 24000000,
+	.set_pwr = tc6387xb_mmc_pwr,
+	.set_clk_div = tc6387xb_mmc_clk_div,
 };
 
-static struct resource tc6387xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end   = 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end   = 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0,
-		.end   = 0,
-		.flags = IORESOURCE_IRQ,
-	},
-};
+/*--------------------------------------------------------------------------*/
 
 static struct mfd_cell tc6387xb_cells[] = {
 	[TC6387XB_CELL_MMC] = {
@@ -111,8 +139,9 @@ static struct mfd_cell tc6387xb_cells[] = {
 static int tc6387xb_probe(struct platform_device *dev)
 {
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
-	struct resource *iomem;
+	struct resource *iomem, *rscr;
 	struct clk *clk32k;
+	struct tc6387xb *tc6387xb;
 	int irq, ret;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
@@ -120,18 +149,40 @@ static int tc6387xb_probe(struct platform_device *dev)
 		return -EINVAL;
 	}
 
+	tc6387xb = kzalloc(sizeof *tc6387xb, GFP_KERNEL);
+	if (!tc6387xb)
+		return -ENOMEM;
+
 	ret  = platform_get_irq(dev, 0);
 	if (ret >= 0)
 		irq = ret;
 	else
-		goto err_resource;
+		goto err_no_irq;
 
 	clk32k = clk_get(&dev->dev, "CLK_CK32K");
 	if (IS_ERR(clk32k)) {
 		ret = PTR_ERR(clk32k);
+		goto err_no_clk;
+	}
+
+	rscr = &tc6387xb->rscr;
+	rscr->name = "tc6387xb-core";
+	rscr->start = iomem->start;
+	rscr->end = iomem->start + 0xff;
+	rscr->flags = IORESOURCE_MEM;
+
+	ret = request_resource(iomem, rscr);
+	if (ret)
 		goto err_resource;
+
+	tc6387xb->scr = ioremap(rscr->start, rscr->end - rscr->start + 1);
+	if (!tc6387xb->scr) {
+		ret = -ENOMEM;
+		goto err_ioremap;
 	}
-	platform_set_drvdata(dev, clk32k);
+
+	tc6387xb->clk32k = clk32k;
+	platform_set_drvdata(dev, tc6387xb);
 
 	if (pdata && pdata->enable)
 		pdata->enable(dev);
@@ -149,8 +200,13 @@ static int tc6387xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
-	clk_put(clk32k);
+err_ioremap:
+	release_resource(&tc6387xb->rscr);
 err_resource:
+	clk_put(clk32k);
+err_no_clk:
+err_no_irq:
+	kfree(tc6387xb);
 	return ret;
 }
 
@@ -195,3 +251,4 @@ MODULE_DESCRIPTION("Toshiba TC6387XB core driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton");
 MODULE_ALIAS("platform:tc6387xb");
+
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 1429a7341a9a..4bc5a08a2b09 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,10 +136,6 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
-static struct tmio_mmc_data tc6393xb_mmc_data = {
-	.hclk = 24000000,
-};
-
 static struct resource __devinitdata tc6393xb_nand_resources[] = {
 	{
 		.start	= 0x1000,
@@ -164,11 +160,6 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = {
 		.end	= 0x9ff,
 		.flags	= IORESOURCE_MEM,
 	},
-	{
-		.start	= 0x200,
-		.end	= 0x2ff,
-		.flags	= IORESOURCE_MEM,
-	},
 	{
 		.start	= IRQ_TC6393_MMC,
 		.end	= IRQ_TC6393_MMC,
@@ -346,6 +337,50 @@ int tc6393xb_lcd_mode(struct platform_device *fb,
 }
 EXPORT_SYMBOL(tc6393xb_lcd_mode);
 
+static int tc6393xb_mmc_enable(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_enable(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static int tc6393xb_mmc_resume(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_resume(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static void tc6393xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6393xb->scr + 0x200, 0, state);
+}
+
+static void tc6393xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6393xb->scr + 0x200, 0, state);
+}
+
+static struct tmio_mmc_data tc6393xb_mmc_data = {
+	.hclk = 24000000,
+	.set_pwr = tc6393xb_mmc_pwr,
+	.set_clk_div = tc6393xb_mmc_clk_div,
+};
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -355,6 +390,8 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	},
 	[TC6393XB_CELL_MMC] = {
 		.name = "tmio-mmc",
+		.enable = tc6393xb_mmc_enable,
+		.resume = tc6393xb_mmc_resume,
 		.driver_data = &tc6393xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
@@ -836,3 +873,4 @@ MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov and Dirk Opfer");
 MODULE_DESCRIPTION("tc6393xb Toshiba Mobile IO Controller");
 MODULE_ALIAS("platform:tc6393xb");
+
diff --git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c
new file mode 100644
index 000000000000..eddc19ae464b
--- /dev/null
+++ b/drivers/mfd/tmio_core.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright(c) 2009 Ian Molton <spyro@f2s.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mfd/tmio.h>
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base)
+{
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	/* Disable SD power during suspend */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_3, 0x01);
+
+	/* The below is required but why? FIXME */
+	sd_config_write8(cnf, shift, CNF_STOP_CLK_CTL, 0x1f);
+
+	/* Power down SD bus */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, 0x00);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_enable);
+
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base)
+{
+
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_resume);
+
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, state ? 0x02 : 0x00);
+}
+EXPORT_SYMBOL(tmio_core_mmc_pwr);
+
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_SD_CLK_MODE, state ? 1 : 0);
+}
+EXPORT_SYMBOL(tmio_core_mmc_clk_div);
+
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 7cccc8523747..e22c3fa3516a 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -46,7 +46,9 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 		clk |= 0x100;
 	}
 
-	sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22);
+	if (host->set_clk_div)
+		host->set_clk_div(host->pdev, (clk>>22) & 1);
+
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff);
 }
 
@@ -427,12 +429,13 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	/* Power sequence - OFF -> ON -> UP */
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF: /* power down SD bus */
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 0);
 		tmio_mmc_clk_stop(host);
 		break;
 	case MMC_POWER_ON: /* power up SD bus */
-
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x02);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 1);
 		break;
 	case MMC_POWER_UP: /* start bus clock */
 		tmio_mmc_clk_start(host);
@@ -485,21 +488,15 @@ static int tmio_mmc_resume(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct mmc_host *mmc = platform_get_drvdata(dev);
-	struct tmio_mmc_host *host = mmc_priv(mmc);
 	int ret = 0;
 
 	/* Tell the MFD core we are ready to be enabled */
-	if (cell->enable) {
-		ret = cell->enable(dev);
+	if (cell->resume) {
+		ret = cell->resume(dev);
 		if (ret)
 			goto out;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
 	mmc_resume_host(mmc);
 
 out:
@@ -514,17 +511,16 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct tmio_mmc_data *pdata;
-	struct resource *res_ctl, *res_cnf;
+	struct resource *res_ctl;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
 	int ret = -EINVAL;
 
-	if (dev->num_resources != 3)
+	if (dev->num_resources != 2)
 		goto out;
 
 	res_ctl = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1);
-	if (!res_ctl || !res_cnf)
+	if (!res_ctl)
 		goto out;
 
 	pdata = cell->driver_data;
@@ -539,8 +535,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;
+	host->pdev = dev;
 	platform_set_drvdata(dev, mmc);
 
+	host->set_pwr = pdata->set_pwr;
+	host->set_clk_div = pdata->set_clk_div;
+
 	/* SD control register space size is 0x200, 0x400 for bus_shift=1 */
 	host->bus_shift = resource_size(res_ctl) >> 10;
 
@@ -548,10 +548,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (!host->ctl)
 		goto host_free;
 
-	host->cnf = ioremap(res_cnf->start, resource_size(res_cnf));
-	if (!host->cnf)
-		goto unmap_ctl;
-
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
 	mmc->f_max = pdata->hclk;
@@ -562,23 +558,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (cell->enable) {
 		ret = cell->enable(dev);
 		if (ret)
-			goto unmap_cnf;
+			goto unmap_ctl;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
-	/* Disable SD power during suspend */
-	sd_config_write8(host, CNF_PWR_CTL_3, 0x01);
-
-	/* The below is required but why? FIXME */
-	sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f);
-
-	/* Power down SD bus*/
-	sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
-
 	tmio_mmc_clk_stop(host);
 	reset(host);
 
@@ -586,14 +568,14 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (ret >= 0)
 		host->irq = ret;
 	else
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
 		IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
 	if (ret)
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	mmc_add_host(mmc);
 
@@ -605,8 +587,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	return 0;
 
-unmap_cnf:
-	iounmap(host->cnf);
 unmap_ctl:
 	iounmap(host->ctl);
 host_free:
@@ -626,7 +606,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev)
 		mmc_remove_host(mmc);
 		free_irq(host->irq, host);
 		iounmap(host->ctl);
-		iounmap(host->cnf);
 		mmc_free_host(mmc);
 	}
 
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 9fa998594974..692dc23363b9 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -11,26 +11,6 @@
 
 #include <linux/highmem.h>
 
-#define CNF_CMD     0x04
-#define CNF_CTL_BASE   0x10
-#define CNF_INT_PIN  0x3d
-#define CNF_STOP_CLK_CTL 0x40
-#define CNF_GCLK_CTL 0x41
-#define CNF_SD_CLK_MODE 0x42
-#define CNF_PIN_STATUS 0x44
-#define CNF_PWR_CTL_1 0x48
-#define CNF_PWR_CTL_2 0x49
-#define CNF_PWR_CTL_3 0x4a
-#define CNF_CARD_DETECT_MODE 0x4c
-#define CNF_SD_SLOT 0x50
-#define CNF_EXT_GCLK_CTL_1 0xf0
-#define CNF_EXT_GCLK_CTL_2 0xf1
-#define CNF_EXT_GCLK_CTL_3 0xf9
-#define CNF_SD_LED_EN_1 0xfa
-#define CNF_SD_LED_EN_2 0xfe
-
-#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
-
 #define CTL_SD_CMD 0x00
 #define CTL_ARG_REG 0x04
 #define CTL_STOP_INTERNAL_ACTION 0x08
@@ -110,7 +90,6 @@
 
 
 struct tmio_mmc_host {
-	void __iomem *cnf;
 	void __iomem *ctl;
 	unsigned long bus_shift;
 	struct mmc_command      *cmd;
@@ -119,10 +98,16 @@ struct tmio_mmc_host {
 	struct mmc_host         *mmc;
 	int                     irq;
 
+	/* Callbacks for clock / power control */
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
+
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
 	unsigned int            sg_len;
 	unsigned int            sg_off;
+
+	struct platform_device *pdev;
 };
 
 #include <linux/io.h>
@@ -163,25 +148,6 @@ static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
-static inline void sd_config_write8(struct tmio_mmc_host *host, int addr,
-		u8 val)
-{
-	writeb(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write16(struct tmio_mmc_host *host, int addr,
-		u16 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write32(struct tmio_mmc_host *host, int addr,
-		u32 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-	writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift));
-}
-
 #include <linux/scatterlist.h>
 #include <linux/blkdev.h>
 
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 6b9c5d06690c..9cb1834deffa 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -2,6 +2,8 @@
 #define MFD_TMIO_H
 
 #include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #define tmio_ioread8(addr) readb(addr)
 #define tmio_ioread16(addr) readw(addr)
@@ -18,11 +20,48 @@
 	writew((val) >> 16, (addr) + 2); \
 	} while (0)
 
+#define CNF_CMD     0x04
+#define CNF_CTL_BASE   0x10
+#define CNF_INT_PIN  0x3d
+#define CNF_STOP_CLK_CTL 0x40
+#define CNF_GCLK_CTL 0x41
+#define CNF_SD_CLK_MODE 0x42
+#define CNF_PIN_STATUS 0x44
+#define CNF_PWR_CTL_1 0x48
+#define CNF_PWR_CTL_2 0x49
+#define CNF_PWR_CTL_3 0x4a
+#define CNF_CARD_DETECT_MODE 0x4c
+#define CNF_SD_SLOT 0x50
+#define CNF_EXT_GCLK_CTL_1 0xf0
+#define CNF_EXT_GCLK_CTL_2 0xf1
+#define CNF_EXT_GCLK_CTL_3 0xf9
+#define CNF_SD_LED_EN_1 0xfa
+#define CNF_SD_LED_EN_2 0xfe
+
+#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
+
+#define sd_config_write8(base, shift, reg, val) \
+	tmio_iowrite8((val), (base) + ((reg) << (shift)))
+#define sd_config_write16(base, shift, reg, val) \
+	tmio_iowrite16((val), (base) + ((reg) << (shift)))
+#define sd_config_write32(base, shift, reg, val) \
+	do { \
+		tmio_iowrite16((val), (base) + ((reg) << (shift)));   \
+		tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \
+	} while (0)
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
+
 /*
  * data for the MMC controller
  */
 struct tmio_mmc_data {
 	const unsigned int		hclk;
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
 };
 
 /*
-- 
cgit v1.2.3


From ec51b7f538c440bfa5a4d538133c659071c02155 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 19 Jan 2010 00:27:58 -0800
Subject: Input: ad7879 - support auxiliary GPIOs via gpiolib

Drop the simple fancy sysfs hooks for the aux GPIOs and expose these via
the gpiolib interface so that other drivers can use them.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ad7879.c | 197 ++++++++++++++++++++++++++-----------
 include/linux/spi/ad7879.h         |  12 ++-
 2 files changed, 149 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index c21e6d3a8844..794d070c6900 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -47,6 +47,7 @@
 #include <linux/workqueue.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/ad7879.h>
 
@@ -132,7 +133,9 @@ struct ad7879 {
 	struct input_dev	*input;
 	struct work_struct	work;
 	struct timer_list	timer;
-
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip	gc;
+#endif
 	struct mutex		mutex;
 	unsigned		disabled:1;	/* P: mutex */
 
@@ -150,11 +153,9 @@ struct ad7879 {
 	u8			median;
 	u16			x_plate_ohms;
 	u16			pressure_max;
-	u16			gpio_init;
 	u16			cmd_crtl1;
 	u16			cmd_crtl2;
 	u16			cmd_crtl3;
-	unsigned		gpio:1;
 };
 
 static int ad7879_read(bus_device *, u8);
@@ -237,24 +238,6 @@ static irqreturn_t ad7879_irq(int irq, void *handle)
 
 static void ad7879_setup(struct ad7879 *ts)
 {
-	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
-			AD7879_XPLUS_BIT |
-			AD7879_Z2_BIT |
-			AD7879_Z1_BIT |
-			AD7879_TEMPMASK_BIT |
-			AD7879_AUXVBATMASK_BIT |
-			AD7879_GPIOALERTMASK_BIT;
-
-	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
-			AD7879_AVG(ts->averaging) |
-			AD7879_MFS(ts->median) |
-			AD7879_FCD(ts->first_conversion_delay) |
-			ts->gpio_init;
-
-	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
-			AD7879_ACQ(ts->acquisition_time) |
-			AD7879_TMR(ts->pen_down_acc_interval);
-
 	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
 	ad7879_write(ts->bus, AD7879_REG_CTRL3, ts->cmd_crtl3);
 	ad7879_write(ts->bus, AD7879_REG_CTRL1, ts->cmd_crtl1);
@@ -324,48 +307,132 @@ static ssize_t ad7879_disable_store(struct device *dev,
 
 static DEVICE_ATTR(disable, 0664, ad7879_disable_show, ad7879_disable_store);
 
-static ssize_t ad7879_gpio_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static struct attribute *ad7879_attributes[] = {
+	&dev_attr_disable.attr,
+	NULL
+};
+
+static const struct attribute_group ad7879_attr_group = {
+	.attrs = ad7879_attributes,
+};
+
+#ifdef CONFIG_GPIOLIB
+static int ad7879_gpio_direction_input(struct gpio_chip *chip,
+					unsigned gpio)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	return sprintf(buf, "%u\n", ts->gpio);
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIODIR | AD7879_GPIOPOL;
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
 }
 
-static ssize_t ad7879_gpio_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
+static int ad7879_gpio_direction_output(struct gpio_chip *chip,
+					unsigned gpio, int level)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
-	unsigned long val;
-	int error;
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	error = strict_strtoul(buf, 10, &val);
-	if (error)
-		return error;
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 &= ~AD7879_GPIODIR;
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIOPOL;
+	if (level)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
+
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
+}
+
+static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	u16 val;
 
 	mutex_lock(&ts->mutex);
-	ts->gpio = !!val;
-	error = ad7879_write(ts->bus, AD7879_REG_CTRL2,
-			   ts->gpio ?
-				ts->cmd_crtl2 & ~AD7879_GPIO_DATA :
-				ts->cmd_crtl2 | AD7879_GPIO_DATA);
+	val = ad7879_read(ts->bus, AD7879_REG_CTRL2);
 	mutex_unlock(&ts->mutex);
 
-	return error ? : count;
+	return !!(val & AD7879_GPIO_DATA);
 }
 
-static DEVICE_ATTR(gpio, 0664, ad7879_gpio_show, ad7879_gpio_store);
+static void ad7879_gpio_set_value(struct gpio_chip *chip,
+				  unsigned gpio, int value)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
 
-static struct attribute *ad7879_attributes[] = {
-	&dev_attr_disable.attr,
-	&dev_attr_gpio.attr,
-	NULL
-};
+	mutex_lock(&ts->mutex);
+	if (value)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
 
-static const struct attribute_group ad7879_attr_group = {
-	.attrs = ad7879_attributes,
-};
+	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+}
+
+static int __devinit ad7879_gpio_add(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret = 0;
+
+	if (pdata->gpio_export) {
+		ts->gc.direction_input = ad7879_gpio_direction_input;
+		ts->gc.direction_output = ad7879_gpio_direction_output;
+		ts->gc.get = ad7879_gpio_get_value;
+		ts->gc.set = ad7879_gpio_set_value;
+		ts->gc.can_sleep = 1;
+		ts->gc.base = pdata->gpio_base;
+		ts->gc.ngpio = 1;
+		ts->gc.label = "AD7879-GPIO";
+		ts->gc.owner = THIS_MODULE;
+		ts->gc.dev = dev;
+
+		ret = gpiochip_add(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to register gpio %d\n",
+				ts->gc.base);
+	}
+
+	return ret;
+}
+
+/*
+ * We mark ad7879_gpio_remove inline so there is a chance the code
+ * gets discarded when not needed. We can't do __devinit/__devexit
+ * markup since it is used in both probe and remove methods.
+ */
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret;
+
+	if (pdata->gpio_export) {
+		ret = gpiochip_remove(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to remove gpio %d\n",
+				ts->gc.base);
+	}
+}
+#else
+static inline int ad7879_gpio_add(struct device *dev)
+{
+	return 0;
+}
+
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+}
+#endif
 
 static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 {
@@ -403,12 +470,6 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	ts->pen_down_acc_interval = pdata->pen_down_acc_interval;
 	ts->median = pdata->median;
 
-	if (pdata->gpio_output)
-		ts->gpio_init = AD7879_GPIO_EN |
-				(pdata->gpio_default ? 0 : AD7879_GPIO_DATA);
-	else
-		ts->gpio_init = AD7879_GPIO_EN | AD7879_GPIODIR;
-
 	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&bus->dev));
 
 	input_dev->name = "AD7879 Touchscreen";
@@ -446,6 +507,23 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 		goto err_free_mem;
 	}
 
+	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
+			AD7879_XPLUS_BIT |
+			AD7879_Z2_BIT |
+			AD7879_Z1_BIT |
+			AD7879_TEMPMASK_BIT |
+			AD7879_AUXVBATMASK_BIT |
+			AD7879_GPIOALERTMASK_BIT;
+
+	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
+			AD7879_AVG(ts->averaging) |
+			AD7879_MFS(ts->median) |
+			AD7879_FCD(ts->first_conversion_delay);
+
+	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
+			AD7879_ACQ(ts->acquisition_time) |
+			AD7879_TMR(ts->pen_down_acc_interval);
+
 	ad7879_setup(ts);
 
 	err = request_irq(bus->irq, ad7879_irq,
@@ -460,15 +538,21 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	if (err)
 		goto err_free_irq;
 
-	err = input_register_device(input_dev);
+	err = ad7879_gpio_add(&bus->dev);
 	if (err)
 		goto err_remove_attr;
 
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_remove_gpio;
+
 	dev_info(&bus->dev, "Rev.%d touchscreen, irq %d\n",
 		 revid >> 8, bus->irq);
 
 	return 0;
 
+err_remove_gpio:
+	ad7879_gpio_remove(&bus->dev);
 err_remove_attr:
 	sysfs_remove_group(&bus->dev.kobj, &ad7879_attr_group);
 err_free_irq:
@@ -481,6 +565,7 @@ err_free_mem:
 
 static int __devexit ad7879_destroy(bus_device *bus, struct ad7879 *ts)
 {
+	ad7879_gpio_remove(&bus->dev);
 	ad7879_disable(ts);
 	sysfs_remove_group(&ts->bus->dev.kobj, &ad7879_attr_group);
 	free_irq(ts->bus->irq, ts);
diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h
index 4231104c9afa..6334cee1a3be 100644
--- a/include/linux/spi/ad7879.h
+++ b/include/linux/spi/ad7879.h
@@ -28,8 +28,12 @@ struct ad7879_platform_data {
 	 * 1 = 4, 2 = 8, 3 = 16 (median > averaging)
 	 */
 	u8	median;
-	/* 1 = AUX/VBAT/GPIO set to GPIO Output */
-	u8	gpio_output;
-	/* Initial GPIO pin state (valid if gpio_output = 1) */
-	u8	gpio_default;
+	/* 1 = AUX/VBAT/GPIO export GPIO to gpiolib
+	 * requires CONFIG_GPIOLIB
+	 */
+	bool	gpio_export;
+	/* identifies the first GPIO number handled by this chip;
+	 * or, if negative, requests dynamic ID allocation.
+	 */
+	s32	gpio_base;
 };
-- 
cgit v1.2.3


From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 18 Jan 2010 05:37:16 +0000
Subject: phylib: Move workqueue initialization to a proper place

commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock
on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume:

 PM: resume of devices complete after 237.098 msecs
 Restarting tasks ... done.
 kernel BUG at kernel/workqueue.c:354!
 Unable to handle kernel NULL pointer dereference at virtual address 00000000
 [...]
 Backtrace:
 [<c002c598>] (__bug+0x0/0x2c) from [<c0052a54>] (queue_delayed_work_on+0x74/0xf8)
 [<c00529e0>] (queue_delayed_work_on+0x0/0xf8) from [<c0052b30>] (queue_delayed_work+0x2c/0x30)

The oops pops up because TI DaVinci EMAC driver detaches PHY on
suspend and attaches it back on resume. Attaching makes phylib call
phy_start_machine() that initializes a workqueue. On the other hand,
PHY's resume routine will call phy_start_machine() again, and that
will cause the oops since we just destroyed the already scheduled
workqueue.

This patch fixes the issue by moving workqueue initialization to
phy_device_create().

p.s. We don't see this oops with ucc_geth and gianfar drivers because
they perform a fine-grained suspend, i.e. they just stop the PHYs
without detaching.

Reported-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 4 +---
 drivers/net/phy/phy_device.c | 1 +
 include/linux/phy.h          | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index b0e9f9c51721..0295097d6c44 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -410,7 +410,6 @@ EXPORT_SYMBOL(phy_start_aneg);
 
 
 static void phy_change(struct work_struct *work);
-static void phy_state_machine(struct work_struct *work);
 
 /**
  * phy_start_machine - start PHY state machine tracking
@@ -430,7 +429,6 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine);
 	schedule_delayed_work(&phydev->state_queue, HZ);
 }
 
@@ -761,7 +759,7 @@ EXPORT_SYMBOL(phy_start);
  * phy_state_machine - Handle the state machine
  * @work: work_struct that describes the work to be done
  */
-static void phy_state_machine(struct work_struct *work)
+void phy_state_machine(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct phy_device *phydev =
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8212b2b93422..adbc0fded130 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -177,6 +177,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	dev->state = PHY_DOWN;
 
 	mutex_init(&dev->lock);
+	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
 
 	return dev;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7968defd2fa7..6a7eb402165d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv);
 int phy_driver_register(struct phy_driver *new_driver);
 void phy_prepare_link(struct phy_device *phydev,
 		void (*adjust_link)(struct net_device *));
+void phy_state_machine(struct work_struct *work);
 void phy_start_machine(struct phy_device *phydev,
 		void (*handler)(struct net_device *));
 void phy_stop_machine(struct phy_device *phydev);
-- 
cgit v1.2.3


From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 6 Jan 2010 10:16:51 -0800
Subject: USB: Fix duplicate sysfs problem after device reset.

Borislav Petkov reports issues with duplicate sysfs endpoint files after a
resume from a hibernate.  It turns out that the code to support alternate
settings under xHCI has issues when a device with a non-default alternate
setting is reset during the hibernate:

[  427.681810] Restarting tasks ...
[  427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000
[  427.682019] usb usb3: usb resume
[  427.682030] ohci_hcd 0000:00:12.0: wakeup root hub
[  427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s
[  427.682205] usb 1-2: usb wakeup-resume
[  427.682226] usb 1-2: finish reset-resume
[  427.682886] done.
[  427.734658] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.746682] hub 3-0:1.0: hub_reset_resume
[  427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub
[  427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2
[  427.839653] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS
[  427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1
[  427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1
[  427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed.
[  427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0
[  427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1
[  427.916609]  ep_81: create, parent hub
[  427.916632] ------------[ cut here ]------------
[  427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96()
[  427.916649] Hardware name: System Product Name
[  427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81'
[  427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat
+8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core
[  427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13
[  427.916699] Call Trace:

The problem is caused by a mismatch between the USB core's view of the
device state and the USB device and xHCI host's view of the device state.

After the device reset and re-configuration, the device and the xHCI host
think they are using alternate setting 0 of all interfaces.  However, the
USB core keeps track of the old state, which may include non-zero
alternate settings.  It uses intf->cur_altsetting to keep the endpoint
sysfs files for the old state across the reset.

The bandwidth allocation functions need to know what the xHCI host thinks
the current alternate settings are, so original patch set
intf->cur_altsetting to the alternate setting 0.  This caused duplicate
endpoint files to be created.

The solution is to not set intf->cur_altsetting before calling
usb_set_interface() in usb_reset_and_verify_device().  Instead, we add a
new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use
alternate setting 0 as the currently installed alternate setting.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Tested-by: Borislav Petkov <petkovbb@googlemail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 18 ++++++++++++++++++
 drivers/usb/core/hub.c | 15 +++++----------
 include/linux/usb.h    |  1 +
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0495fa651225..80995ef0868c 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1684,6 +1684,24 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
 		}
 	}
 	if (cur_alt && new_alt) {
+		struct usb_interface *iface = usb_ifnum_to_if(udev,
+				cur_alt->desc.bInterfaceNumber);
+
+		if (iface->resetting_device) {
+			/*
+			 * The USB core just reset the device, so the xHCI host
+			 * and the device will think alt setting 0 is installed.
+			 * However, the USB core will pass in the alternate
+			 * setting installed before the reset as cur_alt.  Dig
+			 * out the alternate setting 0 structure, or the first
+			 * alternate setting if a broken device doesn't have alt
+			 * setting 0.
+			 */
+			cur_alt = usb_altnum_to_altsetting(iface, 0);
+			if (!cur_alt)
+				cur_alt = &iface->altsetting[0];
+		}
+
 		/* Drop all the endpoints in the current alt setting */
 		for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) {
 			ret = hcd->driver->drop_endpoint(hcd, udev,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b9f5fcd713e2..35cc8b9ba1f5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3695,19 +3695,14 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 			usb_enable_interface(udev, intf, true);
 			ret = 0;
 		} else {
-			/* We've just reset the device, so it will think alt
-			 * setting 0 is installed.  For usb_set_interface() to
-			 * work properly, we need to set the current alternate
-			 * interface setting to 0 (or the first alt setting, if
-			 * the device doesn't have alt setting 0).
+			/* Let the bandwidth allocation function know that this
+			 * device has been reset, and it will have to use
+			 * alternate setting 0 as the current alternate setting.
 			 */
-			intf->cur_altsetting =
-				usb_find_alt_setting(config, i, 0);
-			if (!intf->cur_altsetting)
-				intf->cur_altsetting =
-					&config->intf_cache[i]->altsetting[0];
+			intf->resetting_device = 1;
 			ret = usb_set_interface(udev, desc->bInterfaceNumber,
 					desc->bAlternateSetting);
+			intf->resetting_device = 0;
 		}
 		if (ret < 0) {
 			dev_err(&udev->dev, "failed to restore interface %d "
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e101a2d04d75..d7ace1b80f09 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -192,6 +192,7 @@ struct usb_interface {
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
 	unsigned needs_binding:1;	/* needs delayed unbind/rebind */
 	unsigned reset_running:1;
+	unsigned resetting_device:1;	/* true: bandwidth alloc after reset */
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
-- 
cgit v1.2.3


From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 4 Jan 2010 14:44:56 +0100
Subject: sched: Fix vmark regression on big machines

SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't
enabled, leading to many cache misses on large machines as we traverse
looking for an idle shared cache to wake to.  Change the enabler of
select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the
sibling domain level.

Reported-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1262612696.15495.15.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/topology.h | 2 +-
 kernel/sched_fair.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 57e63579bfdd..5b81156780b1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 1*SD_SHARE_CPUPOWER			\
 				| 0*SD_POWERSAVINGS_BALANCE		\
-				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
 				,					\
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 42ac3c9f66f6..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			 * If there's an idle sibling in this domain, make that
 			 * the wake_affine target instead of the current cpu.
 			 */
-			if (tmp->flags & SD_PREFER_SIBLING)
+			if (tmp->flags & SD_SHARE_PKG_RESOURCES)
 				target = select_idle_sibling(p, tmp, target);
 
 			if (target >= 0) {
-- 
cgit v1.2.3


From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 18 Jan 2010 14:02:16 +0100
Subject: perf: Change the is_software_event() definition

The is_software_event() definition always confuses me because its an
exclusive expression, make it an inclusive one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f75eea..8fa71874113f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
  */
 static inline int is_software_event(struct perf_event *event)
 {
-	return (event->attr.type != PERF_TYPE_RAW) &&
-		(event->attr.type != PERF_TYPE_HARDWARE) &&
-		(event->attr.type != PERF_TYPE_HW_CACHE);
+	switch (event->attr.type) {
+	case PERF_TYPE_SOFTWARE:
+	case PERF_TYPE_TRACEPOINT:
+	/* for now the breakpoint stuff also works as software event */
+	case PERF_TYPE_BREAKPOINT:
+		return 1;
+	}
+	return 0;
 }
 
 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
-- 
cgit v1.2.3


From cb289d6244a37cf932c571d6deb0daa8030f931b Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 13 Jan 2010 09:34:36 -0800
Subject: eventfd - allow atomic read and waitqueue remove

KVM needs a wait to atomically remove themselves from the eventfd ->poll()
wait queue head, in order to handle correctly their IRQfd deassign
operation.

This patch introduces such API, plus a way to read an eventfd from its
context.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 fs/eventfd.c            | 89 ++++++++++++++++++++++++++++++++++++++++---------
 include/linux/eventfd.h | 16 +++++++++
 2 files changed, 90 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd10366a78..91bb4f27238c 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+				       __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+						wait_queue_t *wait, __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
-- 
cgit v1.2.3


From 6d3faf6f431bafb25f4b9926c50a7e5c267738c6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 24 Jan 2010 14:48:00 +0100
Subject: firewire: cdev: add_descriptor documentation fix

struct fw_cdev_add_descriptor.length is in quadlets, not in bytes.
Also remove any doubts about the endianess of descriptor data.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 1f716d9f714b..520ecf86cbb3 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -380,7 +380,7 @@ struct fw_cdev_initiate_bus_reset {
  * @immediate:	If non-zero, immediate key to insert before pointer
  * @key:	Upper 8 bits of root directory pointer
  * @data:	Userspace pointer to contents of descriptor block
- * @length:	Length of descriptor block data, in bytes
+ * @length:	Length of descriptor block data, in quadlets
  * @handle:	Handle to the descriptor, written by the kernel
  *
  * Add a descriptor block and optionally a preceding immediate key to the local
@@ -394,6 +394,8 @@ struct fw_cdev_initiate_bus_reset {
  * If not 0, the @immediate field specifies an immediate key which will be
  * inserted before the root directory pointer.
  *
+ * @immediate, @key, and @data array elements are CPU-endian quadlets.
+ *
  * If successful, the kernel adds the descriptor and writes back a handle to the
  * kernel-side object to be used for later removal of the descriptor block and
  * immediate key.
-- 
cgit v1.2.3


From 0531b2aac59c2296570ac52bfc032ef2ace7d5e1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jan 2010 09:20:03 -0800
Subject: mm: add new 'read_cache_page_gfp()' helper function

It's a simplified 'read_cache_page()' which takes a page allocation
flag, so that different paths can control how aggressive the memory
allocations are that populate a address space.

In particular, the intel GPU object mapping code wants to be able to do
a certain amount of own internal memory management by automatically
shrinking the address space when memory starts getting tight.  This
allows it to dynamically use different memory allocation policies on a
per-allocation basis, rather than depend on the (static) address space
gfp policy.

The actual new function is a one-liner, but re-organizing the helper
functions to the point where you can do this with a single line of code
is what most of the patch is all about.

Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |   2 +
 mm/filemap.c            | 100 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 70 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ed5d7501e181..3c62ed408492 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -253,6 +253,8 @@ extern struct page * read_cache_page_async(struct address_space *mapping,
 extern struct page * read_cache_page(struct address_space *mapping,
 				pgoff_t index, filler_t *filler,
 				void *data);
+extern struct page * read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
 extern int read_cache_pages(struct address_space *mapping,
 		struct list_head *pages, filler_t *filler, void *data);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6cb..e3736923220e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
 {
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = page_cache_alloc_cold(mapping);
+		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
 	return page;
 }
 
-/**
- * read_cache_page_async - read into page cache, fill it if needed
- * @mapping:	the page's address_space
- * @index:	the page index
- * @filler:	function to perform the read
- * @data:	destination for read data
- *
- * Same as read_cache_page, but don't wait for page to become unlocked
- * after submitting it to the filler.
- *
- * Read into the page cache. If a page already exists, and PageUptodate() is
- * not set, try to fill the page but don't wait for it to become unlocked.
- *
- * If the page does not get brought uptodate, return -EIO.
- */
-struct page *read_cache_page_async(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
+
 {
 	struct page *page;
 	int err;
 
 retry:
-	page = __read_cache_page(mapping, index, filler, data);
+	page = __read_cache_page(mapping, index, filler, data, gfp);
 	if (IS_ERR(page))
 		return page;
 	if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
 	mark_page_accessed(page);
 	return page;
 }
+
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @filler:	function to perform the read
+ * @data:	destination for read data
+ *
+ * Same as read_cache_page, but don't wait for page to become unlocked
+ * after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_async(struct address_space *mapping,
+				pgoff_t index,
+				int (*filler)(void *,struct page*),
+				void *data)
+{
+	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+}
 EXPORT_SYMBOL(read_cache_page_async);
 
+static struct page *wait_on_page_read(struct page *page)
+{
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			page_cache_release(page);
+			page = ERR_PTR(-EIO);
+		}
+	}
+	return page;
+}
+
+/**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @gfp:	the page allocator flags to use if allocating
+ *
+ * This is the same as "read_mapping_page(mapping, index, NULL)", but with
+ * any new page allocations done using the specified allocation flags. Note
+ * that the Radix tree operations will still use GFP_KERNEL, so you can't
+ * expect to do this atomically or anything like that - but you can pass in
+ * other page requirements.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index,
+				gfp_t gfp)
+{
+	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+	return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
+}
+EXPORT_SYMBOL(read_cache_page_gfp);
+
 /**
  * read_cache_page - read into page cache, fill it if needed
  * @mapping:	the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page;
-
-	page = read_cache_page_async(mapping, index, filler, data);
-	if (IS_ERR(page))
-		goto out;
-	wait_on_page_locked(page);
-	if (!PageUptodate(page)) {
-		page_cache_release(page);
-		page = ERR_PTR(-EIO);
-	}
- out:
-	return page;
+	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
 }
 EXPORT_SYMBOL(read_cache_page);
 
-- 
cgit v1.2.3


From bb209c8287d2d55ec4a67e3933346e0a3ee0da76 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 26 Jan 2010 17:10:03 +0000
Subject: powerpc/pci: Add calls to set_pcie_port_type() and
 set_pcie_hotplug_bridge()

We are missing these when building the pci_dev from scratch off
the Open Firmware device-tree

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci_of_scan.c | 2 ++
 drivers/pci/probe.c               | 4 ++--
 include/linux/pci.h               | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 7311fdfb9bf8..693eb9a25bfa 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -140,6 +140,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->devfn = devfn;
 	dev->multifunction = 0;		/* maybe a lie? */
 	dev->needs_freset = 0;		/* pcie fundamental reset required */
+	set_pcie_port_type(dev);
 
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
@@ -164,6 +165,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 		/* a PCI-PCI bridge */
 		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
 		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+		set_pcie_hotplug_bridge(dev);
 	} else if (!strcmp(type, "cardbus")) {
 		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
 	} else {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 98ffb2de22e9..446e4a94d7d3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -681,7 +681,7 @@ static void pci_read_irq(struct pci_dev *dev)
 	dev->irq = irq;
 }
 
-static void set_pcie_port_type(struct pci_dev *pdev)
+void set_pcie_port_type(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
@@ -695,7 +695,7 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
-static void set_pcie_hotplug_bridge(struct pci_dev *pdev)
+void set_pcie_hotplug_bridge(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174e5392e51e..c1968f464c38 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -756,6 +756,10 @@ pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 
+/* For use by arch with custom probe code */
+void set_pcie_port_type(struct pci_dev *pdev);
+void set_pcie_hotplug_bridge(struct pci_dev *pdev);
+
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 #ifdef CONFIG_HOTPLUG
-- 
cgit v1.2.3


From cb6ecf6f7afece066265e243657b0ac28150a7b2 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Thu, 28 Jan 2010 22:28:27 -0800
Subject: Input: add the ABS_MT_PRESSURE event

For pressure-based multi-touch devices, a direct way to send sensor
intensity data per finger is needed. This patch adds the ABS_MT_PRESSURE
event to the MT protocol.

Requested-by: Yoonyoung Shim <jy0922.shim@samsung.com>
Requested-by: Mika Kuoppala <mika.kuoppala@nokia.com>
Requested-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 1 +
 include/linux/input.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 30b503b8d67b..86cb2d2196ff 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -46,6 +46,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = {
 	ABS_MT_TOOL_TYPE,
 	ABS_MT_BLOB_ID,
 	ABS_MT_TRACKING_ID,
+	ABS_MT_PRESSURE,
 	0
 };
 static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
diff --git a/include/linux/input.h b/include/linux/input.h
index 7be8a6537b57..735ceaf1bc2d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -660,6 +660,7 @@ struct input_absinfo {
 #define ABS_MT_TOOL_TYPE	0x37	/* Type of touching device */
 #define ABS_MT_BLOB_ID		0x38	/* Group a set of packets as a blob */
 #define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
+#define ABS_MT_PRESSURE		0x3a	/* Pressure on contact area */
 
 #define ABS_MAX			0x3f
 #define ABS_CNT			(ABS_MAX+1)
-- 
cgit v1.2.3


From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 28 Jan 2010 22:14:42 -0800
Subject: Split 'flush_old_exec' into two functions

'flush_old_exec()' is the point of no return when doing an execve(), and
it is pretty badly misnamed.  It doesn't just flush the old executable
environment, it also starts up the new one.

Which is very inconvenient for things like setting up the new
personality, because we want the new personality to affect the starting
of the new environment, but at the same time we do _not_ want the new
personality to take effect if flushing the old one fails.

As a result, the x86-64 '32-bit' personality is actually done using this
insane "I'm going to change the ABI, but I haven't done it yet" bit
(TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the
personality, but just the "pending" bit, so that "flush_thread()" can do
the actual personality magic.

This patch in no way changes any of that insanity, but it does split the
'flush_old_exec()' function up into a preparatory part that can fail
(still called flush_old_exec()), and a new part that will actually set
up the new exec environment (setup_new_exec()).  All callers are changed
to trivially comply with the new world order.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/process_64.c |  2 +-
 arch/x86/ia32/ia32_aout.c   | 10 ++++++----
 fs/binfmt_aout.c            |  1 +
 fs/binfmt_elf.c             | 27 ++-------------------------
 fs/binfmt_elf_fdpic.c       |  3 +++
 fs/binfmt_flat.c            |  1 +
 fs/binfmt_som.c             |  1 +
 fs/exec.c                   | 26 ++++++++++++++++----------
 include/linux/binfmts.h     |  1 +
 include/linux/sched.h       |  2 +-
 10 files changed, 33 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 31f80c61b031..ec79faf6f021 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -368,7 +368,7 @@ void exit_thread(void)
 void flush_thread(void)
 {
 
-	/* Called by fs/exec.c (flush_old_exec) to remove traces of a
+	/* Called by fs/exec.c (setup_new_exec) to remove traces of a
 	 * previously running executable. */
 #ifdef CONFIG_SH_FPU
 	if (last_task_used_math == current) {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..435d2a5323da 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
 	clear_thread_flag(TIF_ABI_PENDING);
 
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
+
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
 	current->mm->end_data = ex.a_data +
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 #else
 	set_personality(PER_LINUX);
 #endif
+	setup_new_exec(bprm);
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 				goto out_free_interp;
 
-			/*
-			 * The early SET_PERSONALITY here is so that the lookup
-			 * for the interpreter happens in the namespace of the 
-			 * to-be-execed image.  SET_PERSONALITY can select an
-			 * alternate root.
-			 *
-			 * However, SET_PERSONALITY is NOT allowed to switch
-			 * this task into the new images's memory mapping
-			 * policy - that is, TASK_SIZE must still evaluate to
-			 * that which is appropriate to the execing application.
-			 * This is because exit_mmap() needs to have TASK_SIZE
-			 * evaluate to the size of the old image.
-			 *
-			 * So if (say) a 64-bit application is execing a 32-bit
-			 * application it is the architecture's responsibility
-			 * to defer changing the value of TASK_SIZE until the
-			 * switch really is going to happen - do this in
-			 * flush_thread().	- akpm
-			 */
-			SET_PERSONALITY(loc->elf_ex);
-
 			interpreter = open_exec(elf_interpreter);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
-	} else {
-		/* Executables without an interpreter also need a personality  */
-		SET_PERSONALITY(loc->elf_ex);
 	}
 
 	/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
-	arch_pick_mmap_layout(current->mm);
+
+	setup_new_exec(bprm);
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c57d9ce5ff7e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	set_personality(PER_LINUX_FDPIC);
 	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
+
+	setup_new_exec(bprm);
+
 	set_binfmt(&elf_fdpic_format);
 
 	current->mm->start_code = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX_32BIT);
+		setup_new_exec(bprm);
 	}
 
 	/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
+	setup_new_exec(bprm);
 
 	/* Set the task size for HP-UX processes such that
 	 * the gateway page is outside the address space.
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..675c3f44c2ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 
 int flush_old_exec(struct linux_binprm * bprm)
 {
-	char * name;
-	int i, ch, retval;
-	char tcomm[sizeof(current->comm)];
+	int retval;
 
 	/*
 	 * Make sure we have a private signal table and that
@@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 		goto out;
 
 	bprm->mm = NULL;		/* We're using it now */
+	return 0;
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+	int i, ch;
+	char * name;
+	char tcomm[sizeof(current->comm)];
+
+	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
 			
 	flush_signal_handlers(current, 0);
 	flush_old_files(current->files);
-
-	return 0;
-
-out:
-	return retval;
 }
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
 
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cd4349bdc34e..89c6249fc561 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *);
 extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
+extern void setup_new_exec(struct linux_binprm * bprm);
 
 extern int suid_dumpable;
 #define SUID_DUMP_DISABLE	0	/* No setuid dumping */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..abdfacc58653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1369,7 +1369,7 @@ struct task_struct {
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
-				     - initialized normally by flush_old_exec */
+				     - initialized normally by setup_new_exec */
 /* file system info */
 	int link_count, total_link_count;
 #ifdef CONFIG_SYSVIPC
-- 
cgit v1.2.3


From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 28 Jan 2010 17:04:43 -0600
Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger

This patch fixes the regression in functionality where the
kernel debugger and the perf API do not nicely share hw
breakpoint reservations.

The kernel debugger cannot use any mutex_lock() calls because it
can start the kernel running from an invalid context.

A mutex free version of the reservation API needed to get
created for the kernel debugger to safely update hw breakpoint
reservations.

The possibility for a breakpoint reservation to be concurrently
processed at the time that kgdb interrupts the system is
improbable. Should this corner case occur the end user is
warned, and the kernel debugger will prohibit updating the
hardware breakpoint reservations.

Any time the kernel debugger reserves a hardware breakpoint it
will be a system wide reservation.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: torvalds@linux-foundation.org
LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kgdb.c        | 51 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/hw_breakpoint.h |  2 ++
 kernel/hw_breakpoint.c        | 52 ++++++++++++++++++++++++++++++++++---------
 3 files changed, 95 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 62bea7307eaa..bfba6019d762 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void)
 	hw_breakpoint_restore();
 }
 
+static int hw_break_reserve_slot(int breakno)
+{
+	int cpu;
+	int cnt = 0;
+	struct perf_event **pevent;
+
+	for_each_online_cpu(cpu) {
+		cnt++;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_reserve_bp_slot(*pevent))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for_each_online_cpu(cpu) {
+		cnt--;
+		if (!cnt)
+			break;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		dbg_release_bp_slot(*pevent);
+	}
+	return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+	struct perf_event **pevent;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_release_bp_slot(*pevent))
+			/*
+			 * The debugger is responisble for handing the retry on
+			 * remove failure.
+			 */
+			return -1;
+	}
+	return 0;
+}
+
 static int
 kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 {
@@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 	if (i == 4)
 		return -1;
 
+	if (hw_break_release_slot(i)) {
+		printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr);
+		return -1;
+	}
 	breakinfo[i].enabled = 0;
 
 	return 0;
@@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 		return -1;
 	}
 	breakinfo[i].addr = addr;
+	if (hw_break_reserve_slot(i)) {
+		breakinfo[i].addr = 0;
+		return -1;
+	}
 	breakinfo[i].enabled = 1;
 
 	return 0;
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 41235c93e4e9..070ba0621738 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
 extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
 
+extern int dbg_reserve_bp_slot(struct perf_event *bp);
+extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
 
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c030ae657f20..8a5c7d55ac9f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
-	int ret = 0;
-
-	mutex_lock(&nr_bp_mutex);
 
 	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-		ret = -ENOSPC;
-		goto end;
-	}
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+		return -ENOSPC;
 
 	toggle_bp_slot(bp, true);
 
-end:
+	return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+	int ret;
+
+	mutex_lock(&nr_bp_mutex);
+
+	ret = __reserve_bp_slot(bp);
+
 	mutex_unlock(&nr_bp_mutex);
 
 	return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+	toggle_bp_slot(bp, false);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
 	mutex_lock(&nr_bp_mutex);
 
-	toggle_bp_slot(bp, false);
+	__release_bp_slot(bp);
 
 	mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	__release_bp_slot(bp);
+
+	return 0;
+}
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
-- 
cgit v1.2.3


From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 27 Jan 2010 16:25:22 -0600
Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb
 resume

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets
the time from hardware such as the TSC on x86. In this
configuration kgdb will report a softlock warning message on
resuming or detaching from a debug session.

Sequence of events in the problem case:

 1) "cpu sched clock" and "hardware time" are at 100 sec prior
    to a call to kgdb_handle_exception()

 2) Debugger waits in kgdb_handle_exception() for 80 sec and on
    exit the following is called ...  touch_softlockup_watchdog() -->
    __raw_get_cpu_var(touch_timestamp) = 0;

 3) "cpu sched clock" = 100s (it was not updated, because the
    interrupt was disabled in kgdb) but the "hardware time" = 180 sec

 4) The first timer interrupt after resuming from
    kgdb_handle_exception updates the watchdog from the "cpu sched clock"

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> check (touch_timestamp == 0) (it is "YES"
here, we have set "touch_timestamp = 0" at kgdb) -->
__touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp"
to "get_timestamp()" (Here, the "touch_timestamp" will still be
set to 100s.)  ...

    scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched
    clock" to "hardware time" = 180s) ...  }

 5) The Second timer interrupt handler appears to have a large
    jump and trips the softlockup warning.

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> "cpu sched clock" - "touch_timestamp" =
180s-100s > 60s --> printk "soft lockup error messages" ...  }

note: ***(A) reset "touch_timestamp" to
"get_timestamp(this_cpu)"

Why is "touch_timestamp" 100 sec, instead of 180 sec?

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of
get_timestamp() is:

get_timestamp(this_cpu)
 -->cpu_clock(this_cpu)
 -->sched_clock_cpu(this_cpu)
 -->__update_sched_clock(sched_clock_data, now)

The __update_sched_clock() function uses the GTOD tick value to
create a window to normalize the "now" values.  So if "now"
value is too big for sched_clock_data, it will be ignored.

The fix is to invoke sched_clock_tick() to update "cpu sched
clock" in order to recover from this state.  This is done by
introducing the function touch_softlockup_watchdog_sync(). This
allows kgdb to request that the sched clock is updated when the
watchdog thread runs the first time after a resume from kgdb.

[yong.zhang0@gmail.com: Use per cpu instead of an array]
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Dongdong Deng <Dongdong.Deng@windriver.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: peterz@infradead.org
LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/kgdb.c         |  6 +++---
 kernel/softlockup.c   | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..89232151a9d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
 				    void __user *buffer,
@@ -323,6 +324,9 @@ static inline void softlockup_tick(void)
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..87f2cc557553 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs)
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1450,7 +1450,7 @@ acquirelock:
 	    (kgdb_info[cpu].task &&
 	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
 		atomic_set(&kgdb_active, -1);
-		touch_softlockup_watchdog();
+		touch_softlockup_watchdog_sync();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1550,7 +1550,7 @@ kgdb_restore:
 	}
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
 
 static int __read_mostly did_panic;
 int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlock_touch_sync) = true;
+	__raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
+
 void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
 	}
 
 	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlock_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
 		__touch_softlockup_watchdog();
 		return;
 	}
-- 
cgit v1.2.3


From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Tue, 2 Feb 2010 15:58:48 -0800
Subject: connector: Delete buggy notification code.

On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote:
> > There are at least two ways to fix it: using a big cannon and a small
> > one. The former way is to disable notification registration, since it is
> > not used by anyone at all. Second way is to check whether calling
> > process is root and its destination group is -1 (kind of priveledged
> > one) before command is dispatched to workqueue.
>
> Well if no one is using it, removing it makes the most sense, right?
>
> No objection from me, care to make up a patch either way for this?

Getting it is not used, let's drop support for notifications about
(un)registered events from connector.
Another option was to check credentials on receiving, but we can always
restore it without bugs if needed, but genetlink has a wider code base
and none complained, that userspace can not get notification when some
other clients were (un)registered.

Kudos for Sebastian Krahmer <krahmer@suse.de>, who found a bug in the
code.

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c | 175 ------------------------------------------
 include/linux/connector.h     |  32 --------
 2 files changed, 207 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index f06024668f99..537c29ac4487 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -36,17 +36,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector.");
 
-static u32 cn_idx = CN_IDX_CONNECTOR;
-static u32 cn_val = CN_VAL_CONNECTOR;
-
-module_param(cn_idx, uint, 0);
-module_param(cn_val, uint, 0);
-MODULE_PARM_DESC(cn_idx, "Connector's main device idx.");
-MODULE_PARM_DESC(cn_val, "Connector's main device val.");
-
-static DEFINE_MUTEX(notify_lock);
-static LIST_HEAD(notify_list);
-
 static struct cn_dev cdev;
 
 static int cn_already_initialized;
@@ -209,54 +198,6 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	}
 }
 
-/*
- * Notification routing.
- *
- * Gets id and checks if there are notification request for it's idx
- * and val.  If there are such requests notify the listeners with the
- * given notify event.
- *
- */
-static void cn_notify(struct cb_id *id, u32 notify_event)
-{
-	struct cn_ctl_entry *ent;
-
-	mutex_lock(&notify_lock);
-	list_for_each_entry(ent, &notify_list, notify_entry) {
-		int i;
-		struct cn_notify_req *req;
-		struct cn_ctl_msg *ctl = ent->msg;
-		int idx_found, val_found;
-
-		idx_found = val_found = 0;
-
-		req = (struct cn_notify_req *)ctl->data;
-		for (i = 0; i < ctl->idx_notify_num; ++i, ++req) {
-			if (id->idx >= req->first &&
-					id->idx < req->first + req->range) {
-				idx_found = 1;
-				break;
-			}
-		}
-
-		for (i = 0; i < ctl->val_notify_num; ++i, ++req) {
-			if (id->val >= req->first &&
-					id->val < req->first + req->range) {
-				val_found = 1;
-				break;
-			}
-		}
-
-		if (idx_found && val_found) {
-			struct cn_msg m = { .ack = notify_event, };
-
-			memcpy(&m.id, id, sizeof(m.id));
-			cn_netlink_send(&m, ctl->group, GFP_KERNEL);
-		}
-	}
-	mutex_unlock(&notify_lock);
-}
-
 /*
  * Callback add routing - adds callback with given ID and name.
  * If there is registered callback with the same ID it will not be added.
@@ -276,8 +217,6 @@ int cn_add_callback(struct cb_id *id, char *name,
 	if (err)
 		return err;
 
-	cn_notify(id, 0);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cn_add_callback);
@@ -295,111 +234,9 @@ void cn_del_callback(struct cb_id *id)
 	struct cn_dev *dev = &cdev;
 
 	cn_queue_del_callback(dev->cbdev, id);
-	cn_notify(id, 1);
 }
 EXPORT_SYMBOL_GPL(cn_del_callback);
 
-/*
- * Checks two connector's control messages to be the same.
- * Returns 1 if they are the same or if the first one is corrupted.
- */
-static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2)
-{
-	int i;
-	struct cn_notify_req *req1, *req2;
-
-	if (m1->idx_notify_num != m2->idx_notify_num)
-		return 0;
-
-	if (m1->val_notify_num != m2->val_notify_num)
-		return 0;
-
-	if (m1->len != m2->len)
-		return 0;
-
-	if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) !=
-	    m1->len)
-		return 1;
-
-	req1 = (struct cn_notify_req *)m1->data;
-	req2 = (struct cn_notify_req *)m2->data;
-
-	for (i = 0; i < m1->idx_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	for (i = 0; i < m1->val_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	return 1;
-}
-
-/*
- * Main connector device's callback.
- *
- * Used for notification of a request's processing.
- */
-static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
-	struct cn_ctl_msg *ctl;
-	struct cn_ctl_entry *ent;
-	u32 size;
-
-	if (msg->len < sizeof(*ctl))
-		return;
-
-	ctl = (struct cn_ctl_msg *)msg->data;
-
-	size = (sizeof(*ctl) + ((ctl->idx_notify_num +
-				 ctl->val_notify_num) *
-				sizeof(struct cn_notify_req)));
-
-	if (msg->len != size)
-		return;
-
-	if (ctl->len + sizeof(*ctl) != msg->len)
-		return;
-
-	/*
-	 * Remove notification.
-	 */
-	if (ctl->group == 0) {
-		struct cn_ctl_entry *n;
-
-		mutex_lock(&notify_lock);
-		list_for_each_entry_safe(ent, n, &notify_list, notify_entry) {
-			if (cn_ctl_msg_equals(ent->msg, ctl)) {
-				list_del(&ent->notify_entry);
-				kfree(ent);
-			}
-		}
-		mutex_unlock(&notify_lock);
-
-		return;
-	}
-
-	size += sizeof(*ent);
-
-	ent = kzalloc(size, GFP_KERNEL);
-	if (!ent)
-		return;
-
-	ent->msg = (struct cn_ctl_msg *)(ent + 1);
-
-	memcpy(ent->msg, ctl, size - sizeof(*ent));
-
-	mutex_lock(&notify_lock);
-	list_add(&ent->notify_entry, &notify_list);
-	mutex_unlock(&notify_lock);
-}
-
 static int cn_proc_show(struct seq_file *m, void *v)
 {
 	struct cn_queue_dev *dev = cdev.cbdev;
@@ -437,11 +274,8 @@ static const struct file_operations cn_file_ops = {
 static int __devinit cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
-	int err;
 
 	dev->input = cn_rx_skb;
-	dev->id.idx = cn_idx;
-	dev->id.val = cn_val;
 
 	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
@@ -457,14 +291,6 @@ static int __devinit cn_init(void)
 
 	cn_already_initialized = 1;
 
-	err = cn_add_callback(&dev->id, "connector", &cn_callback);
-	if (err) {
-		cn_already_initialized = 0;
-		cn_queue_free_dev(dev->cbdev);
-		netlink_kernel_release(dev->nls);
-		return -EINVAL;
-	}
-
 	proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops);
 
 	return 0;
@@ -478,7 +304,6 @@ static void __devexit cn_fini(void)
 
 	proc_net_remove(&init_net, "connector");
 
-	cn_del_callback(&dev->id);
 	cn_queue_free_dev(dev->cbdev);
 	netlink_kernel_release(dev->nls);
 }
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 72ba63eb83c5..3a779ffba60b 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -24,9 +24,6 @@
 
 #include <linux/types.h>
 
-#define CN_IDX_CONNECTOR		0xffffffff
-#define CN_VAL_CONNECTOR		0xffffffff
-
 /*
  * Process Events connector unique ids -- used for message routing
  */
@@ -75,30 +72,6 @@ struct cn_msg {
 	__u8 data[0];
 };
 
-/*
- * Notify structure - requests notification about
- * registering/unregistering idx/val in range [first, first+range].
- */
-struct cn_notify_req {
-	__u32 first;
-	__u32 range;
-};
-
-/*
- * Main notification control message
- * *_notify_num 	- number of appropriate cn_notify_req structures after 
- *				this struct.
- * group 		- notification receiver's idx.
- * len 			- total length of the attached data.
- */
-struct cn_ctl_msg {
-	__u32 idx_notify_num;
-	__u32 val_notify_num;
-	__u32 group;
-	__u32 len;
-	__u8 data[0];
-};
-
 #ifdef __KERNEL__
 
 #include <asm/atomic.h>
@@ -151,11 +124,6 @@ struct cn_callback_entry {
 	u32 seq, group;
 };
 
-struct cn_ctl_entry {
-	struct list_head notify_entry;
-	struct cn_ctl_msg *msg;
-};
-
 struct cn_dev {
 	struct cb_id id;
 
-- 
cgit v1.2.3


From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Sat, 30 Jan 2010 10:25:18 +0530
Subject: perf: Make bp_len type to u64 generic across the arch

Change 'bp_len' type to __u64 to make it work across archs as
the s390 architecture watch point length can be upto 2^64.

reference:
	http://lkml.org/lkml/2010/1/25/212

This is an ABI change that is not backward compatible with
the previous hardware breakpoint info layout integrated in this
development cycle, a rebuilt of perf tools is necessary for
versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a
kernel based on this patch.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin <schwidefsky@de.ibm.com>
LKML-Reference: <20100130045518.GA20776@in.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/hw_breakpoint.h | 2 +-
 include/linux/perf_event.h    | 6 ++----
 kernel/hw_breakpoint.c        | 2 +-
 kernel/perf_event.c           | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 070ba0621738..5977b724f7c6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp)
 	return bp->attr.bp_type;
 }
 
-static inline int hw_breakpoint_len(struct perf_event *bp)
+static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 {
 	return bp->attr.bp_len;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa71874113f..a177698d95e2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,11 +211,9 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	__u32			__reserved_2;
-
-	__u64			bp_addr;
 	__u32			bp_type;
-	__u32			bp_len;
+	__u64			bp_addr;
+	__u64			bp_len;
 };
 
 /*
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 8a5c7d55ac9f..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
 	u64 old_addr = bp->attr.bp_addr;
+	u64 old_len = bp->attr.bp_len;
 	int old_type = bp->attr.bp_type;
-	int old_len = bp->attr.bp_len;
 	int err = 0;
 
 	perf_event_disable(bp);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746bd3a06..2b19297742cb 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->type >= PERF_TYPE_MAX)
 		return -EINVAL;
 
-	if (attr->__reserved_1 || attr->__reserved_2)
+	if (attr->__reserved_1)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-- 
cgit v1.2.3


From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Jan 2010 13:30:11 +0100
Subject: libata: fix ata_id_logical_per_physical_sectors

The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not not
a plain multiple, but the log of it, so fix the helper to give the correct
answer.  Without this we'll get an incorrect minimal I/O size in the block
limits VPD page for 4k sector drives.

Also change the return value of ata_id_logical_per_physical_sectors to u16
for the unlikely case of very large logical sectors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 38a6948ce0c2..20f31567ccee 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id)
 	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
 }
 
-static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+static inline u16 ata_id_logical_per_physical_sectors(const u16 *id)
 {
-	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+	return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf);
 }
 
 static inline int ata_id_has_lba48(const u16 *id)
-- 
cgit v1.2.3


From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 5 Feb 2010 16:09:11 +1100
Subject: percpu: add __percpu for sparse

This is to make the annotation of percpu variables during the next merge
window less painfull.

Extracted from a patch by Rusty Russell.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab4a695..188fcae10a99 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2010 15:35:41 -0500
Subject: fix ima breakage

The "Untangling ima mess, part 2 with counters" patch messed
up the counters.  Based on conversations with Al Viro, this patch
streamlines ima_path_check() by removing the counter maintaince.
The counters are now updated independently, from measuring the file,
in __dentry_open() and alloc_file() by calling ima_counts_get().
ima_path_check() is called from nfsd and do_filp_open().
It also did not measure all files that should have been measured.
Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa]
[AV: add missing nfsd bits]

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |   6 +-
 fs/nfsd/vfs.c                     |   3 +-
 include/linux/ima.h               |   4 +-
 security/integrity/ima/ima_main.c | 236 +++++++++++++++-----------------------
 4 files changed, 97 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 94a5e60779f9..cd77b6375efd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,8 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC));
+			error = ima_path_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1797,8 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC));
+		error = ima_path_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 325959e264ce..32477e3a645c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(&(*filp)->f_path,
-				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
+	host_err = ima_path_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 99dc6d5cf7e5..aa55a8f1f5b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a89f44d5e030..75aee18f6163 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -84,6 +84,36 @@ out:
 	return found;
 }
 
+/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
+ *
+ * When opening a file for read, if the file is already open for write,
+ * the file could change, resulting in a file measurement error.
+ *
+ * Opening a file for write, if the file is already open for read, results
+ * in a time of measure, time of use (ToMToU) error.
+ *
+ * In either case invalidate the PCR.
+ */
+enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
+static void ima_read_write_check(enum iint_pcr_error error,
+				 struct ima_iint_cache *iint,
+				 struct inode *inode,
+				 const unsigned char *filename)
+{
+	switch (error) {
+	case TOMTOU:
+		if (iint->readcount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "ToMToU");
+		break;
+	case OPEN_WRITERS:
+		if (iint->writecount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "open_writers");
+		break;
+	}
+}
+
 /*
  * Update the counts given an fmode_t
  */
@@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
+/*
+ * ima_counts_get - increment file counts
+ *
+ * Maintain read/write counters for all files, but only
+ * invalidate the PCR for measured files:
+ * 	- Opening a file for write when already open for read,
+ *	  results in a time of measure, time of use (ToMToU) error.
+ *	- Opening a file for read when already open for write,
+ * 	  could result in a file measurement error.
+ *
+ */
+void ima_counts_get(struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	fmode_t mode = file->f_mode;
+	struct ima_iint_cache *iint;
+	int rc;
+
+	if (!ima_initialized || !S_ISREG(inode->i_mode))
+		return;
+	iint = ima_iint_find_get(inode);
+	if (!iint)
+		return;
+	mutex_lock(&iint->mutex);
+	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
+	if (rc < 0)
+		goto out;
+
+	if (mode & FMODE_WRITE) {
+		ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name);
+		goto out;
+	}
+	ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name);
+out:
+	ima_inc_counts(iint, file->f_mode);
+	mutex_unlock(&iint->mutex);
+
+	kref_put(&iint->refcount, iint_free);
+}
+
 /*
  * Decrement ima counts
  */
@@ -153,123 +224,6 @@ void ima_file_free(struct file *file)
 	kref_put(&iint->refcount, iint_free);
 }
 
-/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
- *
- * When opening a file for read, if the file is already open for write,
- * the file could change, resulting in a file measurement error.
- *
- * Opening a file for write, if the file is already open for read, results
- * in a time of measure, time of use (ToMToU) error.
- *
- * In either case invalidate the PCR.
- */
-enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
-static void ima_read_write_check(enum iint_pcr_error error,
-				 struct ima_iint_cache *iint,
-				 struct inode *inode,
-				 const unsigned char *filename)
-{
-	switch (error) {
-	case TOMTOU:
-		if (iint->readcount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "ToMToU");
-		break;
-	case OPEN_WRITERS:
-		if (iint->writecount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "open_writers");
-		break;
-	}
-}
-
-static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
-				const unsigned char *filename)
-{
-	int rc = 0;
-
-	ima_inc_counts(iint, file->f_mode);
-
-	rc = ima_collect_measurement(iint, file);
-	if (!rc)
-		ima_store_measurement(iint, file, filename);
-	return rc;
-}
-
-/**
- * ima_path_check - based on policy, collect/store measurement.
- * @path: contains a pointer to the path to be measured
- * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
- *
- * Measure the file being open for readonly, based on the
- * ima_must_measure() policy decision.
- *
- * Keep read/write counters for all files, but only
- * invalidate the PCR for measured files:
- * 	- Opening a file for write when already open for read,
- *	  results in a time of measure, time of use (ToMToU) error.
- *	- Opening a file for read when already open for write,
- * 	  could result in a file measurement error.
- *
- * Always return 0 and audit dentry_open failures.
- * (Return code will be based upon measurement appraisal.)
- */
-int ima_path_check(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-	struct file *file = NULL;
-	int rc;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return 0;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return 0;
-
-	mutex_lock(&iint->mutex);
-
-	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
-	if (rc < 0)
-		goto out;
-
-	if ((mask & MAY_WRITE) || (mask == 0))
-		ima_read_write_check(TOMTOU, iint, inode,
-				     path->dentry->d_name.name);
-
-	if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ)
-		goto out;
-
-	ima_read_write_check(OPEN_WRITERS, iint, inode,
-			     path->dentry->d_name.name);
-	if (!(iint->flags & IMA_MEASURED)) {
-		struct dentry *dentry = dget(path->dentry);
-		struct vfsmount *mnt = mntget(path->mnt);
-
-		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
-				   current_cred());
-		if (IS_ERR(file)) {
-			int audit_info = 0;
-
-			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-					    dentry->d_name.name,
-					    "add_measurement",
-					    "dentry_open failed",
-					    1, audit_info);
-			file = NULL;
-			goto out;
-		}
-		rc = get_path_measurement(iint, file, dentry->d_name.name);
-	}
-out:
-	mutex_unlock(&iint->mutex);
-	if (file)
-		fput(file);
-	kref_put(&iint->refcount, iint_free);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ima_path_check);
-
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
 {
@@ -297,33 +251,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_get - increment file counts
- *
- * - for IPC shm and shmat file.
- * - for nfsd exported files.
- *
- * Increment the counts for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_counts_get(struct file *file)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-	mutex_lock(&iint->mutex);
-	ima_inc_counts(iint, file->f_mode);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-EXPORT_SYMBOL_GPL(ima_counts_get);
-
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
  * @file: pointer to the file to be measured (May be NULL)
@@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
+/**
+ * ima_path_check - based on policy, collect/store measurement.
+ * @file: pointer to the file to be measured
+ * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
+ *
+ * Measure files based on the ima_must_measure() policy decision.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
+ */
+int ima_path_check(struct file *file, int mask)
+{
+	int rc;
+
+	rc = process_measurement(file, file->f_dentry->d_name.name,
+				 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				 PATH_CHECK);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ima_path_check);
+
 static int __init init_ima(void)
 {
 	int error;
-- 
cgit v1.2.3


From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:02:40 -0500
Subject: ima: rename ima_path_check to ima_file_check

ima_path_check actually deals with files!  call it ima_file_check instead.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        | 4 ++--
 fs/nfsd/vfs.c                     | 2 +-
 include/linux/ima.h               | 4 ++--
 security/integrity/ima/ima_main.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index cd77b6375efd..d62fdc875f22 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,7 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(filp, acc_mode);
+			error = ima_file_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1796,7 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(filp, acc_mode);
+		error = ima_file_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 32477e3a645c..97d79eff6b7f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(*filp, access);
+	host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index aa55a8f1f5b9..975837e7d6c0 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index eb1cf6498cc9..b76e1f03ea2b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -14,7 +14,7 @@
  *
  * File: ima_main.c
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
- *	and ima_path_check.
+ *	and ima_file_check.
  */
 #include <linux/module.h>
 #include <linux/file.h>
@@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct file *file, int mask)
+int ima_file_check(struct file *file, int mask)
 {
 	int rc;
 
@@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask)
 				 PATH_CHECK);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ima_path_check);
+EXPORT_SYMBOL_GPL(ima_file_check);
 
 static int __init init_ima(void)
 {
-- 
cgit v1.2.3


From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:38 +0300
Subject: kfifo: Don't use integer as NULL pointer

This patch fixes following sparse warnings:

include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer
kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kfifo.h | 2 +-
 kernel/kfifo.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6f6c5f300af6..bc0fc795bd35 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
  */
 static inline bool kfifo_initialized(struct kfifo *fifo)
 {
-	return fifo->buffer != 0;
+	return fifo->buffer != NULL;
 }
 
 /**
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 559fb5582b60..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0);
+		_kfifo_init(fifo, NULL, 0);
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3